IMPORT LIBRARIES

In [2]:
from matplotlib import pyplot as plt
from matplotlib import ticker
from matplotlib.patches import Rectangle
from matplotlib.colors import LinearSegmentedColormap
from sklearn import datasets
import datetime as datetime64 
import pandas as pd
import numpy as np
import warnings
import json
import os
warnings.filterwarnings('ignore')

In [3]:
# Relative path to the raw 2023 tornado activity CSV
tornado_data = "Resources/2023_tornado_activity.csv"

# Load the raw CSV into a pandas DataFrame
tornado_df = pd.read_csv(filepath_or_buffer=tornado_data)

In [4]:
# Preview the raw DataFrame exactly as loaded from the CSV
tornado_df

Unnamed: 0,om,yr,mo,dy,date,time,tz,st,stf,stn,...,len,wid,ns,sn,sg,f1,f2,f3,f4,fc
0,622081,2023,1,2,2023-01-02,11:47:00,3,AR,5,0,...,0.24,350,1,1,1,139,0,0,0,0
1,622082,2023,1,2,2023-01-02,14:38:00,3,AR,5,0,...,2.09,200,1,1,1,51,0,0,0,0
2,622083,2023,1,2,2023-01-02,15:11:00,3,LA,22,0,...,5.20,70,1,1,1,85,0,0,0,0
3,622084,2023,1,2,2023-01-02,15:43:00,3,LA,22,0,...,0.31,50,1,1,1,81,0,0,0,0
4,622085,2023,1,2,2023-01-02,16:37:00,3,OK,40,0,...,0.10,100,1,1,1,113,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1356,623397,2023,12,10,2023-12-10,04:36:00,3,AL,1,0,...,2.43,400,1,1,1,81,0,0,0,0
1357,623398,2023,12,10,2023-12-10,05:30:00,3,FL,12,0,...,2.97,250,1,1,1,59,0,0,0,0
1358,623399,2023,12,10,2023-12-10,11:28:00,3,NC,37,0,...,1.88,250,1,1,1,183,0,0,0,0
1359,623400,2023,12,17,2023-12-17,12:16:00,3,SC,45,0,...,2.35,150,1,1,1,51,0,0,0,0


In [5]:
# Keep only the columns needed downstream (this cell selects; it does not rename)
selected_columns = [
    'yr', 'mo', 'dy', 'date', 'time',
    'st', 'mag', 'loss', 'slat', 'slon',
]

tornado_cleaned_df = tornado_df.loc[:, selected_columns]

tornado_cleaned_df.head()

Unnamed: 0,yr,mo,dy,date,time,st,mag,loss,slat,slon
0,2023,1,2,2023-01-02,11:47:00,AR,1,25000,33.2939,-92.7057
1,2023,1,2,2023-01-02,14:38:00,AR,1,5000000,34.694,-93.0673
2,2023,1,2,2023-01-02,15:11:00,LA,1,15000,31.7499,-93.4807
3,2023,1,2,2023-01-02,15:43:00,LA,0,10000,32.0582,-93.2735
4,2023,1,2,2023-01-02,16:37:00,OK,-9,0,36.7218,-96.6194


In [6]:
# Map the terse source column codes to descriptive labels
column_labels = {
    'yr': 'Year',
    'mo': 'Month',
    'dy': 'Day',
    'date': 'Date',
    'time': 'Time',
    'st': 'State',
    'mag': 'Magnitude',
    'loss': 'Damages',
    'slat': 'Starting Lat',
    'slon': 'Starting Long',
}

tornado_cleaned_df = tornado_cleaned_df.rename(columns=column_labels)

tornado_cleaned_df.head()
                                            

Unnamed: 0,Year,Month,Day,Date,Time,State,Magnitude,Damages,Starting Lat,Starting Long
0,2023,1,2,2023-01-02,11:47:00,AR,1,25000,33.2939,-92.7057
1,2023,1,2,2023-01-02,14:38:00,AR,1,5000000,34.694,-93.0673
2,2023,1,2,2023-01-02,15:11:00,LA,1,15000,31.7499,-93.4807
3,2023,1,2,2023-01-02,15:43:00,LA,0,10000,32.0582,-93.2735
4,2023,1,2,2023-01-02,16:37:00,OK,-9,0,36.7218,-96.6194


In [7]:
# Drop all rows with Magnitude of -9
# NOTE(review): -9 looks like the dataset's placeholder for an unrated/unknown
# magnitude — confirm against the data dictionary.
has_rated_magnitude = tornado_cleaned_df["Magnitude"] != -9

# Take an explicit copy: a boolean-filtered frame is treated by pandas as a
# possible view of its parent, so the column assignments made in later cells
# would otherwise raise SettingWithCopyWarning (currently hidden by the global
# warnings filter).
tornado_cleaned_df = tornado_cleaned_df[has_rated_magnitude].copy()
tornado_cleaned_df

Unnamed: 0,Year,Month,Day,Date,Time,State,Magnitude,Damages,Starting Lat,Starting Long
0,2023,1,2,2023-01-02,11:47:00,AR,1,25000,33.2939,-92.7057
1,2023,1,2,2023-01-02,14:38:00,AR,1,5000000,34.6940,-93.0673
2,2023,1,2,2023-01-02,15:11:00,LA,1,15000,31.7499,-93.4807
3,2023,1,2,2023-01-02,15:43:00,LA,0,10000,32.0582,-93.2735
5,2023,1,2,2023-01-02,16:46:00,LA,2,850000,32.1720,-92.7048
...,...,...,...,...,...,...,...,...,...,...
1356,2023,12,10,2023-12-10,04:36:00,AL,0,0,32.4649,-85.4258
1357,2023,12,10,2023-12-10,05:30:00,FL,1,0,30.8054,-85.8331
1358,2023,12,10,2023-12-10,11:28:00,NC,1,0,35.6940,-78.6520
1359,2023,12,17,2023-12-17,12:16:00,SC,1,200000,33.6837,-78.9737


In [8]:
# Inspect the column dtypes of the cleaned frame
tornado_cleaned_df.dtypes

Year               int64
Month              int64
Day                int64
Date              object
Time              object
State             object
Magnitude          int64
Damages            int64
Starting Lat     float64
Starting Long    float64
dtype: object

In [9]:
# Parse the 'Time' strings into timedeltas (offset from midnight)
time_as_timedelta = pd.to_timedelta(tornado_cleaned_df['Time'])
tornado_cleaned_df['Time'] = time_as_timedelta

# Pull the hours component out of each timedelta into its own column
tornado_cleaned_df['Hour'] = time_as_timedelta.dt.components['hours']

tornado_cleaned_df

Unnamed: 0,Year,Month,Day,Date,Time,State,Magnitude,Damages,Starting Lat,Starting Long,Hour
0,2023,1,2,2023-01-02,0 days 11:47:00,AR,1,25000,33.2939,-92.7057,11
1,2023,1,2,2023-01-02,0 days 14:38:00,AR,1,5000000,34.6940,-93.0673,14
2,2023,1,2,2023-01-02,0 days 15:11:00,LA,1,15000,31.7499,-93.4807,15
3,2023,1,2,2023-01-02,0 days 15:43:00,LA,0,10000,32.0582,-93.2735,15
5,2023,1,2,2023-01-02,0 days 16:46:00,LA,2,850000,32.1720,-92.7048,16
...,...,...,...,...,...,...,...,...,...,...,...
1356,2023,12,10,2023-12-10,0 days 04:36:00,AL,0,0,32.4649,-85.4258,4
1357,2023,12,10,2023-12-10,0 days 05:30:00,FL,1,0,30.8054,-85.8331,5
1358,2023,12,10,2023-12-10,0 days 11:28:00,NC,1,0,35.6940,-78.6520,11
1359,2023,12,17,2023-12-17,0 days 12:16:00,SC,1,200000,33.6837,-78.9737,12


In [10]:
# Drop only the 'Time' column; 'Date' is kept and 'Hour' remains available
tornado_cleaned_df = tornado_cleaned_df.drop(labels=['Time'], axis='columns')
tornado_cleaned_df

Unnamed: 0,Year,Month,Day,Date,State,Magnitude,Damages,Starting Lat,Starting Long,Hour
0,2023,1,2,2023-01-02,AR,1,25000,33.2939,-92.7057,11
1,2023,1,2,2023-01-02,AR,1,5000000,34.6940,-93.0673,14
2,2023,1,2,2023-01-02,LA,1,15000,31.7499,-93.4807,15
3,2023,1,2,2023-01-02,LA,0,10000,32.0582,-93.2735,15
5,2023,1,2,2023-01-02,LA,2,850000,32.1720,-92.7048,16
...,...,...,...,...,...,...,...,...,...,...
1356,2023,12,10,2023-12-10,AL,0,0,32.4649,-85.4258,4
1357,2023,12,10,2023-12-10,FL,1,0,30.8054,-85.8331,5
1358,2023,12,10,2023-12-10,NC,1,0,35.6940,-78.6520,11
1359,2023,12,17,2023-12-17,SC,1,200000,33.6837,-78.9737,12


In [11]:
# Export the cleaned DataFrame to the shared Resources folder.
# NOTE: with to_csv's defaults the DataFrame index is written as the first column.
cleaned_csv_path = "Resources/tornado_cleaned.csv"
tornado_cleaned_df.to_csv(cleaned_csv_path)

In [22]:
# Damages values at the 0.30, 0.42, 0.57 and 0.90 quantiles
quantile_levels = [0.3, 0.42, 0.57, 0.9]
percentiles = tornado_cleaned_df['Damages'].quantile(q=quantile_levels)
percentiles

0.30         0.0
0.42         0.0
0.57      5000.0
0.90    300000.0
Name: Damages, dtype: float64

In [11]:
# Display the cleaned DataFrame again (nothing is reloaded from disk here)
tornado_cleaned_df

Unnamed: 0,Year,Month,Day,Date,State,Magnitude,Damages,Starting Lat,Starting Long,Hour
0,2023,1,2,2023-01-02,AR,1,25000,33.2939,-92.7057,11
1,2023,1,2,2023-01-02,AR,1,5000000,34.6940,-93.0673,14
2,2023,1,2,2023-01-02,LA,1,15000,31.7499,-93.4807,15
3,2023,1,2,2023-01-02,LA,0,10000,32.0582,-93.2735,15
5,2023,1,2,2023-01-02,LA,2,850000,32.1720,-92.7048,16
...,...,...,...,...,...,...,...,...,...,...
1356,2023,12,10,2023-12-10,AL,0,0,32.4649,-85.4258,4
1357,2023,12,10,2023-12-10,FL,1,0,30.8054,-85.8331,5
1358,2023,12,10,2023-12-10,NC,1,0,35.6940,-78.6520,11
1359,2023,12,17,2023-12-17,SC,1,200000,33.6837,-78.9737,12


In [12]:
# Convert DataFrame to GeoJSON
def df_to_geojson(df):
    """Build a GeoJSON FeatureCollection with one Point feature per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Starting Long', 'Starting Lat', 'Year',
        'Month', 'Day', 'Date', 'State', 'Magnitude' and 'Damages'.

    Returns
    -------
    dict
        ``{"type": "FeatureCollection", "features": [...]}``. Each feature has
        a Point geometry whose coordinates are ``[Starting Long, Starting Lat]``
        and a ``properties`` dict with the keys ``year``, ``month``, ``day``,
        ``date``, ``state``, ``magnitude`` and ``damages``.

    Raises
    ------
    KeyError
        If ``df`` has rows but is missing one of the required columns.
    """
    # No rows means no features; no column lookup is attempted, so an empty
    # frame that lacks the expected columns does not raise.
    if len(df) == 0:
        return {"type": "FeatureCollection", "features": []}

    # Walk the columns in lockstep instead of using iterrows(): iterrows()
    # builds a Series per row and coerces the whole row to a single dtype, so
    # integer fields can come back as floats. Iterating a Series yields plain
    # Python scalars for numeric columns, which keeps each column's own type
    # and is what json.dump needs.
    column_values = (
        df['Starting Long'],
        df['Starting Lat'],
        df['Year'],
        df['Month'],
        df['Day'],
        df['Date'],
        df['State'],
        df['Magnitude'],
        df['Damages'],
    )

    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # Longitude first, then latitude
                "coordinates": [lon, lat],
            },
            "properties": {
                "year": year,
                "month": month,
                "day": day,
                "date": date,
                "state": state,
                "magnitude": magnitude,
                "damages": damages,
            },
        }
        for lon, lat, year, month, day, date, state, magnitude, damages
        in zip(*column_values)
    ]

    geojson_data = {
        "type": "FeatureCollection",
        "features": features,
    }
    return geojson_data

In [13]:
# Build the GeoJSON structure in memory from the cleaned DataFrame
geojson_data = df_to_geojson(df=tornado_cleaned_df)

In [14]:
# Relative output path for the GeoJSON file inside the Resources folder
geojson_dir = 'Resources'
geojson_name = 'tornado_data.geojson'
geojson_file_path = os.path.join(geojson_dir, geojson_name)

In [15]:
# Serialize the GeoJSON dict to disk at the path defined above
with open(geojson_file_path, mode='w') as geojson_file:
    json.dump(geojson_data, fp=geojson_file)