For "Tornado Region" - with all tornadoes. Output: regressdf_all.csv

In [1]:

import pandas as pd
import requests
from io import BytesIO
import numpy as np

# Directory URL that every details file name is appended to (see download_csv_gz).
base_url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"
# Stamp inserted after "_c" in each details file name; presumably the date
# (YYYYMMDD) on which the server-side files were generated - verify against the
# directory listing if downloads start returning 404.
creation = "20250520"

#function for downloading files
def download_csv_gz(file_name, timeout=60):
    """Download a gzip-compressed CSV located under ``base_url`` and parse it.

    Parameters
    ----------
    file_name : str
        File name, appended verbatim to the module-level ``base_url``.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without it a stalled connection would
        block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The decompressed CSV contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = base_url + file_name
    print(f"Downloading {url}")
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # The payload is held in memory and decompressed by pandas; nothing is written to disk.
    return pd.read_csv(BytesIO(r.content), compression='gzip', low_memory=False)

# data[year] is a DataFrame with the state, year and starting coordinates of
# every tornado record of that year, minus the states/territories listed below.
data = {}

# States/territories removed from the analysis (built once, not per iteration).
exclude_states = ['ALASKA', 'HAWAII', 'PUERTO RICO', 'GUAM', 'AMERICAN SAMOA', 'COMMONWEALTH OF THE NORTHERN MARIANA ISLANDS', 'UNITED STATES MINOR OUTLYING ISLANDS']

for year in range(1955, 2016):
    details_file = f"StormEvents_details-ftp_v1.0_d{year}_c{creation}.csv.gz"
    df_details = download_csv_gz(details_file)

    # Header case is normalised so the column names below match every file.
    df_details.columns = df_details.columns.str.lower()

    # Fail early, with a readable message, if a file lacks a column used below.
    required = ['event_type', 'state', 'year', 'begin_lat', 'begin_lon']
    missing = [col for col in required if col not in df_details.columns]
    assert not missing, f"{details_file} is missing columns: {missing}"

    # The details files list every storm event type (hail, thunderstorm wind, ...),
    # so restrict to tornado records; otherwise the "all tornadoes" statistics
    # would be computed over non-tornado events as well.
    is_tornado = df_details['event_type'].astype(str).str.strip().str.lower() == 'tornado'
    in_scope = ~df_details['state'].isin(exclude_states)

    # Rows lacking a state, year or starting coordinate cannot be used downstream.
    data[year] = (
        df_details.loc[is_tornado & in_scope, ['state', 'year', 'begin_lat', 'begin_lon']]
        .dropna()
    )



# datan[year]: that year's events after the latitude cut and the removal of the
# 10% of points lying furthest from the yearly centroid.
datan = {}

for yr in range(1955, 2016):
    # Events starting south of 31 degrees latitude are discarded to exclude
    # potentially hurricane-related tornadoes.
    northern = data[yr].loc[data[yr]['begin_lat'] >= 31].copy()

    # Offsets from the centroid (mean latitude / mean longitude), in degrees.
    lat_offset = northern['begin_lat'] - northern['begin_lat'].mean()
    lon_offset = northern['begin_lon'] - northern['begin_lon'].mean()

    # Plain Euclidean distance in degree space, kept as a stand-alone Series
    # so no helper column has to be added to (and dropped from) the frame.
    radius = np.sqrt(lat_offset**2 + lon_offset**2)

    # Keep the points at or below the 90th percentile of that distance.
    cutoff = radius.quantile(0.90)
    datan[yr] = northern.loc[radius <= cutoff].copy()

rows = []

# One record per year: the year followed by mean and standard deviation of the
# starting latitude, then mean and standard deviation of the starting longitude.
for yr in range(1955, 2016):
    coords = datan[yr][['year', 'begin_lat', 'begin_lon']]
    lat, lon = coords['begin_lat'], coords['begin_lon']
    rows.append([yr, lat.mean(), lat.std(), lon.mean(), lon.std()])

# The frame is assembled in a single step from the collected records.
summary_columns = ['year', 'begin_lat_mean', 'begin_lat_std', 'begin_lon_mean', 'begin_lon_std']
regress_df_all = pd.DataFrame(rows, columns=summary_columns)

regress_df_all.to_csv("regressdf_all.csv", index=False)

    

Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1955_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1956_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1957_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1958_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1959_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1960_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1961_c20250520.csv.gz
Downloading https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/StormEvents_details-ftp_v1.0_d1962_c20250520.csv.gz


For "Tornado Alley" - with high intensity tornadoes (F3-F5 before 2007, EF3-EF5 from 2007 onward). Output: regressdf.csv

In [None]:
from io import BytesIO

import numpy as np
import pandas as pd
import requests

# Directory URL that every details file name is appended to (see download_csv_gz).
base_url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"
# Stamp inserted after "_c" in each details file name; presumably the date
# (YYYYMMDD) on which the server-side files were generated - verify against the
# directory listing if downloads start returning 404.
creation = "20250520"

#function for downloading files
def download_csv_gz(file_name, timeout=60):
    """Download a gzip-compressed CSV located under ``base_url`` and parse it.

    Parameters
    ----------
    file_name : str
        File name, appended verbatim to the module-level ``base_url``.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without it a stalled connection would
        block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The decompressed CSV contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = base_url + file_name
    print(f"Downloading {url}")
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # The payload is held in memory and decompressed by pandas; nothing is written to disk.
    return pd.read_csv(BytesIO(r.content), compression='gzip', low_memory=False)

# data_low[year] / data_high[year]: that year's rated tornado records, split
# into low intensity ((E)F0-(E)F2) and high intensity ((E)F3-(E)F5).
data_low = {}
data_high = {}

# Fujita-scale labels and their Enhanced-Fujita counterparts.
group1 = ['F0', 'F1', 'F2']
group2 = ['F3', 'F4', 'F5']
ngroup1 = ['EF0', 'EF1', 'EF2']
ngroup2 = ['EF3', 'EF4', 'EF5']

# A rating counts as low/high on either scale. Matching both label sets in
# every year lets a single loop replace the two copy-pasted ones.
# NOTE(review): the EF scale took effect on 1 Feb 2007, so the 2007 file may
# still hold F-labelled events that an EF-only match would drop - confirm
# against the data.
low_labels = group1 + ngroup1
high_labels = group2 + ngroup2

# States/territories removed from the analysis (built once, not per iteration).
exclude_states = ['ALASKA', 'HAWAII', 'PUERTO RICO', 'GUAM', 'AMERICAN SAMOA', 'COMMONWEALTH OF THE NORTHERN MARIANA ISLANDS', 'UNITED STATES MINOR OUTLYING ISLANDS']

for i in range(1955, 2016):
    year = i
    details_file = f"StormEvents_details-ftp_v1.0_d{year}_c{creation}.csv.gz"
    df_details = download_csv_gz(details_file)

    # Header case is normalised so the column names below match every file.
    df_details.columns = df_details.columns.str.lower()

    # Fail early, with a readable message, if a file lacks a column used below.
    required = ['state', 'year', 'begin_lat', 'begin_lon', 'tor_f_scale']
    missing = [col for col in required if col not in df_details.columns]
    assert not missing, f"{details_file} is missing columns: {missing}"

    df_clean = df_details[required]
    state_filtered_df = df_clean[~df_clean['state'].isin(exclude_states)]
    # Dropping rows with any missing value also removes events without a
    # tornado rating.
    df = state_filtered_df.dropna()

    if year >= 2007:
        # Kept from the original 2007-2015 loop, the only one that wrote a
        # per-year summary file. NOTE(review): drop this if nothing reads them.
        df.to_csv(f"{year}_summary.csv", index=False)

    # Split the DataFrame by intensity class.
    data_low[i] = df[df['tor_f_scale'].isin(low_labels)].copy()
    data_high[i] = df[df['tor_f_scale'].isin(high_labels)].copy()

# datan[year]: that year's high-intensity events after the latitude cut and the
# removal of the 10% of points lying furthest from the yearly centroid.
datan = {}

for yr in range(1955, 2016):
    # Events starting south of 31 degrees latitude are discarded to exclude
    # potentially hurricane-related tornadoes.
    northern = data_high[yr].loc[data_high[yr]['begin_lat'] >= 31].copy()

    # Offsets from the centroid (mean latitude / mean longitude), in degrees.
    lat_offset = northern['begin_lat'] - northern['begin_lat'].mean()
    lon_offset = northern['begin_lon'] - northern['begin_lon'].mean()

    # Plain Euclidean distance in degree space, kept as a stand-alone Series
    # so no helper column has to be added to (and dropped from) the frame.
    radius = np.sqrt(lat_offset**2 + lon_offset**2)

    # Keep the points at or below the 90th percentile of that distance.
    cutoff = radius.quantile(0.90)
    datan[yr] = northern.loc[radius <= cutoff].copy()

rows = []

# One record per year: the year followed by mean and standard deviation of the
# starting latitude, then mean and standard deviation of the starting longitude.
# std() uses the sample formula, so a year with fewer than two events yields NaN.
for i in range(1955, 2016):
    math_data_df = datan[i][['year', 'begin_lat', 'begin_lon']]
    rows.append([
        i,
        math_data_df['begin_lat'].mean(),
        math_data_df['begin_lat'].std(),
        math_data_df['begin_lon'].mean(),
        math_data_df['begin_lon'].std(),
    ])

# Create DataFrame once after the loop
regress_df = pd.DataFrame(rows, columns=['year', 'begin_lat_mean', 'begin_lat_std', 'begin_lon_mean', 'begin_lon_std'])

# This cell produces regressdf.csv, so the frame must be written. Reading the
# file here would discard the frame just computed and fail with
# FileNotFoundError on a fresh run.
regress_df.to_csv("regressdf.csv", index=False)
