This notebook downloads NOAA's Storm Events "details" files for the years 1950 to 2024 and keeps, from each year's dataframe, only the tornado events that carry a Fujita or Enhanced Fujita rating (F0–F5 or EF0–EF5).

In [None]:
import pandas as pd
import requests
from io import BytesIO

base_url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"
# Creation-date stamp (the ``_cYYYYMMDD`` part of the file name) used by most
# yearly files.
creation_date = "20250520"
# Years whose file carries a different creation-date stamp.  If a stamp is out
# of date the download fails, the failure is printed, and that year is skipped.
creation_date_overrides = {1950: "20250401", 2020: "20240620"}
# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the whole run indefinitely.
request_timeout = 60
# Ratings that count as a rated tornado.  The Enhanced Fujita scale replaced
# the Fujita scale in US operations on 1 February 2007, so the 2007 file can
# carry both label families; accepting either for every year avoids dropping
# rows at the transition.
f_list = [
    'F0', 'F1', 'F2', 'F3', 'F4', 'F5',
    'EF0', 'EF1', 'EF2', 'EF3', 'EF4', 'EF5',
]
# Columns that must be present in a yearly file for the filtering below.
required_columns = {
    'event_id', 'event_type', 'tor_f_scale', 'state', 'begin_lat', 'begin_lon',
}
# Columns kept for every selected tornado ('year' is added by the loop).
output_columns = ['state', 'begin_lat', 'begin_lon', 'tor_f_scale', 'year']
all_years_data = []


def download_csv_gz(file_name):
    """Download one gzipped CSV from ``base_url`` and parse it.

    Args:
        file_name: Name of the ``.csv.gz`` file, appended to ``base_url``.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the download or the
        parsing fails (the failure is printed, not raised).
    """
    url = base_url + file_name
    try:
        r = requests.get(url, timeout=request_timeout)
        r.raise_for_status()
        return pd.read_csv(BytesIO(r.content), compression='gzip', low_memory=False)
    except Exception as e:
        # Deliberately broad: one bad year (network error, HTTP error,
        # corrupt archive, unparsable CSV) must not abort the whole run.
        print(f"Failed to load {file_name}: {e}")
        return None


for year in range(1950, 2025):
    print(f"\nProcessing year: {year}")

    date = creation_date_overrides.get(year, creation_date)
    details_file = f"StormEvents_details-ftp_v1.0_d{year}_c{date}.csv.gz"

    df_details = download_csv_gz(details_file)
    if df_details is None:
        continue

    # Normalise header case so the column lookups below work regardless of
    # how the file capitalises its header.
    df_details.columns = df_details.columns.str.lower()

    missing_columns = required_columns.difference(df_details.columns)
    if missing_columns:
        print(f"Skipping {details_file}: missing columns {sorted(missing_columns)}")
        continue

    is_rated_tornado = (
        (df_details['event_type'] == 'Tornado')
        & df_details['tor_f_scale'].isin(f_list)
    )
    # .copy() so that adding 'year' below does not write into a view of
    # df_details.
    df_tornadoes = df_details[is_rated_tornado].copy()
    if df_tornadoes.empty:
        continue

    df_tornadoes['year'] = year
    all_years_data.append(df_tornadoes[output_columns])


# Stack the per-year tornado frames into one table with a fresh 0..n-1 index.
# pd.concat raises an opaque "No objects to concatenate" for an empty list, so
# fail early (same exception type) with a message that names the real cause.
if not all_years_data:
    raise ValueError(
        "No tornado data collected: every yearly file failed to download "
        "or yielded no matching rows."
    )
combined_df = pd.concat(all_years_data, ignore_index=True)

def year_to_bin(y):
    """Return the label of the four-year bin that contains ``y``.

    Bins start on multiples of four, so 1950 falls in ``"1948-1951"``.
    The label has the form ``"<first year>-<last year>"``.
    """
    first_year = (y // 4) * 4
    last_year = first_year + 3
    return "{}-{}".format(first_year, last_year)

# Label every row with the four-year bin its year falls into.
combined_df['year_bin'] = combined_df['year'].map(year_to_bin)


#binned_summary = combined_df.groupby(['year_bin', 'state', 'begin_lat', 'begin_lon', 'tor_f_scale']) \
#                            .size().reset_index(name='count')


#binned_summary.to_csv("1952_2007.csv", index=False)




Processing year: 1950

Processing year: 1951

Processing year: 1952

Processing year: 1953

Processing year: 1954

Processing year: 1955

Processing year: 1956

Processing year: 1957

Processing year: 1958

Processing year: 1959

Processing year: 1960

Processing year: 1961

Processing year: 1962

Processing year: 1963

Processing year: 1964

Processing year: 1965

Processing year: 1966

Processing year: 1967

Processing year: 1968

Processing year: 1969

Processing year: 1970

Processing year: 1971

Processing year: 1972

Processing year: 1973

Processing year: 1974

Processing year: 1975

Processing year: 1976

Processing year: 1977

Processing year: 1978

Processing year: 1979

Processing year: 1980

Processing year: 1981

Processing year: 1982

Processing year: 1983

Processing year: 1984

Processing year: 1985

Processing year: 1986

Processing year: 1987

Processing year: 1988

Processing year: 1989

Processing year: 1990

Processing year: 1991

Processing year: 1992

Processing

In [16]:
# Save the combined table; index=True (the pandas default) writes the row
# index as the first, unnamed column of the CSV.
combined_df.to_csv('all_tornadoes.csv', index=True)