In [14]:
import requests
import re
import pandas as pd

# Initialize a dictionary to store data for each month.
# Keys are years; each value maps 'Month_<n>' to the count scraped for that month.
data_dict = {}

# Define base URL
base_url = "https://www.ncei.noaa.gov/access/monitoring/tornadoes/1/{month}/zingchart-config.js"

# Upper bound (seconds) on each HTTP request so a stalled connection cannot hang the cell
REQUEST_TIMEOUT_SECONDS = 30

# Regex patterns to capture years and tornado counts (compiled once, outside the loop)
years_pattern = re.compile(r'scaleX:\s*{[^}]*labels:\s*\[([^\]]+)\]')
tornado_counts_pattern = re.compile(r'values:\s*\[([^\]]+)\]', re.DOTALL)


def _parse_count_array(raw_values):
    """Parse the inside of a JS ``values: [...]`` array into a list of counts.

    Parameters
    ----------
    raw_values : str
        Comma-separated text captured between the square brackets.

    Returns
    -------
    list or None
        One entry per token: ``int`` for numeric tokens, ``None`` for the
        literal ``null``. Returns ``None`` (instead of raising) when any token
        is neither, so the caller can skip arrays that are not integer series.
    """
    parsed = []
    for token in raw_values.split(','):
        token = token.strip()
        if token == 'null':
            parsed.append(None)
            continue
        try:
            parsed.append(int(token))
        except ValueError:
            # Not an integer series (e.g. floats, strings, trailing comma)
            return None
    return parsed


# Loop through each month
for month in range(1, 13):
    # Fetch response; a network failure skips this month rather than aborting the run
    url = base_url.format(month=month)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for month {month}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve data for month {month}")
        continue

    js_text = response.text

    # Extract years
    years_match = years_pattern.search(js_text)
    if not years_match:
        print(f"Years data pattern not found for month {month}")
        continue
    years = [int(year.strip()) for year in years_match.group(1).split(',')]

    # Extract tornado counts
    counts_match = tornado_counts_pattern.findall(js_text)
    if not counts_match:
        print(f"Tornado counts data pattern not found for month {month}")
        continue

    # There may be several `values` arrays; take the first integer series whose
    # length matches the years axis and ignore arrays that do not parse.
    tornado_counts = None
    for match in counts_match:
        counts = _parse_count_array(match)
        if counts is not None and len(counts) == len(years):
            tornado_counts = counts
            break

    if tornado_counts is None:
        print(f"Tornado counts data did not match expected length for month {month}")
        continue
    print(f"Month {month} tornado counts extracted.")  # Confirmation message

    # Add data to the dictionary with years as keys and month-specific counts
    for year, count in zip(years, tornado_counts):
        data_dict.setdefault(year, {})[f'Month_{month}'] = count

# Convert dictionary to DataFrame: one row per year, one column per month
all_data = pd.DataFrame.from_dict(data_dict, orient='index').sort_index()
all_data.index.name = 'Year'

# Display the structured DataFrame
print(all_data)


Month 1 tornado counts extracted.
Month 2 tornado counts extracted.
Month 3 tornado counts extracted.
Month 4 tornado counts extracted.
Month 5 tornado counts extracted.
Month 6 tornado counts extracted.
Month 7 tornado counts extracted.
Month 8 tornado counts extracted.
Month 9 tornado counts extracted.
Month 10 tornado counts extracted.
Month 11 tornado counts extracted.
Month 12 tornado counts extracted.
      Month_1  Month_2  Month_3  Month_4  Month_5  Month_6  Month_7  Month_8  \
Year                                                                           
1950        7       20       21       15       61       28       23       13   
1951        2       10        6       26       57       76       23       27   
1952       12       27       43       37       34       34       27       16   
1953       14       16       40       47       94      111       32       24   
1954        2       17       62      113      101      107       45       49   
...       ...      ...      .

In [15]:
# Drop the final row of the table (the last label in the year index).
# NOTE: all_data is reassigned from itself, so every re-run of this cell
# removes one more row.
last_row_label = all_data.index[-1:]
all_data = all_data.drop(index=last_row_label)

# Relabel the columns, in their existing order, with calendar month names
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
all_data.columns = month_names

Unnamed: 0_level_0,January,February,March,April,May,June,July,August,September,October,November,December
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014,4,42,20,129,130,286,85,33,41,73.0,23.0,20.0
2015,28,3,11,171,381,184,115,45,17,40.0,99.0,83.0
2016,17,103,85,142,217,86,108,90,39,20.0,50.0,19.0
2017,137,69,192,214,291,146,81,119,51,75.0,42.0,12.0
2018,15,48,55,130,170,155,92,81,108,123.0,83.0,66.0
2019,22,27,107,272,510,177,101,78,85,65.0,16.0,57.0
2020,87,42,83,264,126,91,99,182,38,19.0,24.0,27.0
2021,16,11,138,78,259,105,126,153,28,147.0,21.0,232.0
2022,37,11,234,219,239,123,64,35,25,36.0,62.0,58.0
2023,130,56,208,149,168,253,114,145,27,32.0,18.0,21.0


In [16]:
# Convert every column to pandas' nullable integer dtype ('Int64'),
# which can hold whole numbers alongside missing values.
nullable_int = pd.Int64Dtype()
all_data = all_data.astype(nullable_int)

In [17]:
# Show the last ten rows of the table as the cell's output
all_data.tail(n=10)

Unnamed: 0_level_0,January,February,March,April,May,June,July,August,September,October,November,December
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014,4,42,20,129,130,286,85,33,41,73,23,20
2015,28,3,11,171,381,184,115,45,17,40,99,83
2016,17,103,85,142,217,86,108,90,39,20,50,19
2017,137,69,192,214,291,146,81,119,51,75,42,12
2018,15,48,55,130,170,155,92,81,108,123,83,66
2019,22,27,107,272,510,177,101,78,85,65,16,57
2020,87,42,83,264,126,91,99,182,38,19,24,27
2021,16,11,138,78,259,105,126,153,28,147,21,232
2022,37,11,234,219,239,123,64,35,25,36,62,58
2023,130,56,208,149,168,253,114,145,27,32,18,21


In [18]:
# Write the table (including its 'Year' index) to a CSV file in the working directory
output_csv = 'tornado_data.csv'
all_data.to_csv(output_csv)