In [14]:
import requests
from bs4 import BeautifulSoup

# Directory listing that holds the NOAA Storm Events CSV files.
# Check periodically that this link is still current.
base_url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"

# Fetch the listing page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops us from silently parsing an
# HTTP error page as if it were the directory listing.
response = requests.get(base_url, timeout=60)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Build absolute URLs for every linked .csv.gz file.
# Anchors without an href attribute yield None from node.get('href'); the
# `href and ...` guard skips them instead of raising AttributeError.
file_links = [
    base_url + href
    for href in (node.get('href') for node in soup.find_all('a'))
    if href and href.endswith('.csv.gz')
]


In [15]:
import re

# Years of interest; this should align with the range given in the FRED file.
# range() excludes its stop value, so this covers 1996 through 2024.
years = [str(year) for year in range(1996, 2025)]  # Adjust the range as needed


def _data_year(link):
    """Return the data year encoded as ``_dYYYY_`` in the link's file name.

    Storm Events file names are expected to look like
    ``StormEvents_details-ftp_v1.0_d1996_c20220425.csv.gz``: ``dYYYY`` is the
    year the data covers and ``cYYYYMMDD`` is the date the file was created.

    Returns the four-digit year as a string, or None when the file name does
    not contain a ``_dYYYY_`` token.
    """
    match = re.search(r'_d(\d{4})_', link.rsplit('/', 1)[-1])
    return match.group(1) if match else None


# Filter for 'details' files whose *data* year is one of `years`.
# A plain substring test (`year in link`) is not enough: the creation-date
# token also contains a year, so e.g. a 1950 file created in 2021 would match
# "2021" and be downloaded even though it is outside the requested range.
details_files = [
    link for link in file_links
    if 'details' in link and _data_year(link) in years
]


In [None]:
import pandas as pd
import os
import gzip
import shutil

# Create a directory to store the downloaded files
os.makedirs('data_raw', exist_ok=True)

# Fail early with a clear message: pd.concat() raises an unhelpful
# "No objects to concatenate" error when given an empty list.
if not details_files:
    raise ValueError(
        "No Storm Events 'details' files were selected; "
        "check the listing URL and the year filter."
    )

# Per-file DataFrames, concatenated once after the loop
df_list = []

for file_url in details_files:
    # Extract the filename from the URL
    filename = file_url.split('/')[-1]
    compressed_path = os.path.join('data_raw', filename)
    decompressed_path = compressed_path[:-3]  # Remove '.gz' extension

    # Download the file unless a previous run already fetched it, so that
    # re-running the notebook does not re-download every archive.
    if not os.path.exists(compressed_path):
        # Write to a temporary name and rename on success, so an interrupted
        # download is never mistaken for a complete cached file.
        partial_path = compressed_path + '.part'
        with requests.get(file_url, stream=True, timeout=60) as r:
            # Without this check an HTTP error body would be saved as the
            # archive and only fail later, inside gzip.
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                # r.raw is copied as-is (no content decoding by requests),
                # so the file on disk is the .gz exactly as served.
                shutil.copyfileobj(r.raw, f)
        os.replace(partial_path, compressed_path)

    # Decompress the file
    with gzip.open(compressed_path, 'rb') as f_in:
        with open(decompressed_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    # Load the CSV into a DataFrame and append to the list
    df = pd.read_csv(decompressed_path, low_memory=False)
    df_list.append(df)

# Combine all DataFrames into a single DataFrame; there are many files in the
# range, so one combined frame is easier to work with.
combined_df = pd.concat(df_list, ignore_index=True)

# Save the combined DataFrame to a CSV file
combined_df.to_csv('data_raw/StormEvents_combined.csv', index=False)

print("All files downloaded, decompressed, and combined successfully:)")
