# US Billion-Dollar Disaster Summary Crawler
Script to obtain additional information on individual billion-dollar disaster (BDD) events from the [data](https://www.ncei.noaa.gov/access/billions/state-summary/US) published by the National Centers for Environmental Information (NCEI) of the United States of America.

*To run this Python notebook, ensure that all packages listed in `requirements.txt` are installed in your Python environment.*

In [1]:
""" Initialisations """
from bs4 import BeautifulSoup as bs_4
import requests
import pandas as pd

# Page crawled by this notebook (NOAA NCEI billion-dollar disasters, US state summary).
source_url = "https://www.ncei.noaa.gov/access/billions/state-summary/US"

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook indefinitely; bound the wait explicitly.
response = requests.get(source_url, timeout=30)

# Stop here with a clear HTTP error instead of failing later while parsing
# an error page that does not contain the expected table.
response.raise_for_status()

In [2]:
""" Data Parsing """
# Locate the element with id "events-table" in the downloaded page
page_soup = bs_4(response.text, "lxml")
us_summary_table = page_soup.find(attrs={"id": "events-table"})

# Column labels: stripped text of every <th> found in the table
headers = [header_cell.text.strip() for header_cell in us_summary_table.find_all('th')]

# Data rows: the first <tr> is skipped, as is any row without <td> cells
rows = []
for table_row in us_summary_table.find_all('tr')[1:]:
    data_cells = table_row.find_all('td')
    if not data_cells:
        continue

    # The first cell carries separate "name" and "dates" elements;
    # they are merged into a single "<name> (<dates>)" value.
    lead_cell = data_cells[0]
    event_name = lead_cell.find(attrs={"class": "name"}).text.strip()
    event_dates = lead_cell.find(attrs={"class": "dates"}).text.strip()

    record = [f"{event_name} ({event_dates})"]
    # Every remaining cell contributes its stripped text unchanged
    record.extend(other_cell.text.strip() for other_cell in data_cells[1:])
    rows.append(record)

# Assemble the parsed rows into a dataframe keyed by the scraped headers
df_us_summary = pd.DataFrame(rows, columns=headers)

In [3]:
""" Basic Data Cleaning """
# Collapse every run of whitespace inside the scraped header labels to a single
# space. The cost header spans several lines of indented HTML, so matching it
# verbatim (newline plus an exact number of spaces) would silently stop working
# if the page's indentation changed: rename() ignores keys it cannot find.
df_us_summary.columns = [" ".join(str(label).split()) for label in df_us_summary.columns]

# Rename according to events-US-1980-2024.csv column names
# ('Summary' and 'Deaths' already carry their final names).
df_us_summary.rename(
    columns={
        'Event': 'Name',
        'Type': 'Disaster',
        'BeginDate': 'Begin Date',
        'EndDate': 'End Date',
        'CPI-AUnadjusted EstimatedCost(in Billions)': 'CPI-Adjusted Cost',
    },
    inplace=True,
)

# Clean BDD costs: keep the first decimal number found in each cell as a float
# (cells without a match become NaN).
# NOTE(review): the header says "(in Billions)", whereas the preview of
# events-US-1980-2024.csv shows values roughly 1000x larger for the same events
# - confirm the units before combining the two datasets.
df_us_summary['CPI-Adjusted Cost'] = (
    df_us_summary['CPI-Adjusted Cost']
    .str.extract(r'(\d+\.\d+)', expand=False)
    .astype(float)
)

# Format dates: parse "Month day, Year" text and re-emit it as dd/mm/yyyy strings
for date_column in ("Begin Date", "End Date"):
    parsed_dates = pd.to_datetime(df_us_summary[date_column], format='%B %d, %Y')
    df_us_summary[date_column] = parsed_dates.dt.strftime('%d/%m/%Y')

In [4]:
""" Data Export """
# Write the cleaned table next to the notebook, omitting the dataframe index column
export_path = "additional-events-US-1980-2024.csv"
df_us_summary.to_csv(export_path, index=False)

In [5]:
""" Data Preview """
# Crawled data (after the cleaning step above has renamed columns and
# reformatted costs and dates). Left as a bare trailing expression so the
# notebook renders the dataframe as this cell's output.
df_us_summary

Unnamed: 0,Name,Disaster,Begin Date,End Date,Summary,CPI-Adjusted Cost,Deaths
0,Southern and Eastern Severe Weather (April 2024),Severe Storm,08/04/2024,11/04/2024,Southern and eastern severe weather produced t...,1.5,0
1,Central Tornado Outbreak and Eastern Severe We...,Severe Storm,01/04/2024,03/04/2024,A central tornado outbreak produced more than ...,1.8,3
2,Central and Southern Severe Weather (March 2024),Severe Storm,13/03/2024,15/03/2024,"Damaging hail, tornadoes and high wind from se...",4.4,3
3,Southern Severe Weather (February 2024),Severe Storm,10/02/2024,12/02/2024,Severe storms produced up to golf ball sized h...,1.2,0
4,Central and Southern Winter Storm and Cold Wav...,Winter Storm,14/01/2024,18/01/2024,A bitterly cold airmass affected numerous cent...,1.3,41
...,...,...,...,...,...,...,...
378,"Severe Storms, Flash Floods, Hail, Tornadoes (...",Severe Storm,05/05/1981,10/05/1981,Severe storms cause damage across the Midwest ...,1.4,20
379,Florida Freeze (January 1981),Freeze,12/01/1981,14/01/1981,Severe freeze heavily damaged fruit crops acro...,2.1,0
380,Central/Eastern Drought/Heat Wave (Summer-Fall...,Drought,01/06/1980,30/11/1980,Central and eastern U.S. drought/heat wave cau...,40.2,1260
381,Hurricane Allen (August 1980),Tropical Cyclone,07/08/1980,11/08/1980,Category 3 hurricane makes landfall north of B...,2.2,13


In [6]:
# Load the original events file that the crawled data is meant to be combined with.
# The first two lines of the CSV are skipped before the header row is read.
events_csv_path = "../events-US-1980-2024.csv"
df_events = pd.read_csv(events_csv_path, skiprows=2)
df_events

Unnamed: 0,Name,Disaster,Begin Date,End Date,CPI-Adjusted Cost,Unadjusted Cost,Deaths
0,Southern Severe Storms and Flooding (April 1980),Flooding,19800410,19800417,2728.1,706.8,7
1,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2212.6,590.0,13
2,Central/Eastern Drought/Heat Wave (Summer-Fall...,Drought,19800601,19801130,40180.2,10020.0,1260
3,Florida Freeze (January 1981),Freeze,19810112,19810114,2053.5,572.0,0
4,"Severe Storms, Flash Floods, Hail, Tornadoes (...",Severe Storm,19810505,19810510,1397.0,401.4,20
...,...,...,...,...,...,...,...
378,Central and Southern Winter Storm and Cold Wav...,Winter Storm,20240114,20240118,1345.3,1332.0,41
379,Southern Severe Weather (February 2024),Severe Storm,20240210,20240212,1150.5,1139.0,0
380,Central and Southern Severe Weather (March 2024),Severe Storm,20240313,20240315,4354.1,4311.0,3
381,Central Tornado Outbreak and Eastern Severe We...,Severe Storm,20240401,20240403,1823.0,1823.0,3
