# Performing some Exploratory Data Analysis on US Natural Disasters

In [1]:
import pandas as pd
import numpy as np
import datetime

In [2]:
#importing data
data = pd.read_csv(r'0209268\8.8\data\0-data\events-US-1980-2021.csv', skiprows = 1, header = 0) #note that due to file names beginnning with numbers, need r in front for Raw String

In [3]:
#viewing imported data to make sure import was successful
data.head()

Unnamed: 0,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
0,Southern Severe Storms and Flooding (April 1980),Flooding,19800410,19800417,2487.8,7
1,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2076.8,13
2,Central/Eastern Drought/Heatwave (Summer-Fall ...,Drought,19800601,19801130,35270.4,1260
3,Florida Freeze (January 1981),Freeze,19810112,19810114,1796.1,0
4,"Severe Storms, Flash Floods, Hail, Tornadoes (...",Severe Storm,19810505,19810510,1260.4,20


In [4]:
#removing the date from the Name column
data['Name']  = [name[0] for name in data['Name'].str.split('(')]

In [5]:
#changing Begin Date and End Date to strings
data[['Begin Date', 'End Date']] = data[['Begin Date','End Date']].astype({'Begin Date' : 'str', 'End Date' : 'str'})

In [6]:
#making Begin date and end date datetime objects
data['Begin Date'] = pd.to_datetime(data['Begin Date'], format = "%Y%m%d")

data['End Date'] = pd.to_datetime(data['End Date'], format = "%Y%m%d")

In [7]:
#changing dtypes

data['Total CPI-Adjusted Cost (Millions of Dollars)'].unique()

array(['2487.8', '2076.8', '35270.4', '1796.1', '1260.4', '1920.2',
       '1401.8', '1391.8', '4454.6', '4348.9', '8400.0', '8299.4',
       '5616.2', '1616.2', '1195.8', '3108.0', '2168.0', '3360.3',
       '2217.2', '3876.5', '3514.0', '1324.6', '4509.1', '47209.2',
       '1246.4', '20339.8', '6787.1', '4520.0', '1537.2', '2164.9',
       '1763.1', '7310.0', '1309.6', '3050.9', '6107.4', '6699.0',
       '1592.5', '1889.7', '1481.1', '53460.0', '6138.0', '1300.9',
       '4916.3', '10611.8', '1217.2', '40244.9', '2415.2', '2633.3',
       '1944.6', '5605.2', '1842.1', '1867.2', '1870.0', '1355.9',
       '4550.0', '9985.4', '1547.0', '3822.0', '1761.8', '8534.0',
       '5315.2', '1784.7', '3170.9', '8862.4', '5189.3', '1681.6',
       '6323.6', '2369.1', '1727.7', '2756.8', '1923.7', '1183.0',
       '1656.2', '5962.4', '10113.9', '1592.1', '4225.0', '1724.2',
       '1473.0', '4173.2', '10839.9', '1143.7', '1104.0', '1741.5',
       '1458.0', '8095.5', '4828.0', '13294.3', '3224.

In [8]:
#looks like that the column Total CPI-Adjusted Cost (Millions of Dollars) has actual string of TBD, will replace this with 0
data['Total CPI-Adjusted Cost (Millions of Dollars)'] = data['Total CPI-Adjusted Cost (Millions of Dollars)'].replace('TBD', 0)

In [9]:
#changing dtype of 'Total CPI-Adjusted Cost (Millions of Dollars)' to a float
data['Total CPI-Adjusted Cost (Millions of Dollars)'] = data['Total CPI-Adjusted Cost (Millions of Dollars)'].astype(float)

In [10]:
data.groupby(['Disaster']).agg({'Total CPI-Adjusted Cost (Millions of Dollars)': sum, 'Deaths': sum})

Unnamed: 0_level_0,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
Disaster,Unnamed: 1_level_1,Unnamed: 2_level_1
Drought,272134.8,4139
Flooding,161833.4,624
Freeze,32355.1,162
Severe Storm,320336.9,1786
Tropical Cyclone,1117674.0,6697
Wildfire,107877.2,399
Winter Storm,74326.2,1223


In [11]:
data.groupby(['Disaster']).agg({'Total CPI-Adjusted Cost (Millions of Dollars)': 'mean', 'Deaths': 'mean'}).sort_values('Total CPI-Adjusted Cost (Millions of Dollars)')

Unnamed: 0_level_0,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
Disaster,Unnamed: 1_level_1,Unnamed: 2_level_1
Severe Storm,2271.892908,12.666667
Freeze,3595.011111,18.0
Winter Storm,3911.905263,64.368421
Flooding,4623.811429,17.828571
Wildfire,5677.747368,21.0
Drought,9383.958621,142.724138
Tropical Cyclone,19958.464286,119.589286


In [12]:
data.groupby(['Disaster']).agg({'Total CPI-Adjusted Cost (Millions of Dollars)': 'median', 'Deaths': 'median'}).sort_values('Total CPI-Adjusted Cost (Millions of Dollars)')

Unnamed: 0_level_0,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
Disaster,Unnamed: 1_level_1,Unnamed: 2_level_1
Severe Storm,1666.1,2.0
Winter Storm,2224.8,25.0
Flooding,2487.8,11.0
Wildfire,2633.3,12.0
Freeze,3108.0,0.0
Drought,5225.4,15.0
Tropical Cyclone,7716.95,24.0


In [16]:
#writing to file
data.to_csv('US Natural Disasters Damage.csv', index = False)