In [None]:
#----------------------------------------------
#This Notebook Cleans the data provided by FEMA
#----------------------------------------------

import pandas as pd
import numpy as np
import os as os
import csv


In [None]:
#Read and store the FEMA Disaster data

FEMA_csv = os.path.join("..","Data","Fema_Declaration_dates.csv")

FEMA_df = pd.read_csv(FEMA_csv)

#Drop the columns that are not part of the original dataset.
#pandas names header-less columns "Unnamed: N", so match on that prefix instead of listing each one;
#this keeps working if the file has more or fewer of these extra columns.
unnamed_columns = [column for column in FEMA_df.columns if str(column).startswith("Unnamed:")]
FEMA_df = FEMA_df.drop(columns=unnamed_columns)

#Drop the FEMA disaster close out date: investigation showed that 352 disasters do not have a close out date assigned,
#which caused those 352 disasters to be dropped by the later dropna.
FEMA_df = FEMA_df.drop(columns=["disasterCloseOutDate"])

In [None]:
#Count the non-null values in each column to see which columns have missing data
FEMA_df.count()

In [None]:
#Keep only the rows that are complete: a row is dropped when any of its columns is missing a value
#(this includes disasters where a county is not listed)
FEMA_Clean_df = FEMA_df.dropna(how="any", axis="index")

In [None]:
#Count the non-null values per column again; after the drop every column should report the same count
FEMA_Clean_df.count()

In [None]:
#Map the raw FEMA column names to human-readable labels
clean_column_names = {
    "disasterNumber": "Disaster Number",
    "ihProgramDeclared": "Household Program Declared",
    "iaProgramDeclared": "Individual Assistance Program Declared",
    "paProgramDeclared": "Public Assistance Program Declared",
    "hmProgramDeclared": "Hazard Mitigation Program Declared",
    "state": "State",
    "declarationDate": "Declaration Date",
    "fyDeclared": "Fiscal Year Declared",
    "disasterType": "Disaster Type",
    "incidentType": "Incident Type",
    "title": "Title",
    "incidentBeginDate": "Incident Begin Date",
    "incidentEndDate": "Incident End Date",
    #No effect when the close out date column has already been removed; rename skips labels that are not present
    "disasterCloseOutDate": "Disaster Close Out Date",
    "declaredCountyArea": "Declared County Area",
    "placeCode": "Place Code",
}

#rename returns a new data frame, so the result is bound back to the same name
FEMA_Clean_df = FEMA_Clean_df.rename(columns=clean_column_names)

In [None]:
#List the unique incident types present in the cleaned data
FEMA_Clean_df["Incident Type"].unique()

In [None]:
#Our Zillow dataset starts at May 1996, so the FEMA data frame is limited to the more recent incidents.
#NOTE(review): the cutoff below keeps incidents that began in January 1997 or later, while the
#variable name and the output file name say 1996 - confirm which start date is intended.
#The comparison is done on strings, so it assumes "Incident Begin Date" is stored in ISO (YYYY-MM-...) format - TODO confirm.
incident_start_cutoff = "1997-01"
began_on_or_after_cutoff = FEMA_Clean_df["Incident Begin Date"] >= incident_start_cutoff

#Filter and reset the index in one chained step so FEMA96_df is a new, independent data frame.
#Resetting the index in place on the filtered slice can trigger SettingWithCopyWarning when columns are added to it later.
FEMA96_df = FEMA_Clean_df[began_on_or_after_cutoff].reset_index(drop=True)

In [None]:
#The values for the County Name field are not in the same format as the Zillow values - FEMA has "(County)"
#Keep the text before the first "(" (including its trailing space) and append "County", e.g. "X (County)" becomes "X County".
#NOTE(review): any suffix in parentheses is replaced by "County", and a value without "(" gets "County"
#appended with no space in between - confirm this is acceptable for the non-county areas in the data.
county_prefix = FEMA96_df["Declared County Area"].str.split(pat="(", n=1).str[0]

#assign returns a new data frame, which avoids writing a column into a filtered slice of another data frame
FEMA96_df = FEMA96_df.assign(CountyName=county_prefix + "County")

In [None]:
#Check how many rows were declared for each incident type (value_counts lists the most frequent type first)
FEMA96_df["Incident Type"].value_counts() 

In [None]:
#Restrict the data to the six incident types chosen from the counts above (the ones with over 1000 rows each)
top_incident_types = [
    "Severe Storm(s)",
    "Hurricane",
    "Fire",
    "Snow",
    "Flood",
    "Severe Ice Storm",
]
is_top_incident_type = FEMA96_df["Incident Type"].isin(top_incident_types)
FEMA96TopTypes_df = FEMA96_df[is_top_incident_type]

#Sanity check: only the six incident types listed above should appear in these counts
FEMA96TopTypes_df["Incident Type"].value_counts()

In [None]:
#Build one data frame per incident type, in case the types need to be analysed separately
top_type_column = FEMA96TopTypes_df["Incident Type"]

FEMA96_SevereStorm_df = FEMA96TopTypes_df.loc[top_type_column.eq("Severe Storm(s)")]
FEMA96_Hurricane_df = FEMA96TopTypes_df.loc[top_type_column.eq("Hurricane")]
FEMA96_Fire_df = FEMA96TopTypes_df.loc[top_type_column.eq("Fire")]
FEMA96_Snow_df = FEMA96TopTypes_df.loc[top_type_column.eq("Snow")]
FEMA96_Flood_df = FEMA96TopTypes_df.loc[top_type_column.eq("Flood")]
FEMA96_SevereIce_df = FEMA96TopTypes_df.loc[top_type_column.eq("Severe Ice Storm")]

In [None]:
#Output the FEMA96TopTypes into a csv file
#Build the path with os.path.join (as done for the input file) so it works on every operating system;
#the previous backslash literal was Windows-only and relied on "\C" not being a valid escape sequence.
FEMA96TopTypes_csv = os.path.join("..", "Cleaned Data", "Clean FEMA 1996-Present Disasters Top 6 Types.csv")

#index=False leaves the row index out of the file; header=True writes the column names as the first line
FEMA96TopTypes_df.to_csv(FEMA96TopTypes_csv, index=False, header=True)