In [1]:
#----------------------------------------------
#This Notebook Cleans the data provided by FEMA
#----------------------------------------------

import pandas as pd
import numpy as np
import os as os
import csv


In [2]:
#Read and store the FEMA Disaster data

FEMA_csv = os.path.join("..","Data","Fema_Declaration_dates.csv")

FEMA_df = pd.read_csv(FEMA_csv)

#Delete the columns that are not part of the original dataset
del FEMA_df["Unnamed: 16"]
del FEMA_df["Unnamed: 17"]
del FEMA_df["Unnamed: 18"]
del FEMA_df["Unnamed: 19"]
del FEMA_df["Unnamed: 20"]
del FEMA_df["Unnamed: 21"]
del FEMA_df["Unnamed: 22"]
del FEMA_df["Unnamed: 23"]

#Deleting the FEMA disaster close out date as through investigation, descovered that 352 disasters do not have a closeout data assigned. This was causing 352 disasters to be dropped when dropna.
del FEMA_df["disasterCloseOutDate"]

FEMA_df.head()

Unnamed: 0,disasterNumber,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,state,declarationDate,fyDeclared,disasterType,incidentType,title,incidentBeginDate,incidentEndDate,declaredCountyArea,placeCode
0,1,0,1,1,1,GA,1953-05,1953,DR,Tornado,TORNADO,1953-05,1953-05,,
1,3,0,1,1,1,LA,1953-05,1953,DR,Flood,FLOOD,1953-05,1953-05,,
2,9,0,1,1,1,TX,1953-06,1953,DR,Flood,FLOOD,1953-06,1953-06,,
3,31,0,1,1,1,AK,1954-11,1955,DR,Other,SEVERE HARDSHIP,1954-11,1954-11,,
4,35,0,1,1,1,OK,1955-06,1955,DR,Flood,FLOOD & TORNADO,1955-06,1955-06,,


In [3]:
FEMA_df.count()

disasterNumber        49211
ihProgramDeclared     49211
iaProgramDeclared     49211
paProgramDeclared     49211
hmProgramDeclared     49211
state                 49211
declarationDate       49211
fyDeclared            49211
disasterType          49211
incidentType          49211
title                 49211
incidentBeginDate     49211
incidentEndDate       48815
declaredCountyArea    49009
placeCode             49009
dtype: int64

In [4]:
#Drop disaster rows where a county is not listed
FEMA_Clean_df = FEMA_df.dropna(axis = 0, how="any")
FEMA_Clean_df.head()

Unnamed: 0,disasterNumber,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,state,declarationDate,fyDeclared,disasterType,incidentType,title,incidentBeginDate,incidentEndDate,declaredCountyArea,placeCode
43,91,0,1,1,0,IN,1959-01,1959,DR,Flood,FLOOD,1959-01,1959-01,Clay (County),99021.0
108,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Modoc (County),99049.0
118,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Humboldt (County),99023.0
131,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Sacramento (County),99067.0
137,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Butte (County),99007.0


In [5]:
FEMA_Clean_df.count()

disasterNumber        48616
ihProgramDeclared     48616
iaProgramDeclared     48616
paProgramDeclared     48616
hmProgramDeclared     48616
state                 48616
declarationDate       48616
fyDeclared            48616
disasterType          48616
incidentType          48616
title                 48616
incidentBeginDate     48616
incidentEndDate       48616
declaredCountyArea    48616
placeCode             48616
dtype: int64

In [6]:
#Rename columns to be clean
FEMA_Clean_df = FEMA_Clean_df.rename(columns={
    "disasterNumber":"Disaster Number",
    "ihProgramDeclared":"Household Program Declared",
    "iaProgramDeclared":"Individual Assistance Program Declared",
    "paProgramDeclared":"Public Assistance Program Declared",
    "hmProgramDeclared":"Hazard Mitigation Program Declared",
    "state":"State",
    "declarationDate":"Declaration Date",
    "fyDeclared":"Fiscal Year Declared",
    "disasterType":"Disaster Type",
    "incidentType":"Incident Type",
    "title":"Title",
    "incidentBeginDate":"Incident Begin Date",
    "incidentEndDate":"Incident End Date",
    "disasterCloseOutDate":"Disaster Close Out Date",
    "declaredCountyArea":"Declared County Area",
    "placeCode":"Place Code"
})

FEMA_Clean_df.head()

Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Declared County Area,Place Code
43,91,0,1,1,0,IN,1959-01,1959,DR,Flood,FLOOD,1959-01,1959-01,Clay (County),99021.0
108,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Modoc (County),99049.0
118,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Humboldt (County),99023.0
131,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Sacramento (County),99067.0
137,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,Butte (County),99007.0


In [7]:
#Check to see the unique incident types that we will then groupby MAY 1996
FEMA_Clean_df["Incident Type"].unique()

array(['Flood', 'Tornado', 'Earthquake', 'Severe Storm(s)', 'Drought',
       'Hurricane', 'Typhoon', 'Fire', 'Severe Ice Storm', 'Freezing',
       'Snow', 'Coastal Storm', 'Fishing Losses', 'Dam/Levee Break',
       'Mud/Landslide', 'Volcano', 'Toxic Substances', 'Human Cause',
       'Terrorist', 'Tsunami', 'Other', 'Chemical'], dtype=object)

In [8]:
#Our Zillow dataset starts at May 1996, so we want our FEMA data frame to start with declarations from 1996 to present
FEMA96_df = FEMA_Clean_df[FEMA_Clean_df["Incident Begin Date"]>="1997-01"]
#Reset the index
FEMA96_df.reset_index(drop=True, inplace=True)
FEMA96_df.head()

Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Declared County Area,Place Code
0,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Aurora (County),99003.0
1,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Kingsbury (County),99077.0
2,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Buffalo (County),99017.0
3,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Clay (County),99027.0
4,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Harding (County),99063.0


In [9]:
#The values for the County Name field are not in the same format as the Zillow values - FEMA has "(County)"
name = FEMA96_df["Declared County Area"].str.split(pat="(", n=1, expand=True)
name = (name +"County")
# print(name)
FEMA96_df["CountyName"] = name[0]
FEMA96_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Declared County Area,Place Code,CountyName
0,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Aurora (County),99003.0,Aurora County
1,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Kingsbury (County),99077.0,Kingsbury County
2,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Buffalo (County),99017.0,Buffalo County
3,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Clay (County),99027.0,Clay County
4,1156,0,0,1,0,SD,1997-01,1997,DR,Severe Storm(s),SEVERE WINTER STORMS AND BLIZZARD CONDITIONS,1997-01,1997-01,Harding (County),99063.0,Harding County


In [10]:
#Check how many disasters were declared for each incident type
FEMA96_df["Incident Type"].value_counts() 

Severe Storm(s)     13493
Hurricane            9497
Flood                2884
Fire                 2499
Severe Ice Storm     1837
Snow                 1539
Coastal Storm         388
Tornado               328
Other                 284
Freezing               85
Earthquake             66
Typhoon                48
Drought                29
Mud/Landslide          29
Chemical                9
Tsunami                 9
Terrorist               5
Dam/Levee Break         4
Volcano                 2
Toxic Substances        1
Name: Incident Type, dtype: int64

In [11]:
#Keep the top 6 disaster types of which we have over a 1000 disasters of each
FEMA96TopTypes_df = FEMA96_df[ (FEMA96_df["Incident Type"]=="Severe Storm(s)") | (FEMA96_df["Incident Type"]=="Hurricane") | (FEMA96_df["Incident Type"]=="Fire") | (FEMA96_df["Incident Type"]=="Snow") | (FEMA96_df["Incident Type"]=="Flood") | (FEMA96_df["Incident Type"]=="Severe Ice Storm")]
#Check that the code worked properly
FEMA96TopTypes_df["Incident Type"].value_counts()

Severe Storm(s)     13493
Hurricane            9497
Flood                2884
Fire                 2499
Severe Ice Storm     1837
Snow                 1539
Name: Incident Type, dtype: int64

In [12]:
#Separate into a single dataframe for each incident type, if needed
FEMA96_SevereStorm_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Severe Storm(s)" ] 
FEMA96_Hurricane_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Hurricane" ] 
FEMA96_Fire_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Fire" ] 
FEMA96_Snow_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Snow" ] 
FEMA96_Flood_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Flood" ] 
FEMA96_SevereIce_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Severe Ice Storm" ] 

In [13]:
#Output the FEMA96TopTypes into a csv file
FEMA96TopTypes_df.to_csv("..\Cleaned Data\Clean FEMA 1996-Present Disasters Top 6 Types.csv", index = False, header = True) 