In [1]:
#----------------------------------------------
#This Notebook Cleans the data provided by FEMA
#----------------------------------------------

import pandas as pd
import numpy as np
import os as os
import csv


In [2]:
#Read and store the FEMA Disaster data

FEMA_csv = os.path.join("..","Data","Fema_Declaration_dates.csv")

FEMA_df = pd.read_csv(FEMA_csv)

#Delete the columns that are not part of the original dataset
del FEMA_df["Unnamed: 16"]
del FEMA_df["Unnamed: 17"]
del FEMA_df["Unnamed: 18"]
del FEMA_df["Unnamed: 19"]
del FEMA_df["Unnamed: 20"]
del FEMA_df["Unnamed: 21"]
del FEMA_df["Unnamed: 22"]
del FEMA_df["Unnamed: 23"]

FEMA_df.head()

Unnamed: 0,disasterNumber,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,state,declarationDate,fyDeclared,disasterType,incidentType,title,incidentBeginDate,incidentEndDate,disasterCloseOutDate,declaredCountyArea,placeCode
0,1,0,1,1,1,GA,1953-05,1953,DR,Tornado,TORNADO,1953-05,1953-05,1954-06,,
1,3,0,1,1,1,LA,1953-05,1953,DR,Flood,FLOOD,1953-05,1953-05,1960-02,,
2,9,0,1,1,1,TX,1953-06,1953,DR,Flood,FLOOD,1953-06,1953-06,1958-01,,
3,31,0,1,1,1,AK,1954-11,1955,DR,Other,SEVERE HARDSHIP,1954-11,1954-11,1957-09,,
4,35,0,1,1,1,OK,1955-06,1955,DR,Flood,FLOOD & TORNADO,1955-06,1955-06,1959-12,,


In [3]:
#Drop disaster rows where a county is not listed
FEMA_Clean_df = FEMA_df.dropna(axis = 0, how="any")
FEMA_Clean_df.head()

Unnamed: 0,disasterNumber,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,state,declarationDate,fyDeclared,disasterType,incidentType,title,incidentBeginDate,incidentEndDate,disasterCloseOutDate,declaredCountyArea,placeCode
43,91,0,1,1,0,IN,1959-01,1959,DR,Flood,FLOOD,1959-01,1959-01,1960-11,Clay (County),99021.0
108,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Modoc (County),99049.0
118,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Humboldt (County),99023.0
131,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Sacramento (County),99067.0
137,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Butte (County),99007.0


In [4]:
#Rename columns to be clean
FEMA_Clean_df = FEMA_Clean_df.rename(columns={
    "disasterNumber":"Disaster Number",
    "ihProgramDeclared":"Household Program Declared",
    "iaProgramDeclared":"Individual Assistance Program Declared",
    "paProgramDeclared":"Public Assistance Program Declared",
    "hmProgramDeclared":"Hazard Mitigation Program Declared",
    "state":"State",
    "declarationDate":"Declaration Date",
    "fyDeclared":"Fiscal Year Declared",
    "disasterType":"Disaster Type",
    "incidentType":"Incident Type",
    "title":"Title",
    "incidentBeginDate":"Incident Begin Date",
    "incidentEndDate":"Incident End Date",
    "disasterCloseOutDate":"Disaster Close Out Date",
    "declaredCountyArea":"Declared County Area",
    "placeCode":"Place Code"
})

FEMA_Clean_df.head()

Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Disaster Close Out Date,Declared County Area,Place Code
43,91,0,1,1,0,IN,1959-01,1959,DR,Flood,FLOOD,1959-01,1959-01,1960-11,Clay (County),99021.0
108,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Modoc (County),99049.0
118,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Humboldt (County),99023.0
131,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Sacramento (County),99067.0
137,183,0,1,1,0,CA,1964-12,1965,DR,Flood,HEAVY RAINS & FLOODING,1964-12,1964-12,1976-04,Butte (County),99007.0


In [26]:
#Check to see the unique incident types that we will then groupby MAY 1996
FEMA_Clean_df["Incident Type"].unique()

array(['Flood', 'Tornado', 'Earthquake', 'Severe Storm(s)', 'Drought',
       'Hurricane', 'Typhoon', 'Fire', 'Severe Ice Storm', 'Freezing',
       'Snow', 'Coastal Storm', 'Fishing Losses', 'Dam/Levee Break',
       'Mud/Landslide', 'Volcano', 'Toxic Substances', 'Human Cause',
       'Terrorist', 'Tsunami', 'Other', 'Chemical'], dtype=object)

In [30]:
#Our Zillow dataset starts at May 1996, so we want our FEMA data frame to start with declarations from 1996 to present
FEMA96_df = FEMA_Clean_df[FEMA_Clean_df["Incident Begin Date"]>="1996-01"]
#Reset the index
FEMA96_df.reset_index(drop=True, inplace=True)
FEMA96_df.head()

Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Disaster Close Out Date,Declared County Area,Place Code
0,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Kent (County),99029.0
1,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Dorchester (County),99019.0
2,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,St. Mary's (County),99037.0
3,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Calvert (County),99009.0
4,1082,0,0,1,0,DE,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2001-06,Kent (County),99001.0


In [67]:
#The values for the County Name field are not in the same format as the Zillow values - FEMA has "(County)"
name = FEMA96_df["Declared County Area"].str.split(pat="(", n=1, expand=True)
name = (name +" County")
# print(name)
FEMA96_df["CountyName"] = name[0]
FEMA96_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,Disaster Number,Household Program Declared,Individual Assistance Program Declared,Public Assistance Program Declared,Hazard Mitigation Program Declared,State,Declaration Date,Fiscal Year Declared,Disaster Type,Incident Type,Title,Incident Begin Date,Incident End Date,Disaster Close Out Date,Declared County Area,Place Code,CountyName
0,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Kent (County),99029.0,Kent County
1,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Dorchester (County),99019.0,Dorchester County
2,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,St. Mary's (County),99037.0,St. Mary's County
3,1081,0,0,1,0,MD,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2003-07,Calvert (County),99009.0,Calvert County
4,1082,0,0,1,0,DE,1996-01,1996,DR,Snow,BLIZZARD OF 96 (SEVERE SNOW STORM),1996-01,1996-01,2001-06,Kent (County),99001.0,Kent County


In [68]:
#Check how many disasters were declared for each incident type
FEMA96_df["Incident Type"].value_counts() 

Severe Storm(s)     9925
Hurricane           6539
Fire                2039
Snow                1790
Flood               1679
Severe Ice Storm    1463
Coastal Storm        339
Other                286
Tornado              245
Freezing              84
Earthquake            40
Typhoon               36
Drought               29
Chemical               9
Terrorist              5
Tsunami                3
Mud/Landslide          2
Toxic Substances       1
Dam/Levee Break        1
Name: Incident Type, dtype: int64

In [69]:
#Keep the top 6 disaster types of which we have over a 1000 disasters of each
FEMA96TopTypes_df = FEMA96_df[ (FEMA96_df["Incident Type"]=="Severe Storm(s)") | (FEMA96_df["Incident Type"]=="Hurricane") | (FEMA96_df["Incident Type"]=="Fire") | (FEMA96_df["Incident Type"]=="Snow") | (FEMA96_df["Incident Type"]=="Flood") | (FEMA96_df["Incident Type"]=="Severe Ice Storm")]
#Check that the code worked properly
FEMA96TopTypes_df["Incident Type"].value_counts()

Severe Storm(s)     9925
Hurricane           6539
Fire                2039
Snow                1790
Flood               1679
Severe Ice Storm    1463
Name: Incident Type, dtype: int64

In [70]:
#Separate into a single dataframe for each incident type, if needed
FEMA96_SevereStorm_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Severe Storm(s)" ] 
FEMA96_Hurricane_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Hurricane" ] 
FEMA96_Fire_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Fire" ] 
FEMA96_Snow_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Snow" ] 
FEMA96_Flood_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Flood" ] 
FEMA96_SevereIce_df = FEMA96TopTypes_df[ FEMA96TopTypes_df["Incident Type"]=="Severe Ice Storm" ] 

In [71]:
#Output the FEMA96TopTypes into a csv file
FEMA96TopTypes_df.to_csv("..\Cleaned Data\Clean FEMA 1996-Present Disasters Top 6 Types.csv", index = False, header = True) 