### FEMA Data Preprocessing
This file covers initial data loading and cleaning for the FEMA dataset. 

This FEMA dataset is used to find census tracts located in areas that FEMA has designated as high-risk flood zones.
Because the original data file is over 500 MB in size, we reduce it to include only census tracts that are in coastal counties
and that have a 'Very High' or 'Relatively High' coastal flood risk rating.

In [1]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)

In [2]:
def coastal_counties(path = ""):
    """
    Description: function to load the file of coastal counties. Note for this analysis,
    only contiguous US territories are considered, so Hawaii is dropped.
    I/P: Optional directory prefix (including trailing separator) that contains the
         "Coastal/coastline-counties-list.xlsx" workbook. See [1] in references for original source
    O/P: DataFrame of coastal counties with string columns
         "state_county_FIPS", "county_name", "state_name" and "ocean"
    """
    cc = pd.read_excel(path + "Coastal/coastline-counties-list.xlsx", usecols = [0, 3, 4, 5],
                       skiprows = 3, nrows = 255, dtype = str)
    cc.columns = ["state_county_FIPS", "county_name", "state_name", "ocean"]

    # Take an explicit copy so the column assignment below modifies an independent
    # frame rather than a filtered slice of the frame read from disk.
    cc = cc[cc.state_name != "Hawaii"].copy()

    # Drop a trailing " County" from county names so they can be matched against the FEMA data.
    # The pattern is anchored to the end of the string so only a real suffix is removed
    # (a name that merely contains " County" elsewhere is left intact), and the
    # vectorised string accessor passes missing names through instead of raising.
    cc["county_name"] = cc["county_name"].str.replace(r" County$", "", regex = True)

    return cc

# Load the coastal-county lookup table from the local data directory,
# then preview three randomly chosen rows as a quick sanity check.
ccounties = coastal_counties(path='data/')
ccounties.sample(n=3)

Unnamed: 0,state_county_FIPS,county_name,state_name,ocean
62,12035,Flagler,Florida,Atlantic
226,51131,Northampton,Virginia,Atlantic
148,34013,Essex,New Jersey,Atlantic


In [14]:
def fema_datacleaning(ccounties):

    """
    Description: this function cleans the FEMA dataset to keep only the data needed for further analysis,
    and saves the result as a zip-compressed pickle.
    I/P: dataframe of coastal counties; only its "state_county_FIPS" column is used
    O/P: None. The cleaned dataframe is written to 'data/cleaned_FEMA.zip'
         (stored inside the archive as 'data/cleaned_FEMA.pkl')
    """

    #data loading
    #FIPS and tract codes are read as strings so that leading zeros are preserved
    fema = pd.read_csv("data/FEMA/NRI_Table_CensusTracts.csv",
                             dtype={'STCOFIPS': object, "TRACT" : object},
                             usecols = ['STCOFIPS', 'TRACT', 'CFLD_RISKR', 'STATE', 'COUNTY'])

    #Step 1: COASTAL PROPERTIES
    #to reduce the data size, all tracts outside the counties listed in `ccounties` are removed (reference [1]).
    #Note for this analysis, only contiguous US territories are considered; Hawaii, Puerto Rico, US Virgin Islands
    #and other such territories are not included. Additionally, counties with Great Lakes coasts are not
    #considered since the focus is on sea level rise.
    is_coastal = fema.STCOFIPS.isin(ccounties.state_county_FIPS)

    #Step 2: FEMA FLOOD RISK
    ##The risk rating options are 'Not Applicable', 'Very Low', 'No Rating', 'Relatively Low', 'Relatively Moderate',
    ##'Very High', 'Relatively High','Insufficient Data'. For the purpose of this analysis, we only keep those
    ##locations where the coastal flooding risk is 'Very High' or 'Relatively High'
    ##See data dictionary for a full list of attribute meanings
    is_high_risk = fema.CFLD_RISKR.isin(['Very High', 'Relatively High'])

    #apply both filters at once; the explicit copy makes the result an independent frame,
    #so adding a column below does not write to a slice of the raw table
    fema = fema[is_coastal & is_high_risk].copy()

    ##create unique_identifier: county FIPS code followed by the tract code
    fema["census_tract"] = fema.STCOFIPS + fema.TRACT

    filename = 'cleaned_FEMA'
    compression_options = dict(method='zip', archive_name=f'data/{filename}.pkl')
    fema.to_pickle(f'data/{filename}.zip', compression=compression_options)

    return


In [15]:
# Run the FEMA cleaning step; the filtered table is written to data/cleaned_FEMA.zip.
fema_datacleaning(ccounties)