In [2]:
#----------------------------------------------
#This Notebook merges the cleaned data from FEMA and Zillow
#----------------------------------------------
%matplotlib notebook
import pandas as pd
import os as os
import csv
import matplotlib.pyplot as plt
import scipy.stats as stats

In [3]:
#Read and store the merged clean data and population data
event_data = os.path.join('..','Cleaned Data','Merged_Clean_Data.csv')
pop_data = os.path.join('..','Cleaned Data','Zillow_Population_Return.csv')

event_df = pd.read_csv(event_data)
pop_df = pd.read_csv(pop_data)

event_df = event_df.drop(event_df.columns[0], axis = 1)
pop_df = pop_df.drop(pop_df.columns[0], axis = 1)

In [4]:
#create empty list for normalized event and population time series data
n = 6
event_lists = [[] for i in range(n)]
pop_lists = [[] for j in range(n)]


#create dictionary with data columns desired prior to adding normalized time series data
normalized_dict = {'title' : event_df['Title'], 'disaster number' : event_df['Disaster Number'],
                          'DisasterType' : event_df['Incident Type'], 'Declaration Date' : event_df['Declaration Date'],
                          'Zip Code' : event_df['RegionName'], 'County' : event_df['CountyName']}

#loop through empty event lists and then loop through event dataframe to populate empty event lists using declaration date to determine column index
event_list_count = -1
for x in event_lists:
    event_list_count += 1
    for index, row in event_df.iterrows():
        ddate = row['Declaration Date']
        try:
            col_idx = event_df.columns.get_loc(ddate)
            event_lists[event_list_count].append(row.iloc[col_idx + event_list_count + 1])
        except:
            event_lists[event_list_count].append(None)

#loop through empty population lists and then loop through population dataframe to populate empty event lists using declaration date from event dataframe to determine column index
pop_list_count = -1
for y in pop_lists:
    pop_list_count += 1 
    for index, row in event_df.iterrows():
        ddate = row['Declaration Date']
        try:
            col_idx = pop_df.columns.get_loc(ddate)
            pop_lists[pop_list_count].append(row.iloc[col_idx + pop_list_count + 1])
        except:
            pop_lists[pop_list_count].append(None)

#loop through populated event lists and add key, value pairs to normalized_dict
event_key_count = -1
for a in event_lists:
    event_key_count += 1
    normalized_dict.update({f't+{event_key_count + 1}': event_lists[event_key_count]})

#loop through populated population lists and add key, value pairs to normalized_dict
pop_key_count = -1
for b in pop_lists:
    pop_key_count += 1
    normalized_dict.update({f't+{pop_key_count + 1}p': pop_lists[pop_key_count]})

#create data frame from normalized_dict
normalized_df = pd.DataFrame(normalized_dict)

#clean normalized_df of rows with NaN
normalized_clean_df = normalized_df.dropna()

#display data frame
normalized_clean_df.head()

Unnamed: 0,title,disaster number,DisasterType,Declaration Date,Zip Code,County,t+1,t+2,t+3,t+4,t+5,t+6,t+1p,t+2p,t+3p,t+4p,t+5p,t+6p
1,"SEVERE STORMS, FLOODING, HEAVY RAINS, HIGH WINDS",1146,Severe Storm(s),1996-11,10025,New York County,0.006399,0.008671,0.009169,0.00795,0.008451,0.00726257,0.000584795,0.00116891,0.003503,0.006399,0.008671,0.009169
2,FIRES AND EXPLOSIONS,1391,Fire,2001-09,10025,New York County,0.000713,0.0,-0.001662,-0.001902,-0.001906,-0.0023872,0.0038305,0.00238493,0.001428,0.000713,0.0,-0.001662
3,SEVERE STORMS AND FLOODING,1534,Severe Storm(s),2004-08,10025,New York County,0.016218,0.017997,0.017678,0.018682,0.020753,0.0225374,0.0060273,0.00881057,0.012402,0.016218,0.017997,0.017678
4,SNOW,3184,Snow,2003-03,10025,New York County,0.019195,0.028137,0.038181,0.044005,0.041743,0.0328382,0.00906921,0.00969726,0.012884,0.019195,0.028137,0.038181
5,HURRICANE KATRINA EVACUATION,3262,Hurricane,2005-09,10025,New York County,0.009115,0.009169,0.009222,0.009675,0.009316,0.00830696,0.0124002,0.0102773,0.009058,0.009115,0.009169,0.009222


In [5]:
normalized_clean_df['DisasterType'].value_counts()


Severe Storm(s)     55424
Hurricane           44677
Fire                28243
Snow                13277
Flood               11262
Severe Ice Storm     4977
Name: DisasterType, dtype: int64

In [6]:
normalized_clean_df = normalized_clean_df.set_index(['DisasterType', 'Zip Code'])
normalized_clean_df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,title,disaster number,Declaration Date,County,t+1,t+2,t+3,t+4,t+5,t+6,t+1p,t+2p,t+3p,t+4p,t+5p,t+6p
DisasterType,Zip Code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Severe Storm(s),10025,"SEVERE STORMS, FLOODING, HEAVY RAINS, HIGH WINDS",1146,1996-11,New York County,0.006399,0.008671,0.009169,0.00795,0.008451,0.00726257,0.000584795,0.00116891,0.003503,0.006399,0.008671,0.009169
Fire,10025,FIRES AND EXPLOSIONS,1391,2001-09,New York County,0.000713,0.0,-0.001662,-0.001902,-0.001906,-0.0023872,0.0038305,0.00238493,0.001428,0.000713,0.0,-0.001662
Severe Storm(s),10025,SEVERE STORMS AND FLOODING,1534,2004-08,New York County,0.016218,0.017997,0.017678,0.018682,0.020753,0.0225374,0.0060273,0.00881057,0.012402,0.016218,0.017997,0.017678
Snow,10025,SNOW,3184,2003-03,New York County,0.019195,0.028137,0.038181,0.044005,0.041743,0.0328382,0.00906921,0.00969726,0.012884,0.019195,0.028137,0.038181
Hurricane,10025,HURRICANE KATRINA EVACUATION,3262,2005-09,New York County,0.009115,0.009169,0.009222,0.009675,0.009316,0.00830696,0.0124002,0.0102773,0.009058,0.009115,0.009169,0.009222


In [8]:
normalized_clean_df.to_csv('../Cleaned Data/Normalized_Clean_Data.csv')

In [41]:
hurricane_df = normalized_clean_df.loc['Hurricane', :]
hurricane_df.drop_duplicates()
hurricane_df.head()

Unnamed: 0_level_0,title,disaster number,Declaration Date,County,t+1,t+2,t+3,t+4,t+5,t+6,t+1p,t+2p,t+3p,t+4p,t+5p,t+6p
Zip Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
10025,HURRICANE KATRINA EVACUATION,3262,2005-09,New York County,0.009115,0.009169,0.009222,0.009675,0.009316,0.00830696,0.0124002,0.0102773,0.009058,0.009115,0.009169,0.009222
10025,HURRICANE IRENE,3328,2011-08,New York County,0.003734,0.007303,0.001915,-0.003823,-0.000548,0.00397696,-0.0134771,-0.00819672,-0.004132,0.003734,0.007303,0.001915
10025,HURRICANE SANDY,3351,2012-10,New York County,0.005401,0.021361,0.026049,0.022214,0.017791,0.00973721,0.00855714,0.010573,0.004392,0.005401,0.021361,0.026049
10025,HURRICANE IRENE,4020,2011-08,New York County,0.003734,0.007303,0.001915,-0.003823,-0.000548,0.00397696,-0.0134771,-0.00819672,-0.004132,0.003734,0.007303,0.001915
10025,HURRICANE SANDY,4085,2012-10,New York County,0.005401,0.021361,0.026049,0.022214,0.017791,0.00973721,0.00855714,0.010573,0.004392,0.005401,0.021361,0.026049


In [39]:
#Removes data that improperly aggregated into this data frame. 
hurricane_df_returns = hurricane_df[["t+1", 't+2', 't+3', 't+4', 't+5', 't+6', 't+1p', 't+2p', 't+3p', 't+4p', 't+5p', 't+6p']]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+1"] < 1]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+2"] < 1]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+3"] < 1]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+4"] < 1]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+5"] < 1]
hurricane_df_returns = hurricane_df_returns[hurricane_df_returns["t+6"] < 1]
hurricane_df_returns.count()

t+1     42880
t+2     42880
t+3     42880
t+4     42880
t+5     42880
t+6     42880
t+1p    42880
t+2p    42880
t+3p    42880
t+4p    42880
t+5p    42880
t+6p    42880
dtype: int64

In [57]:
#Test for the first Month
housing = hurricane_df_returns['t+1']
pop = hurricane_df_returns['t+1p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=-3.3732616113356397, pvalue=0.0007431634665147211)

In [58]:
#Test for the Second Month
housing = hurricane_df_returns['t+2']
pop = hurricane_df_returns['t+2p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=-5.886679415447131, pvalue=3.955037357474838e-09)

In [59]:
#Test for the third Month
housing = hurricane_df_returns['t+3']
pop = hurricane_df_returns['t+3p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=-6.578383526256998, pvalue=4.7832639564267164e-11)

In [60]:
#Test for the fourth Month
housing = hurricane_df_returns['t+4']
pop = hurricane_df_returns['t+4p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=-4.136901413043281, pvalue=3.5236269133723e-05)

In [61]:
#Test for the fifth Month
housing = hurricane_df_returns['t+5']
pop = hurricane_df_returns['t+5p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=0.8815371397765109, pvalue=0.37802963373550424)

In [62]:
#Test for the sixth Month
housing = hurricane_df_returns['t+6']
pop = hurricane_df_returns['t+6p']

stats.ttest_ind(housing, pop, equal_var=False)

Ttest_indResult(statistic=3.1931219239731345, pvalue=0.001407943542562569)

In [63]:
#Creates the charting Data Frame
hurricane_average_df = hurricane_df_returns.mean(axis = 'rows')
hurricane_average_df = pd.DataFrame(hurricane_average_df).T
hurricane_average_df

Unnamed: 0,t+1,t+2,t+3,t+4,t+5,t+6,t+1p,t+2p,t+3p,t+4p,t+5p,t+6p
0,0.005231,0.005023,0.004844,0.004966,0.005079,0.005048,0.005459,0.005411,0.005274,0.005231,0.005023,0.004844
