In [None]:
#----------------------------------------------
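#This Notebook aligns the merged FEMA/Zillow data to each disaster's declaration date and t-tests the post-declaration housing columns against the population columns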
#----------------------------------------------
%matplotlib notebook
import csv
import os as os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats

In [None]:
#Locate the merged event data and the population data inside the shared "Cleaned Data" folder
cleaned_dir = os.path.join('..', 'Cleaned Data')
event_data = os.path.join(cleaned_dir, 'Merged_Clean_Data.csv')
pop_data = os.path.join(cleaned_dir, 'Zillow_Population_Return.csv')


def _read_without_first_column(csv_path):
    """Read csv_path into a DataFrame and return it without its first column."""
    frame = pd.read_csv(csv_path)
    #presumably the first column is an index written by an earlier to_csv - verify against the producer notebook
    return frame.drop(columns=frame.columns[0])


event_df = _read_without_first_column(event_data)
pop_df = _read_without_first_column(pop_data)

In [None]:
#Number of post-declaration columns (t+1 ... t+n) captured for every event row
n = 6

#create dictionary with data columns desired prior to adding normalized time series data
normalized_dict = {'title' : event_df['Title'], 'disaster number' : event_df['Disaster Number'],
                   'DisasterType' : event_df['Incident Type'], 'Declaration Date' : event_df['Declaration Date'],
                   'Zip Code' : event_df['RegionName'], 'County' : event_df['CountyName']}


def _column_positions(columns, labels):
    """Return, for each label, its integer position in `columns`, or None when unusable.

    A label is unusable when it is missing from `columns` (KeyError), cannot be
    looked up at all (TypeError), or matches several columns so that get_loc
    returns a slice/mask instead of a single integer.
    """
    positions = []
    for label in labels:
        try:
            position = columns.get_loc(label)
        except (KeyError, TypeError):
            position = None
        positions.append(position if isinstance(position, (int, np.integer)) else None)
    return positions


def _values_after(frame, row_positions, start_positions, offset):
    """Collect the frame values `offset` columns to the right of each start position.

    row_positions[i] is the row of `frame` read for entry i. Entries whose start
    position is None, or whose target row/column falls outside `frame`, become NaN.
    """
    n_rows, n_cols = frame.shape
    values = []
    for row_position, start in zip(row_positions, start_positions):
        if start is None or start + offset >= n_cols or row_position >= n_rows:
            values.append(np.nan)
        else:
            values.append(frame.iat[row_position, start + offset])
    return values


#the declaration date of each event names the column that marks "t" in both data frames
declaration_dates = event_df['Declaration Date']
offsets = range(1, n + 1)

#event series: every event row is read from its own line of event_df
event_starts = _column_positions(event_df.columns, declaration_dates)
event_rows = range(len(event_df))
event_lists = [_values_after(event_df, event_rows, event_starts, offset) for offset in offsets]

#population series: every event reads from the first row of pop_df
#NOTE(review): assumes pop_df holds a single relevant row - confirm against the population file
pop_starts = _column_positions(pop_df.columns, declaration_dates)
pop_rows = [0] * len(event_df)
pop_lists = [_values_after(pop_df, pop_rows, pop_starts, offset) for offset in offsets]

#add the event series as 't+1' ... 't+n', then the population series as 't+1p' ... 't+np'
for offset, values in enumerate(event_lists, start=1):
    normalized_dict[f't+{offset}'] = values
for offset, values in enumerate(pop_lists, start=1):
    normalized_dict[f't+{offset}p'] = values

#create data frame from normalized_dict
normalized_df = pd.DataFrame(normalized_dict)

#NOTE(review): no NaN rows are dropped at this step - normalized_clean_df is the same object as normalized_df;
#add .dropna() here if removing incomplete rows is actually intended
normalized_clean_df = normalized_df

In [None]:
#Number of rows per disaster type
normalized_clean_df.loc[:, 'DisasterType'].value_counts()

In [None]:
#Index by disaster type and zip code.
#Built from normalized_df (not from normalized_clean_df itself) so re-running this cell
#does not fail with a KeyError once the two columns have already moved into the index.
normalized_clean_df = normalized_df.set_index(['DisasterType', 'Zip Code'])

In [None]:
#Write the indexed, normalized data next to the other cleaned files
normalized_output_path = '../Cleaned Data/Normalized_Clean_Data.csv'
normalized_clean_df.to_csv(normalized_output_path)

In [None]:
#Select the rows whose DisasterType index level is 'Flood'; the remaining index level is 'Zip Code'
#NOTE(review): the frame is named hurricane_df but selects 'Flood' - confirm which disaster type is intended
hurricane_df = normalized_clean_df.loc['Flood', :]

#Keep the de-duplicated frame (the result used to be displayed and then thrown away).
#drop_duplicates compares columns only, so the zip code is moved out of the index first:
#only rows that repeat the same zip code AND the same values are removed.
hurricane_df = hurricane_df.reset_index().drop_duplicates().set_index('Zip Code')
hurricane_df

In [None]:
#Removes data that improperly aggregated into this data frame:
#keep only the t+k / t+kp columns, then keep only rows whose every t+k value is below 1
housing_columns = [f't+{month}' for month in range(1, 7)]
population_columns = [f't+{month}p' for month in range(1, 7)]

hurricane_df_returns = hurricane_df[housing_columns + population_columns]
for housing_column in housing_columns:
    #a NaN never satisfies "< 1", so rows with a missing t+k value are dropped here as well
    below_one = hurricane_df_returns[housing_column] < 1
    hurricane_df_returns = hurricane_df_returns[below_one]

hurricane_df_returns.count()

In [None]:
#Test for the first Month: unequal-variance t-test of the t+1 column against the t+1p column
housing, pop = hurricane_df_returns['t+1'], hurricane_df_returns['t+1p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Test for the second Month: unequal-variance t-test of the t+2 column against the t+2p column
housing, pop = hurricane_df_returns['t+2'], hurricane_df_returns['t+2p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Test for the third Month: unequal-variance t-test of the t+3 column against the t+3p column
housing, pop = hurricane_df_returns['t+3'], hurricane_df_returns['t+3p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Test for the fourth Month: unequal-variance t-test of the t+4 column against the t+4p column
housing, pop = hurricane_df_returns['t+4'], hurricane_df_returns['t+4p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Test for the fifth Month: unequal-variance t-test of the t+5 column against the t+5p column
housing, pop = hurricane_df_returns['t+5'], hurricane_df_returns['t+5p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Test for the sixth Month: unequal-variance t-test of the t+6 column against the t+6p column
housing, pop = hurricane_df_returns['t+6'], hurricane_df_returns['t+6p']

stats.ttest_ind(housing, pop, equal_var=False)

In [None]:
#Creates the charting Data Frame: one row holding the mean of every t+k / t+kp column
hurricane_average_df = (
    hurricane_df_returns
    .mean(axis=0)   #average down the rows, giving one value per column
    .to_frame()     #single unnamed column of means
    .T              #transpose so each t+k / t+kp label becomes a column
)
hurricane_average_df