In [13]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import csv
from scipy.stats import linregress


In [14]:
#import Econ data from Houston

rita_1 = "./Economic Data/Houstonareahousing.csv"

rita_Housing_df = pd.read_csv(rita_1)
rita_Housing_df.head()

Unnamed: 0,DATE,ATNHPIUS26420Q
0,1976-01-01,51.87
1,1976-04-01,58.67
2,1976-07-01,58.59
3,1976-10-01,58.18
4,1977-01-01,60.39


In [15]:
# remove rows prior to 1990

rita_Housing_df['DATE'] = pd.to_datetime(rita_Housing_df['DATE'])

rita_Housing_df = rita_Housing_df[rita_Housing_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_Housing_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_Housing_df.index)

rita_Housing_df['Hurricane Date'] = pd.to_datetime(rita_Housing_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_Housing_df['Delta'] = ((rita_Housing_df['DATE'] - rita_Housing_df['Hurricane Date']).dt.days/365)

rita_Housing_df['Delta'] = rita_Housing_df['Delta'].astype(int)

#Rename column 
rita_Housing_df = rita_Housing_df.rename(columns={'ATNHPIUS26420Q': 'Housing Price Index'})

rita_Housing_df.head()

Unnamed: 0,DATE,Housing Price Index,Hurricane,Hurricane Date,Location,Delta
100,2001-01-01,135.43,Rita,2005-09-18,Houston Area,-4
101,2001-04-01,136.85,Rita,2005-09-18,Houston Area,-4
102,2001-07-01,138.65,Rita,2005-09-18,Houston Area,-4
103,2001-10-01,140.0,Rita,2005-09-18,Houston Area,-3
104,2002-01-01,140.74,Rita,2005-09-18,Houston Area,-3


In [16]:
#import Econ data from Houston

rita_2 = "./Economic Data/HoustonNewPrivateHousingStructures.csv"

rita_HousingAuthorized_df = pd.read_csv(rita_2)
rita_HousingAuthorized_df.head()

Unnamed: 0,DATE,BPPRIV048225
0,1990-01-01,2.0
1,1991-01-01,1.0
2,1992-01-01,78.0
3,1993-01-01,7.0
4,1994-01-01,7.0


In [17]:
# remove rows prior to 1990

rita_HousingAuthorized_df['DATE'] = pd.to_datetime(rita_HousingAuthorized_df['DATE'])

rita_HousingAuthorized_df = rita_HousingAuthorized_df[rita_HousingAuthorized_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_HousingAuthorized_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_HousingAuthorized_df.index)

rita_HousingAuthorized_df['Hurricane Date'] = pd.to_datetime(rita_HousingAuthorized_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_HousingAuthorized_df['Delta'] = ((rita_HousingAuthorized_df['DATE'] - rita_HousingAuthorized_df['Hurricane Date']).dt.days/365)

rita_HousingAuthorized_df['Delta'] = rita_HousingAuthorized_df['Delta'].astype(int)

#Rename column 
rita_HousingAuthorized_df = rita_HousingAuthorized_df.rename(columns={'BPPRIV048225': 'New Private Housing Structures Authorized'})

rita_HousingAuthorized_df.head()

Unnamed: 0,DATE,New Private Housing Structures Authorized,Hurricane,Hurricane Date,Location,Delta
11,2001-01-01,6.0,Rita,2005-09-18,Houston Area,-4
12,2002-01-01,7.0,Rita,2005-09-18,Houston Area,-3
13,2003-01-01,8.0,Rita,2005-09-18,Houston Area,-2
14,2004-01-01,7.0,Rita,2005-09-18,Houston Area,-1
15,2005-01-01,7.0,Rita,2005-09-18,Houston Area,0


In [18]:
#import Econ data from Houston

rita_3 = "./Economic Data/HoustonRealGDP.csv"

rita_real_GDP_df = pd.read_csv(rita_3)
rita_real_GDP_df.head()

Unnamed: 0,DATE,REALGDPALL48225
0,2001-01-01,653892
1,2002-01-01,645483
2,2003-01-01,671373
3,2004-01-01,753849
4,2005-01-01,748588


In [19]:
# remove rows prior to 1990

rita_real_GDP_df['DATE'] = pd.to_datetime(rita_real_GDP_df['DATE'])

rita_real_GDP_df = rita_real_GDP_df[rita_real_GDP_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_real_GDP_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_real_GDP_df.index)

rita_real_GDP_df['Hurricane Date'] = pd.to_datetime(rita_real_GDP_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_real_GDP_df['Delta'] = ((rita_real_GDP_df['DATE'] - rita_real_GDP_df['Hurricane Date']).dt.days/365)

rita_real_GDP_df['Delta'] = rita_real_GDP_df['Delta'].astype(int)

#Rename column 
rita_real_GDP_df = rita_real_GDP_df.rename(columns={'REALGDPALL48225': 'Real GDP'})

rita_real_GDP_df.head()

Unnamed: 0,DATE,Real GDP,Hurricane,Hurricane Date,Location,Delta
0,2001-01-01,653892,Rita,2005-09-18,Houston Area,-4
1,2002-01-01,645483,Rita,2005-09-18,Houston Area,-3
2,2003-01-01,671373,Rita,2005-09-18,Houston Area,-2
3,2004-01-01,753849,Rita,2005-09-18,Houston Area,-1
4,2005-01-01,748588,Rita,2005-09-18,Houston Area,0


In [20]:
#import Econ data from Houston

rita_4 = "./Economic Data/HoustonCtyUnemployment.csv"

rita_unemployment_df = pd.read_csv(rita_4)
rita_unemployment_df.head()

Unnamed: 0,DATE,TXHOUS5URN
0,1990-01-01,4.8
1,1990-02-01,4.8
2,1990-03-01,4.8
3,1990-04-01,5.1
4,1990-05-01,5.0


In [21]:
# remove rows prior to 1990

rita_unemployment_df['DATE'] = pd.to_datetime(rita_unemployment_df['DATE'])

rita_unemployment_df = rita_unemployment_df[rita_unemployment_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_unemployment_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_unemployment_df.index)

rita_unemployment_df['Hurricane Date'] = pd.to_datetime(rita_unemployment_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_unemployment_df['Delta'] = ((rita_unemployment_df['DATE'] - rita_unemployment_df['Hurricane Date']).dt.days/365)

rita_unemployment_df['Delta'] = rita_unemployment_df['Delta'].astype(int)

#Rename column 
rita_unemployment_df = rita_unemployment_df.rename(columns={'TXHOUS5URN': 'Unemployment'})

rita_unemployment_df.head()

Unnamed: 0,DATE,Unemployment,Hurricane,Hurricane Date,Location,Delta
132,2001-01-01,4.7,Rita,2005-09-18,Houston Area,-4
133,2001-02-01,4.4,Rita,2005-09-18,Houston Area,-4
134,2001-03-01,4.9,Rita,2005-09-18,Houston Area,-4
135,2001-04-01,4.7,Rita,2005-09-18,Houston Area,-4
136,2001-05-01,4.6,Rita,2005-09-18,Houston Area,-4


In [22]:
#import Econ data from Houston

rita_5 = "./Economic Data/HoustonMedianIncome.csv"

rita_median_df = pd.read_csv(rita_5)
rita_median_df.head()

Unnamed: 0,DATE,MHITX48225A052NCEN
0,1989-01-01,17667
1,1990-01-01,.
2,1991-01-01,.
3,1992-01-01,.
4,1993-01-01,21429


In [23]:
# remove rows prior to 1990

rita_median_df['DATE'] = pd.to_datetime(rita_median_df['DATE'])

rita_median_df = rita_median_df[rita_median_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_median_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_median_df.index)

rita_median_df['Hurricane Date'] = pd.to_datetime(rita_median_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_median_df['Delta'] = ((rita_median_df['DATE'] - rita_median_df['Hurricane Date']).dt.days/365)

rita_median_df['Delta'] = rita_median_df['Delta'].astype(int)

#Rename column 
rita_median_df = rita_median_df.rename(columns={'TXHOUS5URN': 'Unemployment'})

rita_median_df.head()

Unnamed: 0,DATE,MHITX48225A052NCEN,Hurricane,Hurricane Date,Location,Delta
12,2001-01-01,27868,Rita,2005-09-18,Houston Area,-4
13,2002-01-01,27497,Rita,2005-09-18,Houston Area,-3
14,2003-01-01,28036,Rita,2005-09-18,Houston Area,-2
15,2004-01-01,28630,Rita,2005-09-18,Houston Area,-1
16,2005-01-01,30146,Rita,2005-09-18,Houston Area,0


In [25]:
#import Econ data from Houston

rita_6 = "./Economic Data/HoustonPop.csv"

rita_population_df = pd.read_csv(rita_6)
rita_population_df.head()

Unnamed: 0,DATE,Population
0,1/1/1970,17.855
1,1/1/1971,18.6
2,1/1/1972,19.2
3,1/1/1973,18.9
4,1/1/1974,18.7


In [26]:
# remove rows prior to 1990

rita_population_df['DATE'] = pd.to_datetime(rita_population_df['DATE'])

rita_population_df = rita_population_df[rita_population_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_population_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_population_df.index)

rita_population_df['Hurricane Date'] = pd.to_datetime(rita_population_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_population_df['Delta'] = ((rita_population_df['DATE'] - rita_population_df['Hurricane Date']).dt.days/365)

rita_population_df['Delta'] = rita_population_df['Delta'].astype(int)

rita_population_df.head()

Unnamed: 0,DATE,Population,Hurricane,Hurricane Date,Location,Delta
31,2001-01-01,22.969,Rita,2005-09-18,Houston Area,-4
32,2002-01-01,22.971,Rita,2005-09-18,Houston Area,-3
33,2003-01-01,22.904,Rita,2005-09-18,Houston Area,-2
34,2004-01-01,23.004,Rita,2005-09-18,Houston Area,-1
35,2005-01-01,22.786,Rita,2005-09-18,Houston Area,0


In [27]:
#export to CSV

output_data_file = './CleanEconData/rita_cln_unemployment.csv'
  
rita_unemployment_df.to_csv(output_data_file, sep=',')

output_data_file2 = './CleanEconData/rita_cln_realGDP.csv'
  
rita_real_GDP_df.to_csv(output_data_file2, sep=',')

output_data_file3 = './CleanEconData/rita_cln_housingauth.csv'
  
rita_HousingAuthorized_df.to_csv(output_data_file3, sep=',')

output_data_file4 = './CleanEconData/rita_cln_housingpriceindex.csv'
  
rita_Housing_df.to_csv(output_data_file4, sep=',')

output_data_file5 = './CleanEconData/rita_cln_median.csv'
  
rita_median_df.to_csv(output_data_file5, sep=',')

output_data_file6 = './CleanEconData/rita_cln_population.csv'
  
rita_population_df.to_csv(output_data_file6, sep=',')