In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import csv
from scipy.stats import linregress


In [2]:
#import Econ data from Houston

rita_1 = "./Economic Data/Houstonareahousing.csv"

rita_Housing_df = pd.read_csv(rita_1)
rita_Housing_df.head()

Unnamed: 0,DATE,ATNHPIUS48201A
0,1/1/1975,39.08
1,1/1/1976,45.29
2,1/1/1977,52.53
3,1/1/1978,58.96
4,1/1/1979,67.19


In [3]:
# remove rows prior to 1990

rita_Housing_df['DATE'] = pd.to_datetime(rita_Housing_df['DATE'])

rita_Housing_df = rita_Housing_df[rita_Housing_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_Housing_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_Housing_df.index)

rita_Housing_df['Hurricane Date'] = pd.to_datetime(rita_Housing_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_Housing_df['Delta'] = ((rita_Housing_df['DATE'] - rita_Housing_df['Hurricane Date']).dt.days/365)

rita_Housing_df['Delta'] = rita_Housing_df['Delta'].astype(int)

#Rename column 
rita_Housing_df = rita_Housing_df.rename(columns={'ATNHPIUS48201A': 'Housing Price Index'})

rita_Housing_df.head()

Unnamed: 0,DATE,Housing Price Index,Hurricane,Hurricane Date,Location,Delta
26,2001-01-01,106.08,Rita,2005-09-18,Houston Area,-4
27,2002-01-01,110.39,Rita,2005-09-18,Houston Area,-3
28,2003-01-01,113.49,Rita,2005-09-18,Houston Area,-2
29,2004-01-01,117.15,Rita,2005-09-18,Houston Area,-1
30,2005-01-01,121.9,Rita,2005-09-18,Houston Area,0


In [4]:
#import Econ data from Houston

rita_2 = "./Economic Data/HoustonNewPrivateHousingStructures.csv"

rita_HousingAuthorized_df = pd.read_csv(rita_2)
rita_HousingAuthorized_df.head()

Unnamed: 0,DATE,BPPRIV048201
0,1/1/1990,10151
1,1/1/1991,12647
2,1/1/1992,12558
3,1/1/1993,13222
4,1/1/1994,15855


In [5]:
# remove rows prior to 1990

rita_HousingAuthorized_df['DATE'] = pd.to_datetime(rita_HousingAuthorized_df['DATE'])

rita_HousingAuthorized_df = rita_HousingAuthorized_df[rita_HousingAuthorized_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_HousingAuthorized_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_HousingAuthorized_df.index)

rita_HousingAuthorized_df['Hurricane Date'] = pd.to_datetime(rita_HousingAuthorized_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_HousingAuthorized_df['Delta'] = ((rita_HousingAuthorized_df['DATE'] - rita_HousingAuthorized_df['Hurricane Date']).dt.days/365)

rita_HousingAuthorized_df['Delta'] = rita_HousingAuthorized_df['Delta'].astype(int)

#Rename column 
rita_HousingAuthorized_df = rita_HousingAuthorized_df.rename(columns={'BPPRIV048201': 'New Private Housing Structures Authorized'})

rita_HousingAuthorized_df.head()

Unnamed: 0,DATE,New Private Housing Structures Authorized,Hurricane,Hurricane Date,Location,Delta
11,2001-01-01,26419,Rita,2005-09-18,Houston Area,-4
12,2002-01-01,34339,Rita,2005-09-18,Houston Area,-3
13,2003-01-01,40983,Rita,2005-09-18,Houston Area,-2
14,2004-01-01,36395,Rita,2005-09-18,Houston Area,-1
15,2005-01-01,41506,Rita,2005-09-18,Houston Area,0


In [6]:
#import Econ data from Houston

rita_3 = "./Economic Data/HoustonRealGDP.csv"

rita_real_GDP_df = pd.read_csv(rita_3)
rita_real_GDP_df.head()

Unnamed: 0,DATE,REALGDPALL48201
0,1/1/2001,244128701
1,1/1/2002,242658014
2,1/1/2003,243159988
3,1/1/2004,265896284
4,1/1/2005,269128633


In [7]:
# remove rows prior to 1990

rita_real_GDP_df['DATE'] = pd.to_datetime(rita_real_GDP_df['DATE'])

rita_real_GDP_df = rita_real_GDP_df[rita_real_GDP_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_real_GDP_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_real_GDP_df.index)

rita_real_GDP_df['Hurricane Date'] = pd.to_datetime(rita_real_GDP_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_real_GDP_df['Delta'] = ((rita_real_GDP_df['DATE'] - rita_real_GDP_df['Hurricane Date']).dt.days/365)

rita_real_GDP_df['Delta'] = rita_real_GDP_df['Delta'].astype(int)

#Rename column 
rita_real_GDP_df = rita_real_GDP_df.rename(columns={'REALGDPALL48201': 'Real GDP'})

rita_real_GDP_df.head()

Unnamed: 0,DATE,Real GDP,Hurricane,Hurricane Date,Location,Delta
0,2001-01-01,244128701,Rita,2005-09-18,Houston Area,-4
1,2002-01-01,242658014,Rita,2005-09-18,Houston Area,-3
2,2003-01-01,243159988,Rita,2005-09-18,Houston Area,-2
3,2004-01-01,265896284,Rita,2005-09-18,Houston Area,-1
4,2005-01-01,269128633,Rita,2005-09-18,Houston Area,0


In [8]:
#import Econ data from Houston

rita_4 = "./Economic Data/HoustonCtyUnemployment.csv"

rita_unemployment_df = pd.read_csv(rita_4)
rita_unemployment_df.head()

Unnamed: 0,DATE,TXHARR1URN
0,1/1/1990,5.1
1,2/1/1990,5.1
2,3/1/1990,4.7
3,4/1/1990,4.8
4,5/1/1990,5.0


In [9]:
# remove rows prior to 1990

rita_unemployment_df['DATE'] = pd.to_datetime(rita_unemployment_df['DATE'])

rita_unemployment_df = rita_unemployment_df[rita_unemployment_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_unemployment_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_unemployment_df.index)

rita_unemployment_df['Hurricane Date'] = pd.to_datetime(rita_unemployment_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_unemployment_df['Delta'] = ((rita_unemployment_df['DATE'] - rita_unemployment_df['Hurricane Date']).dt.days/365)

rita_unemployment_df['Delta'] = rita_unemployment_df['Delta'].astype(int)

#Rename column 
rita_unemployment_df = rita_unemployment_df.rename(columns={'TXHARR1URN': 'Unemployment'})

rita_unemployment_df.head()

Unnamed: 0,DATE,Unemployment,Hurricane,Hurricane Date,Location,Delta
132,2001-01-01,4.1,Rita,2005-09-18,Houston Area,-4
133,2001-02-01,3.9,Rita,2005-09-18,Houston Area,-4
134,2001-03-01,4.1,Rita,2005-09-18,Houston Area,-4
135,2001-04-01,4.1,Rita,2005-09-18,Houston Area,-4
136,2001-05-01,4.2,Rita,2005-09-18,Houston Area,-4


In [10]:
#import Econ data from Houston

rita_5 = "./Economic Data/HoustonMedianIncome.csv"

rita_median_df = pd.read_csv(rita_5)
rita_median_df.head()

Unnamed: 0,DATE,MHITX48201A052NCEN
0,1/1/1989,29643
1,1/1/1990,.
2,1/1/1991,.
3,1/1/1992,.
4,1/1/1993,34500


In [11]:
# remove rows prior to 1990

rita_median_df['DATE'] = pd.to_datetime(rita_median_df['DATE'])

rita_median_df = rita_median_df[rita_median_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_median_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_median_df.index)

rita_median_df['Hurricane Date'] = pd.to_datetime(rita_median_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_median_df['Delta'] = ((rita_median_df['DATE'] - rita_median_df['Hurricane Date']).dt.days/365)

rita_median_df['Delta'] = rita_median_df['Delta'].astype(int)

#Rename column 
rita_median_df = rita_median_df.rename(columns={'MHITX48201A052NCEN': 'Median'})

rita_median_df.head()

Unnamed: 0,DATE,Median,Hurricane,Hurricane Date,Location,Delta
12,2001-01-01,43279,Rita,2005-09-18,Houston Area,-4
13,2002-01-01,42704,Rita,2005-09-18,Houston Area,-3
14,2003-01-01,42262,Rita,2005-09-18,Houston Area,-2
15,2004-01-01,41922,Rita,2005-09-18,Houston Area,-1
16,2005-01-01,44085,Rita,2005-09-18,Houston Area,0


In [12]:
#import Econ data from Houston

rita_6 = "./Economic Data/HoustonPop.csv"

rita_population_df = pd.read_csv(rita_6)
rita_population_df.head()

Unnamed: 0,DATE,TXHARR1POP
0,1/1/1970,1741.913
1,1/1/1971,1799.0
2,1/1/1972,1836.2
3,1/1/1973,1880.7
4,1/1/1974,1947.1


In [15]:
# remove rows prior to 1990

rita_population_df['DATE'] = pd.to_datetime(rita_population_df['DATE'])

rita_population_df = rita_population_df[rita_population_df["DATE"] >= '2001-01-01']

#Add hurricane, year, location

rita_population_df[['Hurricane','Hurricane Date','Location']] = pd.DataFrame([['Rita','2005-09-18','Houston Area']], index = rita_population_df.index)

rita_population_df['Hurricane Date'] = pd.to_datetime(rita_population_df['Hurricane Date'])


#We will need to show data from multiple years, so delta from hurricane best option
rita_population_df['Delta'] = ((rita_population_df['DATE'] - rita_population_df['Hurricane Date']).dt.days/365)

rita_population_df['Delta'] = rita_population_df['Delta'].astype(int)

#Rename column 
rita_population_df = rita_population_df.rename(columns={'TXHARR1POP': 'Population'})

rita_population_df.head()

Unnamed: 0,DATE,Population,Hurricane,Hurricane Date,Location,Delta
31,2001-01-01,3483.804,Rita,2005-09-18,Houston Area,-4
32,2002-01-01,3559.095,Rita,2005-09-18,Houston Area,-3
33,2003-01-01,3618.297,Rita,2005-09-18,Houston Area,-2
34,2004-01-01,3672.286,Rita,2005-09-18,Houston Area,-1
35,2005-01-01,3733.714,Rita,2005-09-18,Houston Area,0


In [16]:
#export to CSV

output_data_file = './CleanEconData/rita_cln_unemployment.csv'
  
rita_unemployment_df.to_csv(output_data_file, sep=',')

output_data_file2 = './CleanEconData/rita_cln_realGDP.csv'
  
rita_real_GDP_df.to_csv(output_data_file2, sep=',')

output_data_file3 = './CleanEconData/rita_cln_housingauth.csv'
  
rita_HousingAuthorized_df.to_csv(output_data_file3, sep=',')

output_data_file4 = './CleanEconData/rita_cln_housingpriceindex.csv'
  
rita_Housing_df.to_csv(output_data_file4, sep=',')

output_data_file5 = './CleanEconData/rita_cln_median.csv'
  
rita_median_df.to_csv(output_data_file5, sep=',')

output_data_file6 = './CleanEconData/rita_cln_population.csv'
  
rita_population_df.to_csv(output_data_file6, sep=',')