In [174]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import locale
from locale import atof
from scipy import stats

In [175]:
# Load in csv
filepath = os.path.join("CSV Files\\TWFC CSV\\Employment.csv")
employment_numbers_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\TWFC CSV\\Un_numbers.csv")
unemployment_numbers_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\TWFC CSV\\Un_rate.csv")
unemployment_rate_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\TWFC CSV\\Laborforce.csv")
laborforce_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\TWFC CSV\\Income.csv")
income_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\TWFC CSV\\Population.csv")
population_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\Zillow CSV\\Price.csv")
price_df = pd.read_csv(filepath)
filepath = os.path.join("CSV Files\\Zillow CSV\\Rent.csv")
rent_df = pd.read_csv(filepath)

Texas Workforce Commission Clean Up

In [176]:
# Merge the monthly CSVs
merge_df = pd.merge(employment_numbers_df, unemployment_numbers_df, how="outer", on=["Year", "Period ID", "Period", "Area"])
merge_df2 = pd.merge(merge_df, unemployment_rate_df, how="outer", on=["Year", "Period ID", "Period", "Area"])
merge_df3 = pd.merge(merge_df2, laborforce_df, how="outer", on=["Year", "Period ID", "Period", "Area"])

In [177]:
# Select desired columns 
merge_df = merge_df3.drop(merge_df3.columns[[2,4,6,8,10]], axis=1)

# Change column name of Area to City
merge_df=merge_df.rename(columns = {'Area':'City', 'Period ID': 'Month'})

# Slice out MSA out of the City Names
merge_df["City"] = merge_df["City"].replace(' MSA','',regex=True)
merge_df = merge_df

# Convert Strings to Floats
merge_df["Employment"] = merge_df["Employment"].replace(',','',regex=True).astype('float')
merge_df["Unemployment"] = merge_df["Unemployment"].replace(',','',regex=True).astype('float')
merge_df["Labor Force"] = merge_df["Labor Force"].replace(',','',regex=True).astype('float')

#Clean up city names
merge_df['City'], merge_df['x'] = merge_df["City"].str.split('-', 2).str[0:2].str
merge_df = merge_df.drop(merge_df.columns[[-1]], axis=1)

Zillow Data Clean Up

In [178]:
# Convert Region Name into Seperate City and State Columns
rent_df['City'],rent_df['State'] = rent_df["RegionName"].str.split(',', 2).str[0:2].str
rent_df['State'].replace(' TX','TX', inplace=True)
price_df['City'],price_df['State'] = price_df["RegionName"].str.split(',', 2).str[0:2].str
price_df['State'].replace(' TX','TX', inplace=True)

# Extract only the Texas Data
rent_tx_df = rent_df.loc[rent_df["State"] == "TX", :]
rent_tx_df = rent_tx_df.set_index('City')
price_tx_df = price_df.loc[price_df["State"] == "TX", :]
price_tx_df = price_tx_df.set_index('City')

# Remove Undesired Columns
rent_tx_df = rent_tx_df.drop(rent_tx_df.columns[[0,1,2,-1]], axis=1)
price_tx_df = price_tx_df.drop(price_tx_df.columns[[0,1,-1]], axis=1)

# Transpose Data Frame
rent = rent_tx_df.transpose()
price = price_tx_df.transpose()

#Move the Index
rent.reset_index(level=0, inplace=True)
rent = rent.rename(columns = {'index':'Date'})
rent = rent.reset_index(drop=True)
price.reset_index(level=0, inplace=True)
price = price.rename(columns = {'index':'Date'})
price = price.reset_index(drop=True)

# Convert the Date into a Month and Year Column
rent['Year'],rent['Month'] = rent["Date"].str.split('-', 2).str[0:2].str
rent = rent.drop(rent.columns[[0]], axis=1)
price['Year'],price['Month'] = price["Date"].str.split('-', 2).str[0:2].str
price = price.drop(price.columns[[0]], axis=1)

# Melt Data Frames and Restructure and Merge
rent_melt = pd.melt(rent, id_vars=["Year", "Month"])
rent_melt = rent_melt.rename(columns={'value': 'Rent'})
rent_melt = rent_melt.replace('Dallas-Fort Worth', 'Dallas')
price_melt = pd.melt(price, id_vars=["Year", "Month"])
price_melt = price_melt.rename(columns={'value': 'House Price'})
merge_melts = pd.merge(rent_melt,price_melt, how='left', on=['Year','Month','City'])

In [179]:
# Merge TWFC Data with Zillow Data
monthly_merge = pd.merge(merge_df, merge_melts, how="outer", on=["Year", "Month", "City"])

In [180]:
# Clean up Income data to merge to annual data frame
# Select desired columns 
income_df = income_df.drop(income_df.columns[[1,3,4]], axis=1)

# Change column name of Area to City
income_df = income_df.rename(columns = {'Area':'City'})

# Slice out MSA out of the City Names
income_df["City"] = income_df["City"].replace(' MSA','',regex=True)

#Clean up city names
income_df['City'], income_df['x'] = income_df["City"].str.split('-', 2).str[0:2].str
income_df = income_df.drop(income_df.columns[[-1]], axis=1)

# Convert Strings to Floats
income_df["Income"] = income_df["Income"].replace(',','',regex=True)

income = []
for x in income_df["Income"]:
    x = x[1:]
    x = int(x)
    income.append(x)

income_df["Income"] = income

# Merge Income Data Frame to the Main Data Frame
df_final = pd.merge(monthly_merge, income_df, how="outer", on=["City", "Year"])

In [181]:
df_final = df_final.groupby(['Year','City']).mean()
df_final.to_csv('Master Dataframe.csv',index=True)

In [182]:
Y2010 =df_final.loc[2010]
Y2010 = Y2010.dropna()
Y2011 =df_final.loc[2011]
Y2011 = Y2011.dropna()
Y2012 =df_final.loc[2012]
Y2012 = Y2012.dropna()
Y2013 =df_final.loc[2013]
Y2013 = Y2013.dropna()
Y2014 =df_final.loc[2014]
Y2014 = Y2014.dropna()
Y2015 =df_final.loc[2015]
Y2015 = Y2015.dropna()

In [183]:
rent_10 = stats.pearsonr(Y2010['Income'], Y2010['Rent'])
price_10 = stats.pearsonr(Y2010['Income'], Y2010['House Price'])
rent_11 = stats.pearsonr(Y2011['Income'], Y2011['Rent'])
price_11 = stats.pearsonr(Y2011['Income'], Y2011['House Price'])
rent_12 = stats.pearsonr(Y2012['Income'], Y2012['Rent'])
price_12 = stats.pearsonr(Y2012['Income'], Y2012['House Price'])
rent_13 = stats.pearsonr(Y2013['Income'], Y2013['Rent'])
price_13 = stats.pearsonr(Y2013['Income'], Y2013['House Price'])
rent_14 = stats.pearsonr(Y2014['Income'], Y2014['Rent'])
price_14 = stats.pearsonr(Y2014['Income'], Y2014['House Price'])
rent_15 = stats.pearsonr(Y2015['Income'], Y2015['Rent'])
price_15 = stats.pearsonr(Y2015['Income'], Y2015['House Price'])

In [184]:
rent_10

(0.71555196431565393, 0.0008413058896690899)

In [185]:
price_10

(0.25243318310485563, 0.3122149918088124)

In [186]:
rent_11

(0.68316623413090405, 0.0017774195869472195)

In [187]:
price_11

(0.29124347081966639, 0.24096810451295073)

In [188]:
rent_12

(0.61064561610687107, 0.0042392469369925767)

In [189]:
price_12

(0.24604532124005499, 0.29570774311032311)

In [190]:
rent_13

(0.58602673665034133, 0.0066211235275599077)

In [191]:
price_13

(0.43928896360349579, 0.052635759874550367)

In [192]:
rent_14

(0.63192237792811179, 0.0021187449141878696)

In [193]:
price_14

(0.56642006153354618, 0.0074289153536602386)

In [194]:
rent_15

(0.65817511882843005, 0.0011806673205400225)

In [195]:
price_15

(0.57736059740904788, 0.0061323494109212775)