In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Treasury Bonds

In [None]:
t_10 = pd.read_csv("../Dataset/t_10yr.csv", parse_dates=[0])
t_30 = pd.read_csv("../Dataset/t_30yr.csv", parse_dates=[0])
t_5 = pd.read_csv("../Dataset/t_5yr.csv", parse_dates=[0])

In [13]:
def rename_columns(df, column_name, changed_name):
    new_df = df.rename(columns={column_name:changed_name, "Monthly Change":f"{changed_name} Mon_Chg", "Percent Monthly Change":f"{changed_name} Per_Mon_Chg", "Yearly Change":f"{changed_name} Yr_Chg", "Percent Yearly Change":f"{changed_name} Per_Yr_Chg"})
    return new_df

In [None]:
ts_10 = rename_columns(t_10, "DGS10", "10yr_Maturity")

In [None]:
ts_5 = rename_columns(t_5, "GS5", "5yr_Maturity")

In [None]:
ts_30 = rename_columns(t_30, "GS30", "30yr_Maturity" )

In [None]:
treasury_rate = ts_5.merge(ts_10, on="DATE", how="outer")

In [None]:
treasury_rate = treasury_rate.merge(ts_30, on="DATE", how="outer")

In [None]:
treasury_rate.to_csv("../Dataset/Treasury Bond.csv", index=False)

## Stock Exchange Index

In [None]:
nasdaq = pd.read_csv("../Dataset/nasdaq.csv", parse_dates=[0])
wilshire = pd.read_csv("../Dataset/wil5000.csv", parse_dates=[0])

In [None]:
nasdaq.columns

In [None]:
nasdaq = rename_columns(nasdaq, "NASDAQCOM", "NASDAQCOM")

In [None]:
wilshire = rename_columns(wilshire, "WILL5000PR", "WILL5000")

In [None]:
stock_exchange = nasdaq.merge(wilshire, on="DATE", how="outer")

In [None]:
stock_exchange.to_csv("../Dataset/Stock Exchange.csv", index=False)

# Final Dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
house_index = pd.read_csv("../Dataset/cshpi_processed.csv", parse_dates=[0])
cpi = pd.read_csv("../Dataset/cpi.csv", parse_dates=[0])
house_cons = pd.read_csv("../Dataset/house construction.csv", parse_dates=[0])
mortgage = pd.read_csv("../Dataset/mortgage.csv", parse_dates=[0])
rent = pd.read_csv("../Dataset/rent.csv", parse_dates=[0])
stock_exchange = pd.read_csv("../Dataset/stock exchange.csv", parse_dates=[0])
treasury_bonds = pd.read_csv("../Dataset/treasury bond.csv", parse_dates=[0])

In [3]:
house_index.columns

Index(['observation_date', 'CSUSHPISA', 'Change from Last Month',
       'Percent Change from Last Month', 'Change From Year Ago',
       'Percent Change From Year Ago'],
      dtype='object')

In [4]:
house_index = house_index.rename(columns={"observation_date":"DATE"})

In [5]:
mortgage.columns

Index(['DATE', 'MORTGAGE15US', 'MORTGAGE30US', 'MORTGAGE5US',
       'MORTGAGE15US_Monthly Change', 'MORTGAGE15US_Percent Monthly Change',
       'MORTGAGE15US_Yearly Change', 'MORTGAGE15US_Percent Yearly Change',
       'MORTGAGE30US_Monthly Change', 'MORTGAGE30US_Percent Monthly Change',
       'MORTGAGE30US_Yearly Change', 'MORTGAGE30US_Percent Yearly Change',
       'MORTGAGE5US_Monthly Change', 'MORTGAGE5US_Percent Monthly Change',
       'MORTGAGE5US_Yearly Change', 'MORTGAGE5US_Percent Yearly Change'],
      dtype='object')

In [19]:
mortgage_filtered = mortgage[["DATE","MORTGAGE15US_Monthly Change", "MORTGAGE30US_Monthly Change", "MORTGAGE5US_Monthly Change"]]

In [20]:
mortgage_filtered.columns

Index(['DATE', 'MORTGAGE15US_Monthly Change', 'MORTGAGE30US_Monthly Change',
       'MORTGAGE5US_Monthly Change'],
      dtype='object')

In [8]:
stock_exchange.columns

Index(['DATE', 'NASDAQCOM', 'NASDAQCOM Mon_Chg', 'NASDAQCOM Per_Mon_Chg',
       'NASDAQCOM Yr_Chg', 'NASDAQCOM Per_Yr_Chg', 'WILL5000',
       'WILL5000 Mon_Chg', 'WILL5000 Per_Mon_Chg', 'WILL5000 Yr_Chg',
       'WILL5000 Per_Yr_Chg'],
      dtype='object')

In [21]:
stock_exchange_filtered = stock_exchange[["DATE", "NASDAQCOM Mon_Chg", "WILL5000 Per_Mon_Chg"]]

In [22]:
stock_exchange_filtered.columns

Index(['DATE', 'NASDAQCOM Mon_Chg', 'WILL5000 Per_Mon_Chg'], dtype='object')

In [11]:
rent.columns

Index(['DATE', 'CUSR0000SEHA_NBD20000101', 'Monthly Change',
       'Monthly Percent Change', 'Yearly Change', 'Percent Yearly Change'],
      dtype='object')

In [14]:
rent = rename_columns(rent, "CUSR0000SEHA_NBD20000101", "Rent Index")

In [15]:
rent.columns

Index(['DATE', 'Rent Index', 'Rent Index Mon_Chg', 'Monthly Percent Change',
       'Rent Index Yr_Chg', 'Rent Index Per_Yr_Chg'],
      dtype='object')

In [23]:
rent_filtered = rent[["DATE", "Rent Index Mon_Chg"]]

In [24]:
rent_filtered.columns

Index(['DATE', 'Rent Index Mon_Chg'], dtype='object')

In [26]:
df = mortgage_filtered.merge(rent_filtered, on="DATE", how="outer")

In [27]:
df = df.merge(stock_exchange_filtered, on="DATE", how="outer")

In [29]:
house_cons.columns

Index(['DATE', 'Completed', 'Under Construction'], dtype='object')

In [31]:
house_cons_filtered = house_cons.rename(columns={"Completed":"House Completed", "Under Construction":"House Under Construction"})

In [32]:
df = df.merge(house_cons_filtered, on="DATE", how="outer")

In [33]:
df.columns

Index(['DATE', 'MORTGAGE15US_Monthly Change', 'MORTGAGE30US_Monthly Change',
       'MORTGAGE5US_Monthly Change', 'Rent Index Mon_Chg', 'NASDAQCOM Mon_Chg',
       'WILL5000 Per_Mon_Chg', 'House Completed', 'House Under Construction'],
      dtype='object')

In [34]:
treasury_bonds.columns

Index(['DATE', '5yr_Maturity', '5yr_Maturity Mon_Chg',
       '5yr_Maturity Per_Mon_Chg', '5yr_Maturity Yr_Chg',
       '5yr_Maturity Per_Yr_Chg', '10yr_Maturity', '10yr_Maturity Mon_Chg',
       '10yr_Maturity Per_Mon_Chg', '10yr_Maturity Yr_Chg',
       '10yr_Maturity Per_Yr_Chg', '30yr_Maturity', '30yr_Maturity Mon_Chg',
       '30yr_Maturity Per_Mon_Chg', '30yr_Maturity Yr_Chg',
       '30yr_Maturity Per_Yr_Chg'],
      dtype='object')

In [36]:
treasury_bonds_filtered = treasury_bonds[[ "DATE", "5yr_Maturity Mon_Chg", "10yr_Maturity Mon_Chg", "30yr_Maturity Per_Mon_Chg"]]

In [38]:
df = df.merge(treasury_bonds_filtered, on="DATE", how="outer")

In [39]:
def add_columns_updated(df, column_name):
    monthly_change , percent_monthly_change = monthly_changes(df, column_name)
    yearly_change, percent_yearly_change = yearly_changes(df, column_name)
    df[f"{column_name}_Monthly Change"] = monthly_change
    df[f"{column_name}_Percent Monthly Change"] = percent_monthly_change
    df[f"{column_name}_Yearly Change"] = yearly_change
    df[f"{column_name}_Percent Yearly Change"] = percent_yearly_change

In [40]:
cpi.columns

Index(['DATE', 'CPIAUCSL_NBD20000101', 'Monthly Change',
       'Percent Monthly Change', 'Yearly Change', 'Percent Yearly Change'],
      dtype='object')

In [43]:
cpi = cpi.rename(columns={"CPIAUCSL_NBD20000101":"CPI"})

In [50]:
cpi = rename_columns(cpi, "CPI", "CPI")

In [51]:
cpi.columns

Index(['DATE', 'CPI', 'CPI Mon_Chg', 'CPI Per_Mon_Chg', 'CPI Yr_Chg',
       'CPI Per_Yr_Chg'],
      dtype='object')

In [52]:
cpi_filtered = cpi[["DATE", "CPI Mon_Chg"]]

In [56]:
df = df.merge(cpi_filtered, on="DATE", how="outer")

In [57]:
df.columns

Index(['DATE', 'MORTGAGE15US_Monthly Change', 'MORTGAGE30US_Monthly Change',
       'MORTGAGE5US_Monthly Change', 'Rent Index Mon_Chg', 'NASDAQCOM Mon_Chg',
       'WILL5000 Per_Mon_Chg', 'House Completed', 'House Under Construction',
       '5yr_Maturity Per_Mon_Chg', '10yr_Maturity Per_Mon_Chg',
       '30yr_Maturity Per_Mon_Chg', 'CPI Mon_Chg'],
      dtype='object')

In [59]:
house_index.columns

Index(['DATE', 'CSUSHPISA', 'Change from Last Month',
       'Percent Change from Last Month', 'Change From Year Ago',
       'Percent Change From Year Ago'],
      dtype='object')

In [60]:
rename_columns(house_index, "CSUSHPISA", "CSUSHPISA")

Unnamed: 0,DATE,CSUSHPISA,Change from Last Month,Percent Change from Last Month,Change From Year Ago,Percent Change From Year Ago
0,2000-01-01,100.552,0.000,0.000000,,
1,2000-02-01,101.339,0.787,0.782680,,
2,2000-03-01,102.127,0.788,0.777588,,
3,2000-04-01,102.922,0.795,0.778443,,
4,2000-05-01,103.677,0.755,0.733565,,
...,...,...,...,...,...,...
247,2020-08-01,222.432,2.782,1.266560,12.246,5.826268
248,2020-09-01,225.608,3.176,1.427852,14.729,6.984574
249,2020-10-01,229.339,3.731,1.653753,17.746,8.386856
250,2020-11-01,232.673,3.334,1.453743,20.164,9.488539


In [61]:
house_index_filtered = house_index[["DATE", "Change from Last Month"]]

In [64]:
house_index_filtered=house_index_filtered.rename(columns={"Change from Last Month": "House Index Monthly Change"})

In [67]:
df = df.merge(house_index_filtered, on="DATE", how="outer")

In [69]:
df.to_csv("../Notebooks/Dataset.csv", index=False)