In [1]:
#Import dependencies
import pandas as pd
from datetime import datetime
from sqlalchemy import create_engine
import numpy as np

In [2]:
#Load files

# Mortgage Interest Rates file
mortgage_rate = '../Resources/FMAC-30US-Mortgage-Interest-Rates.csv'
# Interest Rates file - Change from previous period
mortgage_rate_change = '../Resources/FMAC-30US-Mortgage-Interest-Rates-Change.csv'
# Interest Rates file - Percent Change from previous period
mortgage_rate_pct_change = '../Resources/FMAC-30US-Mortgage-Interest-Rates-Pct-Change.csv'

# House Pricing Index - USA   
house_pricing_index = '../Resources/FMAC-HPI_USA.csv'
# House Pricing Change - USA
house_pricing_change = '../Resources/FMAC-HPI_USA-Change.csv'
# House Pricing Percent Change - USA
house_pricing_pct_change = '../Resources/FMAC-HPI_USA-Pct-Change.csv'


In [3]:
# The correct encoding must be used to read the CSV in pandas
mortgage_rate_df = pd.read_csv(mortgage_rate, encoding="ISO-8859-1")
mortgage_ratechange_df = pd.read_csv(mortgage_rate_change, encoding="ISO-8859-1")
mortgage_pctchange_df = pd.read_csv(mortgage_rate_pct_change, encoding="ISO-8859-1")

hpi_df = pd.read_csv(house_pricing_index, encoding="ISO-8859-1")
hpi_change_df = pd.read_csv(house_pricing_change, encoding="ISO-8859-1")
hpi_pctchange_df = pd.read_csv(house_pricing_pct_change, encoding="ISO-8859-1")


In [4]:
mortgage_rate_df

Unnamed: 0,Date,Value
0,2021-01-31,2.65
1,2020-12-31,2.67
2,2020-11-30,2.72
3,2020-10-31,2.81
4,2020-09-30,2.90
...,...,...
593,1971-08-31,7.69
594,1971-07-31,7.69
595,1971-06-30,7.54
596,1971-05-31,7.46


In [5]:
mortgage_rate_df[['Year','Month','Day']] = mortgage_rate_df.Date.str.split("-",expand=True)
mortgage_rate_df= mortgage_rate_df.drop(['Date'], axis = 1) 
mortgage_rate_df= mortgage_rate_df.drop(['Day'], axis = 1)
mortgage_rate_df= mortgage_rate_df.rename(columns={"Value": "Rate"})
mortgage_rate_df

Unnamed: 0,Rate,Year,Month
0,2.65,2021,01
1,2.67,2020,12
2,2.72,2020,11
3,2.81,2020,10
4,2.90,2020,09
...,...,...,...
593,7.69,1971,08
594,7.69,1971,07
595,7.54,1971,06
596,7.46,1971,05


In [6]:
mortgage_ratechange_df

Unnamed: 0,Date,Value
0,1/31/2021,-0.02
1,12/31/2020,-0.05
2,11/30/2020,-0.09
3,10/31/2020,-0.09
4,9/30/2020,-0.01
...,...,...
592,9/30/1971,-0.02
593,8/31/1971,0.00
594,7/31/1971,0.15
595,6/30/1971,0.08


In [7]:
mortgage_ratechange_df[['Month','Day','Year']] = mortgage_ratechange_df.Date.str.split("/",expand=True)
mortgage_ratechange_df= mortgage_ratechange_df.drop(['Date'], axis = 1) 
mortgage_ratechange_df= mortgage_ratechange_df.drop(['Day'], axis = 1)
mortgage_ratechange_df= mortgage_ratechange_df.rename(columns={"Value": "Rate_Change"})
mortgage_ratechange_df

Unnamed: 0,Rate_Change,Month,Year
0,-0.02,1,2021
1,-0.05,12,2020
2,-0.09,11,2020
3,-0.09,10,2020
4,-0.01,9,2020
...,...,...,...
592,-0.02,9,1971
593,0.00,8,1971
594,0.15,7,1971
595,0.08,6,1971


In [8]:
mortgage_pctchange_df

Unnamed: 0,Date,Value
0,2021-01-31,-0.007491
1,2020-12-31,-0.018382
2,2020-11-30,-0.032028
3,2020-10-31,-0.031034
4,2020-09-30,-0.003436
...,...,...
592,1971-09-30,-0.002601
593,1971-08-31,0.000000
594,1971-07-31,0.019894
595,1971-06-30,0.010724


In [9]:
mortgage_pctchange_df[['Year','Month','Day']] = mortgage_pctchange_df.Date.str.split("-",expand=True)
mortgage_pctchange_df= mortgage_pctchange_df.drop(['Date'], axis = 1) 
mortgage_pctchange_df= mortgage_pctchange_df.drop(['Day'], axis = 1)
mortgage_pctchange_df = mortgage_pctchange_df .rename(columns={"Value": "Percent_Rate_Change"})
mortgage_pctchange_df

Unnamed: 0,Percent_Rate_Change,Year,Month
0,-0.007491,2021,01
1,-0.018382,2020,12
2,-0.032028,2020,11
3,-0.031034,2020,10
4,-0.003436,2020,09
...,...,...,...
592,-0.002601,1971,09
593,0.000000,1971,08
594,0.019894,1971,07
595,0.010724,1971,06


In [12]:
# Combine the mortgage_rate_df dataframe with the mortgage_ratechange_df dataframe: join on Year and Month
new_mortgage_rate_df = pd.merge(mortgage_rate_df, mortgage_ratechange_df, on=['Year','Month'], how='inner')
new_mortgage_rate_df

Unnamed: 0,Rate,Year,Month,Percent_Rate_Change,Rate_Change
0,2.67,2020,12,-0.018382,-0.05
1,2.72,2020,11,-0.032028,-0.09
2,2.81,2020,10,-0.031034,-0.09
3,3.74,2019,12,0.016304,0.06
4,3.68,2019,11,-0.026455,-0.10
...,...,...,...,...,...
145,7.44,1972,11,0.002695,0.02
146,7.42,1972,10,-0.001346,-0.01
147,7.48,1971,12,-0.003995,-0.03
148,7.51,1971,11,-0.015727,-0.12


In [10]:
# Merge dataframes by adding columns from to mortgage_ratechange_df and mortgage_pctchange_df to mortgage_rate_df
mortgage_rate_df['Percent_Rate_Change'] = mortgage_pctchange_df['Percent_Rate_Change']

In [33]:
mortgage_ratechange_df['Rate'] =  mortgage_rate_df['Rate']
mortgage_ratechange_df

KeyError: 'Rate'

In [33]:
# Change Year data type to numeric
mortgage_ratechange_df['Year'] = mortgage_ratechange_df['Year'].apply(pd.to_numeric)

In [37]:
# Select only records >= 2010
mortgage_ratechange_df  = mortgage_ratechange_df.loc[mortgage_ratechange_df["Year"] >= 2010]
mortgage_ratechange_df

Unnamed: 0,Rate_Change,Year,Month,Percent_Rate_Change,Rate
0,-0.02,2021,1,-0.007491,2.65
1,-0.05,2020,12,-0.018382,2.67
2,-0.09,2020,11,-0.032028,2.72
3,-0.09,2020,10,-0.031034,2.81
4,-0.01,2020,9,-0.003436,2.90
...,...,...,...,...,...
128,-0.28,2010,5,-0.055336,4.78
129,0.07,2010,4,0.014028,5.06
130,-0.06,2010,3,-0.011881,4.99
131,0.07,2010,2,0.014056,5.05


In [13]:
hpichange

Unnamed: 0,Date,NSA Value,SA Value
0,"November 30, 2020",2.156922,3.177881
1,"October 31, 2020",2.167205,3.091581
2,"September 30, 2020",2.336903,3.124809
3,"August 31, 2020",2.583641,3.019234
4,"July 31, 2020",2.636928,2.580970
...,...,...,...
545,"June 30, 1975",0.021420,-0.038146
546,"May 31, 1975",0.185506,0.134354
547,"April 30, 1975",0.323847,0.269986
548,"March 31, 1975",0.250342,0.217947


In [36]:
mortgage_ratechange_df

Unnamed: 0,Rate_Change,Year,Month,Percent_Rate_Change,Rate
0,-0.02,2021,1,-0.007491,2.65
1,-0.05,2020,12,-0.018382,2.67
2,-0.09,2020,11,-0.032028,2.72
3,-0.09,2020,10,-0.031034,2.81
4,-0.01,2020,9,-0.003436,2.90
...,...,...,...,...,...
128,-0.28,2010,5,-0.055336,4.78
129,0.07,2010,4,0.014028,5.06
130,-0.06,2010,3,-0.011881,4.99
131,0.07,2010,2,0.014056,5.05


In [14]:
hpichange ['Year']  = hpichange['Date'].str[-4:]
hpichange['Months']  = hpichange['Date'].str[:-8]
hpichange = hpichange.rename(columns={"NSA Value": "NSA_Change", 'SA Value':'SA_Change'})
hpichange = hpichange.drop(['Date'], axis = 1)
hpichange 

Unnamed: 0,NSA_Change,SA_Change,Year,Months
0,2.156922,3.177881,2020,November
1,2.167205,3.091581,2020,October
2,2.336903,3.124809,2020,September
3,2.583641,3.019234,2020,August
4,2.636928,2.580970,2020,July
...,...,...,...,...
545,0.021420,-0.038146,1975,June
546,0.185506,0.134354,1975,May
547,0.323847,0.269986,1975,April
548,0.250342,0.217947,1975,March


In [15]:
hpipercent ['Year']  = hpipercent['Date'].str[-4:]
hpipercent['Months']  = hpipercent['Date'].str[:-8]
hpipercent = hpipercent.rename(columns={"NSA Value": "NSA_PctChange", 'SA Value':'SA_PctChange'})
hpipercent = hpipercent.drop(['Date'], axis = 1)
hpipercent

Unnamed: 0,NSA_PctChange,SA_PctChange,Year,Months
0,0.010069,0.014910,2020,November
1,0.010221,0.014718,2020,October
2,0.011144,0.015101,2020,September
3,0.012474,0.014807,2020,August
4,0.012896,0.012820,2020,July
...,...,...,...,...
545,0.000870,-0.001559,1975,June
546,0.007592,0.005522,1975,May
547,0.013431,0.011221,1975,April
548,0.010491,0.009141,1975,March


In [16]:
hpi ['Year']  = hpi['Date'].str[-4:]
hpi['Months']  = hpi['Date'].str[:-8]
hpi = hpi.rename(columns={"NSA Value": "NSA_Value", 'SA Value':'SA_Value'})
hpi = hpi.drop(['Date'], axis = 1)
hpi

Unnamed: 0,NSA_Value,SA_Value,Year,Months
0,216.362121,216.317308,2020,November
1,214.205199,213.139427,2020,October
2,212.037994,210.047846,2020,September
3,209.701092,206.923037,2020,August
4,207.117450,203.903803,2020,July
...,...,...,...,...
546,24.621368,24.465147,1975,May
547,24.435862,24.330793,1975,April
548,24.112015,24.060807,1975,March
549,23.861673,23.842861,1975,February


In [17]:
hpi['NSA_PctChange'] = hpipercent['NSA_PctChange']

In [18]:
hpi['SA_PctChange'] = hpipercent['SA_PctChange']

In [19]:
hpi['NSA_Change'] = hpichange['NSA_Change']

In [20]:
hpi['SA_Change'] = hpichange['SA_Change']

In [21]:
hpi

Unnamed: 0,NSA_Value,SA_Value,Year,Months,NSA_PctChange,SA_PctChange,NSA_Change,SA_Change
0,216.362121,216.317308,2020,November,0.010069,0.014910,2.156922,3.177881
1,214.205199,213.139427,2020,October,0.010221,0.014718,2.167205,3.091581
2,212.037994,210.047846,2020,September,0.011144,0.015101,2.336903,3.124809
3,209.701092,206.923037,2020,August,0.012474,0.014807,2.583641,3.019234
4,207.117450,203.903803,2020,July,0.012896,0.012820,2.636928,2.580970
...,...,...,...,...,...,...,...,...
546,24.621368,24.465147,1975,May,0.007592,0.005522,0.185506,0.134354
547,24.435862,24.330793,1975,April,0.013431,0.011221,0.323847,0.269986
548,24.112015,24.060807,1975,March,0.010491,0.009141,0.250342,0.217947
549,23.861673,23.842861,1975,February,0.006347,0.006732,0.150506,0.159447


In [23]:
hpi = hpi[['Year', 'Months', 'NSA_Value', 'SA_Value', 'NSA_Change', 'SA_Change','NSA_PctChange', 'SA_PctChange']]
hpi

Unnamed: 0,Year,Months,NSA_Value,SA_Value,NSA_Change,SA_Change,NSA_PctChange,SA_PctChange
0,2020,November,216.362121,216.317308,2.156922,3.177881,0.010069,0.014910
1,2020,October,214.205199,213.139427,2.167205,3.091581,0.010221,0.014718
2,2020,September,212.037994,210.047846,2.336903,3.124809,0.011144,0.015101
3,2020,August,209.701092,206.923037,2.583641,3.019234,0.012474,0.014807
4,2020,July,207.117450,203.903803,2.636928,2.580970,0.012896,0.012820
...,...,...,...,...,...,...,...,...
546,1975,May,24.621368,24.465147,0.185506,0.134354,0.007592,0.005522
547,1975,April,24.435862,24.330793,0.323847,0.269986,0.013431,0.011221
548,1975,March,24.112015,24.060807,0.250342,0.217947,0.010491,0.009141
549,1975,February,23.861673,23.842861,0.150506,0.159447,0.006347,0.006732


In [24]:
hpi.dtypes

Year              object
Months            object
NSA_Value        float64
SA_Value         float64
NSA_Change       float64
SA_Change        float64
NSA_PctChange    float64
SA_PctChange     float64
dtype: object

In [26]:
hpi['Year'] = hpi['Year'].apply(pd.to_numeric)

In [27]:
hpi = hpi.loc[hpi["Year"] >= 2010]

In [28]:
hpi

Unnamed: 0,Year,Months,NSA_Value,SA_Value,NSA_Change,SA_Change,NSA_PctChange,SA_PctChange
0,2020,November,216.362121,216.317308,2.156922,3.177881,0.010069,0.014910
1,2020,October,214.205199,213.139427,2.167205,3.091581,0.010221,0.014718
2,2020,September,212.037994,210.047846,2.336903,3.124809,0.011144,0.015101
3,2020,August,209.701092,206.923037,2.583641,3.019234,0.012474,0.014807
4,2020,July,207.117450,203.903803,2.636928,2.580970,0.012896,0.012820
...,...,...,...,...,...,...,...,...
126,2010,May,131.857528,130.336896,0.450435,-0.589505,0.003428,-0.004503
127,2010,April,131.407093,130.926401,0.653962,-0.445876,0.005001,-0.003394
128,2010,March,130.753131,131.372276,0.546432,-0.105877,0.004197,-0.000805
129,2010,February,130.206699,131.478154,-0.059507,-0.087918,-0.000457,-0.000668
