# Data Gathering

In [3]:
# Configure the Quandl client with an API key and download the PMI series.
# NOTE(review): the wildcard import hides which names getKey provides; only
# getKey() is used in this cell — consider importing it explicitly.
from getKey import *
import quandl
# presumably getKey reads the key from a local file named 'quandl.key' — TODO confirm in getKey
quandl.ApiConfig.api_key = getKey('quandl.key')

#PMI
# Fetch the "ISM/MAN_PMI" dataset; the displayed head shows a Date index and a
# single "PMI" column.
data = quandl.get("ISM/MAN_PMI")
data.head()

Unnamed: 0_level_0,PMI
Date,Unnamed: 1_level_1
1948-01-01,51.7
1948-02-01,50.2
1948-03-01,43.3
1948-04-01,45.4
1948-05-01,49.5


In [4]:
#S&P 500 returns
# Monthly S&P 500 price series; prices live in the "Value" column.
SPY = quandl.get("MULTPL/SP500_REAL_PRICE_MONTH")
# Shift the price column itself rather than the whole frame: assigning a
# DataFrame to a single column only works while the frame has exactly one
# column, and fails as soon as the dataset carries any extra column.
SPY["shifted"] = SPY["Value"].shift(periods=1)
# Simple month-over-month return: (price_t - price_{t-1}) / price_{t-1}.
SPY["returns"] = (SPY["Value"] - SPY["shifted"]) / SPY["shifted"]
# Stored in percent. The assignment aligns on the Date index, so only dates
# already present in `data` receive a value.
data["S&P 500 Returns"] = SPY["returns"] * 100

data.head()

Unnamed: 0_level_0,PMI,S&P 500 Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1948-01-01,51.7,-1.330672
1948-02-01,50.2,-4.922454
1948-03-01,43.3,1.41844
1948-04-01,45.4,7.692308
1948-05-01,49.5,4.87013


In [5]:
#Building permits: https://www.census.gov/construction/bps/uspermits.html
#for some reason the date format changes at Jan 2000 which is index 505
import pandas as pd
permits = pd.read_csv('building_permits.csv')
# Build a YYYYMMDD integer from the Year/Month/Day columns and parse it, which
# sidesteps the inconsistent date formatting mentioned above.
yyyymmdd = permits.Year*10000 + permits.Month*100 + permits.Day
permits["Date"] = pd.to_datetime(yyyymmdd.apply(str), format='%Y%m%d')
permits = permits.set_index("Date")
# Index-aligned assignment: months without a permits row become NaN.
data["Building Permits"] = permits["Total"]
# Restrict to the window from the first permits date through 2018-07-01.
# .loc makes the label-based row slice explicit, and .copy() gives `data` its
# own storage so the column assignments that follow do not write into a slice
# of the original frame (which triggers pandas' SettingWithCopyWarning).
data = data.loc[permits.index[0]:'2018-07-01'].copy()
data.head()

Unnamed: 0_level_0,PMI,S&P 500 Returns,Building Permits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-01-01,64.4,3.982053,75.7
1959-02-01,66.9,-1.528227,79.0
1959-03-01,67.1,2.537886,119.0
1959-04-01,66.9,1.673789,127.9
1959-05-01,68.2,1.50613,120.8


# Feature Engineering

In [6]:
#Change in PMI
# Month-over-month difference in PMI level (first row has no predecessor -> NaN).
prev_pmi = data['PMI'].shift(1)
data['dPMI'] = data['PMI'] - prev_pmi

#Percent change in building permits
prev_permits = data['Building Permits'].shift(1)
data['dBuilding Permits'] = 100*(data['Building Permits'] - prev_permits)/prev_permits

#Previous n month's S&P 500 returns
# One lag column per month back, from 1 to n months.
n = 6
for i in range(1, n+1):
    lag_column = "S&P 500 Returns {} Months Ago".format(i)
    data[lag_column] = data["S&P 500 Returns"].shift(i)
# The shifts above leave NaNs in the leading rows; drop every incomplete row.
data = data.dropna()
data.head()

Unnamed: 0_level_0,PMI,S&P 500 Returns,Building Permits,dPMI,dBuilding Permits,S&P 500 Returns 1 Months Ago,S&P 500 Returns 2 Months Ago,S&P 500 Returns 3 Months Ago,S&P 500 Returns 4 Months Ago,S&P 500 Returns 5 Months Ago,S&P 500 Returns 6 Months Ago
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1959-07-01,61.5,3.967978,112.3,-2.9,-7.113317,-0.862664,1.50613,1.673789,2.537886,-1.528227,3.982053
1959-08-01,55.1,-0.569133,106.4,-6.4,-5.253785,3.967978,-0.862664,1.50613,1.673789,2.537886,-1.528227
1959-09-01,48.3,-3.956229,102.5,-6.8,-3.665414,-0.569133,3.967978,-0.862664,1.50613,1.673789,2.537886
1959-10-01,49.7,-0.087642,94.6,1.4,-7.707317,-3.956229,-0.569133,3.967978,-0.862664,1.50613,1.673789
1959-11-01,50.6,0.403509,75.8,0.9,-19.87315,-0.087642,-3.956229,-0.569133,3.967978,-0.862664,1.50613


# Data Prep

In [7]:
#Add Y vector
# Target for each row is the following row's return (shift by -1 pulls the
# next month's value up onto the current row).
next_month_returns = data["S&P 500 Returns"].shift(periods=-1)
data["S&P 500 Returns 1 Month Ahead"] = next_month_returns
# The last row has no following month, so its target is NaN and it is dropped.
data = data.dropna()
data.tail()

Unnamed: 0_level_0,PMI,S&P 500 Returns,Building Permits,dPMI,dBuilding Permits,S&P 500 Returns 1 Months Ago,S&P 500 Returns 2 Months Ago,S&P 500 Returns 3 Months Ago,S&P 500 Returns 4 Months Ago,S&P 500 Returns 5 Months Ago,S&P 500 Returns 6 Months Ago,S&P 500 Returns 1 Month Ahead
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-02-01,60.8,-3.033909,92.1,1.7,-4.75698,4.708858,2.727087,1.431756,2.573771,1.490909,0.086386,-0.08835
2018-03-01,59.3,-0.08835,117.6,-1.5,27.687296,-3.033909,4.708858,2.727087,1.431756,2.573771,1.490909,-1.818135
2018-04-01,57.3,-1.818135,119.9,-2.0,1.955782,-0.08835,-3.033909,4.708858,2.727087,1.431756,2.573771,1.803567
2018-05-01,58.7,1.803567,125.0,1.4,4.253545,-1.818135,-0.08835,-3.033909,4.708858,2.727087,1.431756,1.956698
2018-06-01,60.2,1.956698,121.6,1.5,-2.72,1.803567,-1.818135,-0.08835,-3.033909,4.708858,2.727087,-0.644072


In [8]:
# Separate features and target by column name instead of fixed positions, so
# changing the number of lag features (or adding a column) cannot silently
# shift which column is treated as the target.
target_column = "S&P 500 Returns 1 Month Ahead"
array = data.values
# X: every column except the target; Y: the one-month-ahead return.
X = data.drop(target_column, axis=1).values
Y = data[target_column].values

In [9]:
from sklearn.preprocessing import StandardScaler
import numpy as np
# Show NumPy arrays with two decimal places from here on.
np.set_printoptions(precision=2)
# Standardise each feature column of X.
# NOTE(review): the scaler statistics here are computed from every row of X.
scaler = StandardScaler()
scaler.fit(X)
rescaledX = scaler.transform(X)

In [10]:
from sklearn.model_selection import train_test_split
# Split the *unscaled* features so the held-out rows play no part in the
# scaling statistics. The row partition depends only on the number of rows,
# test_size and random_state, not on the feature values.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 69)
# Fit the scaler on the training rows only, then apply that same transform to
# both splits; fitting on all rows would leak test-set means/variances into
# the training features.
scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)
# Rebind rescaledX so anything that uses it later is on the same scale as the
# data the model is trained on.
rescaledX = scaler.transform(X)
# NOTE(review): train_test_split shuffles by default, yet the rows are
# consecutive months with lagged-return features; a chronological split may be
# more appropriate for a forecasting task — confirm intent.

In [11]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Support-vector regressor with an RBF kernel; hyper-parameters are tuned below.
model = SVR(kernel='rbf')

# Candidate values tried exhaustively (3 x 3 x 2 combinations). Note that,
# despite the variable names, this is a grid search, not a randomised one.
param_dist = {
    'C': [1, 10, 100],
    'epsilon': [0.01, 0.1, 1],
    'shrinking': [False, True],
}

# 5-fold cross-validated search over the grid, fitted on the training split.
random_search = GridSearchCV(estimator=model, param_grid=param_dist, cv=5)
random_search.fit(x_train, y_train)

# Score of the tuned search object on the held-out split.
results = random_search.score(x_test, y_test)