<a href="https://colab.research.google.com/github/obliquesignal/algo-trading-bootcamp/blob/master/5a_Multilayer_Perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
from keras import models
from keras import layers

In [None]:
import numpy as np
import pandas as pd
import pandas_datareader.data as pdr
from datetime import datetime

import matplotlib.pyplot as plt
#The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in matplotlib 3.6 and the old name was later removed,
#so pick whichever name the installed matplotlib actually provides
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Import and Process Data

In [None]:
#Sample window shared by every FRED download in this notebook
start = datetime(1982, 1, 1)
end = datetime(2020, 2, 29)

#NBER business cycle classification
recession = pdr.DataReader('USREC', data_source='fred', start=start, end=end)
#Spread between the 10 year and 3 month treasury yields
yield_curve = pdr.DataReader('T10Y3MM', data_source='fred', start=start, end=end)
#Unemployment rate
unemployment = pdr.DataReader('UNRATE', data_source='fred', start=start, end=end)
#Total industrial capacity utilization
industrial_capacity = pdr.DataReader('TCU', data_source='fred', start=start, end=end)

In [None]:
#Target dataframe: the recession indicator without its first row, because the
#differenced features lose their first observation and the rows must line up
target = recession.iloc[1:]
target.head()

Unnamed: 0_level_0,USREC
DATE,Unnamed: 1_level_1
1982-02-01,1
1982-03-01,1
1982-04-01,1
1982-05-01,1
1982-06-01,1


In [None]:
#Share of months (in percent) the US economy spent in recession since 1982.
#The low value shows that this is an imbalanced dataset
recession_share = target['USREC'].mean() * 100
round(recession_share, 2)

9.63

In [None]:
#Features dataframe: period-over-period change of each indicator, one column per series
features = (
    pd.DataFrame({'curve': yield_curve['T10Y3MM'].diff()})  #Change in the 10 year / 3 month yield spread
    .assign(
        unemployment=unemployment['UNRATE'].diff(),      #Change in the unemployment rate
        industrial=industrial_capacity['TCU'].diff(),    #Change in capacity utilization
    )
    .dropna()  #diff() leaves NaN in the first row; drop every incomplete row
)
features.head()

Unnamed: 0_level_0,curve,unemployment,industrial
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1982-02-01,-1.52,0.3,1.3705
1982-03-01,0.4,0.1,-0.6713
1982-04-01,-0.02,0.3,-0.8309
1982-05-01,0.38,0.1,-0.6352
1982-06-01,0.31,0.2,-0.2984


In [None]:
#Split and standardize data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

target = np.ravel(target) #Need to convert column vector into a 1-d Numpy array

#Split dataset into train and test subsets BEFORE scaling. Test size is 25% of the total dataset and the data are
#not shuffled to preserve temporal structure of timeseries (random_state has no effect when shuffle=False)
features_train_raw, features_test_raw, target_train, target_test = train_test_split(features, target, test_size=0.25, shuffle=False)

#Fit the scaler on the training rows only, so that no information from the test period leaks into training.
#Training columns then have a mean of 0 and a standard deviation of 1; test rows reuse the training statistics
scaler = StandardScaler()
features_train = scaler.fit_transform(features_train_raw)
features_test = scaler.transform(features_test_raw)

#Whole dataset scaled with the training statistics (kept for any cell that still expects this name)
features_scaled = scaler.transform(features)

# Build Network

In [None]:
#Stacking layers to create a network. A dense layer is a fully connected layer
fcn = models.Sequential()

#First hidden layer needs to specify the shape of the feature data it will receive from the input layer (3 features)
fcn.add(layers.Dense(units=32, activation='relu', input_shape=(3,)))

#Hidden layer which has 32 neurons and uses the Relu activation function to learn nonlinearity. Does not need an input_shape parameter
fcn.add(layers.Dense(units=32, activation='relu'))

#Output layer which uses the Sigmoid function for binary classification
fcn.add(layers.Dense(units=1, activation='sigmoid'))

#Network will minimize the binary cross entropy loss function using the Adam optimizer and will evaluate success using accuracy metric.
#metrics is passed as a list: that is the documented form, and newer Keras versions reject a bare string
fcn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
#Print the layer-by-layer architecture and parameter counts of the classifier
fcn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                128       
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 1,217
Trainable params: 1,217
Non-trainable params: 0
_________________________________________________________________


# Train Network

In [None]:
#Train the classifier on the training split for 30 passes (epochs) over the data
fcn.fit(features_train, target_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f90322f63a0>

# Evaluate Network

In [None]:
#Score the classifier on the held-out test split; the result is [loss, accuracy] as configured in compile()
#NOTE(review): the dataset is imbalanced (recessions are under 10% of months), so accuracy alone can look
#high for a model that rarely predicts recession - consider also checking precision/recall
fcn.evaluate(features_test, target_test)



[0.0684034526348114, 0.991304337978363]

In [None]:
#Use out-of-sample March data to predict recession
#One observation in the same column order as `features` (curve, unemployment, industrial), in raw unscaled units
march = [[0.61, 0.9, -4.2351]]

#The network was trained on standardized inputs, so the new observation must go through the same fitted scaler
#before prediction. Run this cell before the regression section, which refits `scaler` on quarterly data
march_scaled = scaler.transform(pd.DataFrame(march, columns=features.columns))
fcn.predict(march_scaled).round(2)



array([[0.76]], dtype=float32)

# Build Network for Regression

In [None]:
#Annualized percentage change in real GDP from previous quarter, downloaded from FRED for the same sample window
gdp = pdr.DataReader('A191RL1Q225SBEA', data_source='fred', start=start, end=end)

In [None]:
#Regression target dataframe: GDP growth without its first row, so that it lines up with the features matrix
targetvalue = gdp.iloc[1:]
targetvalue.head()

Unnamed: 0_level_0,A191RL1Q225SBEA
DATE,Unnamed: 1_level_1
1982-04-01,1.8
1982-07-01,-1.5
1982-10-01,0.2
1983-01-01,5.4
1983-04-01,9.4


In [None]:
#Downsample the monthly features to a quarterly frequency, keeping the last monthly observation of each quarter
quarterly = features.resample('1q', label='right').last()
#Set the final quarter aside as the out-of-sample row (qmarch) and keep every earlier quarter
#as the feature matrix, so that it aligns with the regression target dataframe
qmarch, qfeatures = quarterly.iloc[-1:], quarterly.iloc[:-1]
qfeatures.tail()

Unnamed: 0_level_0,curve,unemployment,industrial
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-12-31,-0.33,0.1,-0.1275
2019-03-31,-0.12,0.0,-0.0052
2019-06-30,-0.15,-0.1,-0.0685
2019-09-30,0.13,-0.2,-0.3353
2019-12-31,0.05,0.0,-0.3139


In [None]:
#Split dataset into train and test subsets BEFORE scaling. Test size is 25% of the total dataset and the data are
#not shuffled to preserve temporal structure of timeseries (random_state has no effect when shuffle=False)
qfeatures_train_raw, qfeatures_test_raw, targetvalue_train, targetvalue_test = train_test_split(qfeatures, targetvalue, test_size=0.25, shuffle=False)

#Refit the scaler on the quarterly training rows only, so that no information from the test period leaks into training.
#NOTE: this overwrites the statistics fitted for the monthly classifier; rerun the classification cells before reusing `scaler` there
qfeatures_train = scaler.fit_transform(qfeatures_train_raw) #Every training feature now has a mean of 0 and a standard deviation of 1
qfeatures_test = scaler.transform(qfeatures_test_raw) #Test rows reuse the training statistics

#Whole quarterly dataset scaled with the training statistics (kept for any cell that still expects this name)
qfeatures_scaled = scaler.transform(qfeatures)

In [None]:
#Stacking layers to create a network. A dense layer is a fully connected layer
regression_fcn = models.Sequential()

#First hidden layer needs to specify the shape of the feature data it will receive from the input layer (3 features)
regression_fcn.add(layers.Dense(units=32, activation='relu', input_shape=(3,)))

#Hidden layer which has 32 neurons and uses the Relu activation function to learn nonlinearity. Does not need an input_shape parameter
regression_fcn.add(layers.Dense(units=32, activation='relu'))

#Output layer: a single unit with no activation (linear output), because the target is a continuous value rather than a class probability
regression_fcn.add(layers.Dense(units=1))

#Network will minimize the mean square error loss function using the Adam optimizer and will evaluate success using mae metric.
#metrics is passed as a list: that is the documented form, and newer Keras versions reject a bare string
regression_fcn.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [None]:
#Print the layer-by-layer architecture and parameter counts of the regression network
regression_fcn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 32)                128       
                                                                 
 dense_4 (Dense)             (None, 32)                1056      
                                                                 
 dense_5 (Dense)             (None, 1)                 33        
                                                                 
Total params: 1,217
Trainable params: 1,217
Non-trainable params: 0
_________________________________________________________________


In [None]:
#Train the regression network on the quarterly training split for 30 passes (epochs) over the data
regression_fcn.fit(qfeatures_train, targetvalue_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f9031869a30>

In [None]:
#Score the regression network on the held-out test split; the result is [mse loss, mae] as configured in compile()
regression_fcn.evaluate(qfeatures_test, targetvalue_test)



[6.64080810546875, 1.680739402770996]

In [None]:
#qmarch holds raw (unscaled) quarterly features, while the network was trained on standardized inputs,
#so pass the row through the scaler fitted on the quarterly data before predicting
regression_fcn.predict(scaler.transform(qmarch))



array([[1.9663868]], dtype=float32)