### Import dependencies

In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Merge datasets

In [2]:
# Read in data
cpi = pd.read_csv("../resources/cpi_final.csv")
gdp = pd.read_csv("../resources/gdp_final.csv")
gdp_pct = pd.read_csv("../resources/gdp_pct_chg_final.csv")
houst = pd.read_csv("../resources/housing_starts_final.csv")
opg = pd.read_csv("../resources/output_gap_final.csv")
rec_dt = pd.read_csv("../resources/recession_dates_final.csv")
unrate = pd.read_csv("../resources/unemployment_rate_final.csv")
fed_funds = pd.read_csv("../resources/fed_funds_final.csv")
yield10_2 = pd.read_csv("../resources/10YT_minus_2YT_final.csv")

In [3]:
# Combine all data sets into one data frame
dfs = [cpi, gdp, gdp_pct, houst, opg, rec_dt, unrate, fed_funds, yield10_2]
df = reduce(lambda  left,right: pd.merge(left,right,on=['quarter'],how='outer'), dfs)
df.head()

Unnamed: 0,quarter,avg_consumer_price_index,date_x,gdp,date_y,gdp_pct_change,avg_housing_starts,date_x.1,output_gap,date_y.1,target,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter
0,1947Q1,21.7,1947-01-01,243.164,,,,,,1947-01-01,0.0,,,,,,
1,1947Q2,22.01,1947-04-01,245.968,1947-04-01,4.7,,,,1947-04-01,0.0,,,,,,
2,1947Q3,22.49,1947-07-01,249.585,1947-07-01,6.0,,,,1947-07-01,0.0,,,,,,
3,1947Q4,23.126667,1947-10-01,259.745,1947-10-01,17.3,,,,1947-10-01,0.0,,,,,,
4,1948Q1,23.616667,1948-01-01,265.742,1948-01-01,9.6,,,,1948-01-01,0.0,4.4,,,,,


In [4]:
# Drop date columns
df = df.iloc[:,[0,1,3,5,6,8,10,11,12,13,14,15,16]]

In [5]:
# Sort data frame by quarter
df = df.sort_values(by=['quarter'])

In [6]:
# Drop rows with missing values
df = df.dropna()

In [7]:
# Set index to quarter
df = df.set_index('quarter')

In [8]:
# Reorder columns
df = df[['avg_consumer_price_index', 'gdp', 'gdp_pct_change',
       'avg_housing_starts', 'output_gap', 'avg_unemployment_rate',
       'fed_funds_avg_rate', 'fed_funds_percent_change_prev_quarter',
       'fed_funds_st_dev_rate', '10YT_minus_2YT_avg',
       '10YT_minus_2YT_percent_change_prev_quarter', 'target']]

In [9]:
# Rename target column
df = df.rename(columns={'target':'recession_actual'})
df.head()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,recession_actual
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1976Q3,57.3,1886.558,7.6,1557.0,-2.199151,7.6,5.283478,0.016956,0.100618,1.096667,0.370833,0.0
1976Q4,58.133333,1934.273,10.5,1691.333333,-2.246705,7.333333,4.874239,-0.077456,0.211941,1.466667,0.337386,0.0
1977Q1,59.2,1988.648,11.7,1844.333333,-1.877175,8.233333,4.660667,-0.043817,0.148254,1.326667,-0.095455,0.0
1977Q2,60.233333,2055.909,14.2,1918.666667,-0.776696,6.933333,5.157473,0.106595,0.332835,1.256667,-0.052764,0.0
1977Q3,61.066667,2118.473,12.7,2009.0,0.186001,6.8,5.816413,0.127764,0.344309,0.826667,-0.342175,0.0


### Shift data with sliding window technique

In [10]:
df['recession_1q_out'] = df['recession_actual'].shift(-1)
df['recession_2q_out'] = df['recession_actual'].shift(-2)
df['recession_4q_out'] = df['recession_actual'].shift(-4)

In [11]:
# Delete missing values
df = df.dropna()
df.tail()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,recession_actual,recession_1q_out,recession_2q_out,recession_4q_out
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017Q1,243.83,19190.431,4.2,1230.666667,-0.861917,4.866667,0.698889,0.55949,0.09883,1.203333,0.071217,0.0,0.0,0.0,0.0
2017Q2,244.065,19356.649,3.5,1169.333333,-0.752038,4.233333,0.947363,0.355527,0.098588,0.97,-0.193906,0.0,0.0,0.0,0.0
2017Q3,245.368333,19611.704,5.4,1175.333333,-0.396155,4.4,1.153696,0.217797,0.024029,0.88,-0.092784,0.0,0.0,0.0,0.0
2017Q4,247.273333,19918.91,6.4,1259.666667,0.033653,3.9,1.202778,0.042543,0.09912,0.673333,-0.234848,0.0,0.0,0.0,0.0
2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,4.333333,1.448966,0.204683,0.083902,0.596667,-0.113861,0.0,0.0,0.0,0.0


In [12]:
# Define y variables
y1 = df['recession_1q_out']
y2 = df['recession_2q_out']
y3 = df['recession_4q_out']

In [13]:
# Drop recession columns
df = df.drop(columns=['recession_actual','recession_1q_out','recession_2q_out','recession_4q_out'])

In [14]:
# Define X
X = df

### Build RNN Model

In [15]:
# Split data into training and testing
X1_train, X1_test, y1_train, y1_test=train_test_split(X,y1, train_size=0.8, random_state=42, stratify=y1)
X2_train, X2_test, y2_train, y2_test=train_test_split(X,y2, train_size=0.8, random_state=42, stratify=y2)
X3_train, X3_test, y3_train, y3_test=train_test_split(X,y3, train_size=0.8, random_state=42, stratify=y3)

In [16]:
# Create scaler object
X1_scaler = StandardScaler().fit(X1_train)
X2_scaler = StandardScaler().fit(X2_train)
X3_scaler = StandardScaler().fit(X3_train)

In [17]:
# Scale training data
X1_train_scaled = X1_scaler.transform(X1_train)
X2_train_scaled = X2_scaler.transform(X2_train)
X3_train_scaled = X3_scaler.transform(X3_train)

# Scale testing data
X1_test_scaled = X1_scaler.transform(X1_test)
X2_test_scaled = X2_scaler.transform(X2_test)
X3_test_scaled = X3_scaler.transform(X3_test)

### Create method to reshape data to 3D array, call method for all 3 data sets. Reshape data before training.
Reshape by adding new axes

In [21]:
# Reshape X1_train_scaled data
# Add axis so that the data shape is in the order: samples, time steps, features (1, 133, 11)
reshaped_X1_train = X1_train_scaled[np.newaxis,:,:]
reshaped_X1_train.shape

(1, 133, 11)

In [22]:
# Original array order
X1_train_scaled

array([[-1.14136746, -1.14349716,  0.66416885, ...,  0.32610991,
         0.0347151 ,  0.15504464],
       [-0.58177135, -0.68280148,  0.76158761, ..., -0.5117824 ,
        -1.02821626,  0.10618382],
       [ 1.01081681,  0.94550157, -1.72259067, ..., -0.59021189,
         1.45195692,  0.14254941],
       ...,
       [-0.45631251, -0.65160371, -0.94324062, ...,  0.68793501,
        -0.00588297,  0.13491829],
       [ 1.41724134,  1.64276555, -0.69969373, ..., -0.61786422,
         0.41486069,  0.0119876 ],
       [-1.81275857, -1.39487028,  0.73723292, ..., -0.35280096,
        -0.59270966, -0.05287537]])

In [23]:
# Compare reshaped array order to before reshaping
reshaped_X1_train

array([[[-1.14136746, -1.14349716,  0.66416885, ...,  0.32610991,
          0.0347151 ,  0.15504464],
        [-0.58177135, -0.68280148,  0.76158761, ..., -0.5117824 ,
         -1.02821626,  0.10618382],
        [ 1.01081681,  0.94550157, -1.72259067, ..., -0.59021189,
          1.45195692,  0.14254941],
        ...,
        [-0.45631251, -0.65160371, -0.94324062, ...,  0.68793501,
         -0.00588297,  0.13491829],
        [ 1.41724134,  1.64276555, -0.69969373, ..., -0.61786422,
          0.41486069,  0.0119876 ],
        [-1.81275857, -1.39487028,  0.73723292, ..., -0.35280096,
         -0.59270966, -0.05287537]]])

In [96]:
# Check y1_train shape
y1_train.shape

(133,)

In [108]:
# Add new axes and reshape y1_train data to fit LSTM input format
reshaped_y1_train = y1_train[np.newaxis,:,np.newaxis]
reshaped_y1_train.shape

(133, 1, 1)

Reshape using np.array (experimenting)

In [124]:
r2_X1_train_scaled = np.reshape(X1_train_scaled, (X1_train_scaled.shape[0], X1_train_scaled.shape[1], 1))
r2_X1_train_scaled.shape

(133, 11, 1)

In [127]:
# Initialize model
model = Sequential()

In [128]:
# Add layers
# model.add(LSTM(128, input_shape=(133,11), return_sequences=True))
model.add(LSTM((128), input_shape=(r2_X1_train_scaled.shape[1],1), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM((128), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

In [129]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

In [130]:
# View summary of model
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_55 (LSTM)               (None, 11, 128)           66560     
_________________________________________________________________
dropout_47 (Dropout)         (None, 11, 128)           0         
_________________________________________________________________
batch_normalization_36 (Batc (None, 11, 128)           512       
_________________________________________________________________
lstm_56 (LSTM)               (None, 11, 128)           131584    
_________________________________________________________________
dropout_48 (Dropout)         (None, 11, 128)           0         
_________________________________________________________________
batch_normalization_37 (Batc (None, 11, 128)           512       
_________________________________________________________________
lstm_57 (LSTM)               (None, 128)             

In [131]:
# Fit the model to the training data
model.fit(r2_X1_train_scaled, y1_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
133/133 - 11s - loss: 0.8517 - acc: 0.4887
Epoch 2/100
133/133 - 1s - loss: 0.4950 - acc: 0.7368
Epoch 3/100
133/133 - 1s - loss: 0.4743 - acc: 0.7970
Epoch 4/100
133/133 - 1s - loss: 0.5173 - acc: 0.8045
Epoch 5/100
133/133 - 1s - loss: 0.4686 - acc: 0.7970
Epoch 6/100
133/133 - 1s - loss: 0.3712 - acc: 0.8271
Epoch 7/100
133/133 - 1s - loss: 0.3692 - acc: 0.8421
Epoch 8/100
133/133 - 1s - loss: 0.3327 - acc: 0.8571
Epoch 9/100
133/133 - 1s - loss: 0.3914 - acc: 0.7820
Epoch 10/100
133/133 - 1s - loss: 0.4169 - acc: 0.7895
Epoch 11/100
133/133 - 1s - loss: 0.3531 - acc: 0.8571
Epoch 12/100
133/133 - 1s - loss: 0.3264 - acc: 0.8797
Epoch 13/100
133/133 - 1s - loss: 0.3370 - acc: 0.8797
Epoch 14/100
133/133 - 1s - loss: 0.3742 - acc: 0.8496
Epoch 15/100
133/133 - 1s - loss: 0.3609 - acc: 0.8722
Epoch 16/100
133/133 - 1s - loss: 0.4040 - acc: 0.7970
Epoch 17/100
133/133 - 1s - loss: 0.3564 - acc: 0.8346
Epoch 18/100
133/133 - 1s - loss: 0.3697 - acc: 0.8271
Epoch 19/100
133/1

<tensorflow.python.keras.callbacks.History at 0x1a6e151320>

In [None]:
# Evaluate Model