### Import dependencies

In [12]:
import pandas as pd
import numpy as np
from functools import reduce
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Merge datasets

In [13]:
# Read in data
cpi = pd.read_csv("resources/cpi_final.csv")
gdp = pd.read_csv("resources/gdp_final.csv")
gdp_pct = pd.read_csv("resources/gdp_pct_chg_final.csv")
houst = pd.read_csv("resources/housing_starts_final.csv")
opg = pd.read_csv("resources/output_gap_final.csv")
rec_dt = pd.read_csv("resources/recession_dates_final.csv")
unrate = pd.read_csv("resources/unemployment_rate_final.csv")
fed_funds = pd.read_csv("resources/fed_funds_final.csv")
yield10_2 = pd.read_csv("resources/10YT_minus_2YT_final.csv")
fred = pd.read_csv("resources/FRED_data.csv")

In [14]:
# Combine all data sets into one data frame
dfs = [cpi, gdp, gdp_pct, houst, opg, rec_dt, unrate, fed_funds, yield10_2, fred]
df = reduce(lambda left,right: pd.merge(left,right,on=['quarter'],how='outer'), dfs)
df.head()

Unnamed: 0,quarter,avg_consumer_price_index,date_x,gdp,date_y,gdp_pct_change,avg_housing_starts,date_x.1,output_gap,date_y.1,...,nat_rate_of_unemp_long_term_PCH,personal_consumption_expenditures,personal_consumption_expenditures_CCA,personal_consumption_expenditures_CCH,personal_consumption_expenditures_CH1,personal_consumption_expenditures_CHG,personal_consumption_expenditures_LOG,personal_consumption_expenditures_PC1,personal_consumption_expenditures_PCA,personal_consumption_expenditures_PCH
0,1947Q1,21.7,1947-01-01,243.164,,,,,,1947-01-01,...,,,,,,,,,,
1,1947Q2,22.01,1947-04-01,245.968,1947-04-01,4.7,,,,1947-04-01,...,,,,,,,,,,
2,1947Q3,22.49,1947-07-01,249.585,1947-07-01,6.0,,,,1947-07-01,...,,,,,,,,,,
3,1947Q4,23.126667,1947-10-01,259.745,1947-10-01,17.3,,,,1947-10-01,...,,,,,,,,,,
4,1948Q1,23.616667,1948-01-01,265.742,1948-01-01,9.6,,,,1948-01-01,...,,,,,,,,,,


In [15]:
# Drop date columns
df = df.drop(columns=['date_x','date_y'])

In [16]:
# Sort data frame by quarter
df = df.sort_values(by=['quarter'])

In [17]:
# Check dataset before removing nulls
df.tail()

Unnamed: 0,quarter,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,target,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,...,nat_rate_of_unemp_long_term_PCH,personal_consumption_expenditures,personal_consumption_expenditures_CCA,personal_consumption_expenditures_CCH,personal_consumption_expenditures_CH1,personal_consumption_expenditures_CHG,personal_consumption_expenditures_LOG,personal_consumption_expenditures_PC1,personal_consumption_expenditures_PCA,personal_consumption_expenditures_PCH
287,2018Q4,252.759,20897.804,2.9,1185.0,0.592021,0.0,3.566667,2.217097,0.152641,...,-0.1,14211.92,2.7,0.7,625.653,97.361,9.6,4.6,2.8,0.7
288,2019Q1,253.311333,21098.827,3.9,1213.0,0.848147,0.0,4.133333,2.401311,0.083088,...,-0.1,14266.25,1.5,0.4,537.893,54.33,9.6,3.9,1.5,0.4
289,2019Q2,255.139333,21340.267,4.7,1255.666667,0.828815,,3.5,2.397813,-0.001457,...,-0.1,14511.176,6.8,1.7,571.348,244.926,9.6,4.1,7.0,1.7
290,2019Q3,256.273,,,1282.0,,,3.7,2.197813,-0.083409,...,,,,,,,,,,
311,2019Q4,,,,,,,,1.845625,-0.160245,...,,,,,,,,,,


In [18]:
# Drop rows with missing values
df = df.dropna()

In [19]:
# Check dataset after removing nulls
df.tail()

Unnamed: 0,quarter,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,target,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,...,nat_rate_of_unemp_long_term_PCH,personal_consumption_expenditures,personal_consumption_expenditures_CCA,personal_consumption_expenditures_CCH,personal_consumption_expenditures_CH1,personal_consumption_expenditures_CHG,personal_consumption_expenditures_LOG,personal_consumption_expenditures_PC1,personal_consumption_expenditures_PCA,personal_consumption_expenditures_PCH
284,2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,0.0,4.333333,1.448966,0.204683,...,-0.1,13728.357,4.2,1.0,623.938,142.09,9.5,4.8,4.2,1.0
285,2018Q2,250.578667,20510.177,7.1,1259.666667,0.589182,0.0,3.833333,1.727176,0.192007,...,-0.1,13939.828,6.1,1.5,727.327,211.471,9.5,5.5,6.3,1.5
286,2018Q3,251.828667,20749.752,4.8,1233.0,0.821959,0.0,3.866667,1.923492,0.113663,...,-0.1,14114.559,5.0,1.2,769.506,174.731,9.6,5.8,5.1,1.3
287,2018Q4,252.759,20897.804,2.9,1185.0,0.592021,0.0,3.566667,2.217097,0.152641,...,-0.1,14211.92,2.7,0.7,625.653,97.361,9.6,4.6,2.8,0.7
288,2019Q1,253.311333,21098.827,3.9,1213.0,0.848147,0.0,4.133333,2.401311,0.083088,...,-0.1,14266.25,1.5,0.4,537.893,54.33,9.6,3.9,1.5,0.4


In [20]:
# Set index to quarter
df = df.set_index('quarter')

In [21]:
# Rename target column
df = df.rename(columns={'target':'recession_actual'})
df.head()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,recession_actual,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,...,nat_rate_of_unemp_long_term_PCH,personal_consumption_expenditures,personal_consumption_expenditures_CCA,personal_consumption_expenditures_CCH,personal_consumption_expenditures_CH1,personal_consumption_expenditures_CHG,personal_consumption_expenditures_LOG,personal_consumption_expenditures_PC1,personal_consumption_expenditures_PCA,personal_consumption_expenditures_PCH
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1976Q3,57.3,1886.558,7.6,1557.0,-2.199151,0.0,7.6,5.283478,0.016956,0.100618,...,0.1,1158.806,10.2,2.6,111.614,29.266,7.1,10.7,10.8,2.6
1976Q4,58.133333,1934.273,10.5,1691.333333,-2.246705,0.0,7.333333,4.874239,-0.077456,0.211941,...,0.1,1192.408,11.4,2.9,116.185,33.602,7.1,10.8,12.1,2.9
1977Q1,59.2,1988.648,11.7,1844.333333,-1.877175,0.0,8.233333,4.660667,-0.043817,0.148254,...,0.1,1228.212,11.8,3.0,118.304,35.804,7.1,10.7,12.6,3.0
1977Q2,60.233333,2055.909,14.2,1918.666667,-0.776696,0.0,6.933333,5.157473,0.106595,0.332835,...,0.1,1255.98,8.9,2.2,126.44,27.768,7.1,11.2,9.4,2.3
1977Q3,61.066667,2118.473,12.7,2009.0,0.186001,0.0,6.8,5.816413,0.127764,0.344309,...,0.0,1286.905,9.7,2.4,128.099,30.925,7.2,11.1,10.2,2.5


### Shift data with sliding window technique

In [22]:
df['recession_1q_out'] = df['recession_actual'].shift(-1)
df['recession_2q_out'] = df['recession_actual'].shift(-2)
df['recession_4q_out'] = df['recession_actual'].shift(-4)

In [23]:
# Create three datasets -- 1 for each model (recession 1Qtr out, 2Qtrs out, 4Qtrs out)
df_q1 = df.drop(columns=['recession_2q_out','recession_4q_out','recession_actual'])
df_q2 = df.drop(columns=['recession_4q_out','recession_1q_out','recession_actual'])
df_q4 = df.drop(columns=['recession_1q_out','recession_2q_out','recession_actual'])

In [24]:
# Delete missing values
df_q1 = df_q1.dropna()
df_q2 = df_q2.dropna()
df_q4 = df_q4.dropna()
df_q4.tail()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,...,personal_consumption_expenditures,personal_consumption_expenditures_CCA,personal_consumption_expenditures_CCH,personal_consumption_expenditures_CH1,personal_consumption_expenditures_CHG,personal_consumption_expenditures_LOG,personal_consumption_expenditures_PC1,personal_consumption_expenditures_PCA,personal_consumption_expenditures_PCH,recession_4q_out
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017Q1,243.83,19190.431,4.2,1230.666667,-0.861917,4.866667,0.698889,0.55949,0.09883,1.203333,...,13104.419,4.4,1.1,580.895,144.652,9.5,4.6,4.5,1.1,0.0
2017Q2,244.065,19356.649,3.5,1169.333333,-0.752038,4.233333,0.947363,0.355527,0.098588,0.97,...,13212.501,3.3,0.8,524.24,108.082,9.5,4.1,3.3,0.8,0.0
2017Q3,245.368333,19611.704,5.4,1175.333333,-0.396155,4.4,1.153696,0.217797,0.024029,0.88,...,13345.053,4.0,1.0,522.673,132.552,9.5,4.1,4.1,1.0,0.0
2017Q4,247.273333,19918.91,6.4,1259.666667,0.033653,3.9,1.202778,0.042543,0.09912,0.673333,...,13586.267,7.2,1.8,626.5,241.214,9.5,4.8,7.4,1.8,0.0
2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,4.333333,1.448966,0.204683,0.083902,0.596667,...,13728.357,4.2,1.0,623.938,142.09,9.5,4.8,4.2,1.0,0.0


In [25]:
# Define y variables
y1 = df_q1['recession_1q_out']
y2 = df_q2['recession_2q_out']
y3 = df_q4['recession_4q_out']

In [26]:
# Drop target
df_q1 = df_q1.drop(columns=['recession_1q_out'])
df_q2 = df_q2.drop(columns=['recession_2q_out'])
df_q4 = df_q4.drop(columns=['recession_4q_out'])

In [27]:
# Define X
X_q1 = df_q1
X_q2 = df_q2
X_q4 = df_q4

### Split and scale data

In [28]:
# Split data into training and testing
X1_train, X1_test, y1_train, y1_test=train_test_split(X_q1,y1, train_size=0.8, random_state=42, stratify=y1)
X2_train, X2_test, y2_train, y2_test=train_test_split(X_q2,y2, train_size=0.8, random_state=42, stratify=y2)
X3_train, X3_test, y3_train, y3_test=train_test_split(X_q4,y3, train_size=0.8, random_state=42, stratify=y3)

In [48]:
# Remove shuffle to see if model performs better
# Split data into training and testing
# X1_train, X1_test, y1_train, y1_test=train_test_split(X_q1,y1, train_size=0.8, random_state=42, shuffle=False)
# X2_train, X2_test, y2_train, y2_test=train_test_split(X_q2,y2, train_size=0.8, random_state=42, shuffle=False)
# X3_train, X3_test, y3_train, y3_test=train_test_split(X_q4,y3, train_size=0.8, random_state=42, shuffle=False)

In [31]:
# Create scaler object
X1_scaler = StandardScaler().fit(X1_train)
X2_scaler = StandardScaler().fit(X2_train)
X3_scaler = StandardScaler().fit(X3_train)

In [32]:
# Scale training data
X1_train_scaled = X1_scaler.transform(X1_train)
X2_train_scaled = X2_scaler.transform(X2_train)
X3_train_scaled = X3_scaler.transform(X3_train)

# Scale testing data
X1_test_scaled = X1_scaler.transform(X1_test)
X2_test_scaled = X2_scaler.transform(X2_test)
X3_test_scaled = X3_scaler.transform(X3_test)

In [33]:
X1_train_scaled.shape

(136, 89)

### Reshape data to fit LSTM format

In [67]:
# Method to reshape data
def reshape_data(obj):
    reshaped_obj = np.reshape(obj, (obj.shape[0], obj.shape[1], 1))
    return reshaped_obj

In [68]:
# Reshape training data
reshaped_X1_train_scaled = reshape_data(X1_train_scaled)
reshaped_X2_train_scaled = reshape_data(X2_train_scaled)
reshaped_X3_train_scaled = reshape_data(X3_train_scaled)

In [69]:
# Reshape testing data
reshaped_X1_test_scaled = reshape_data(X1_test_scaled)
reshaped_X2_test_scaled = reshape_data(X2_test_scaled)
reshaped_X3_test_scaled = reshape_data(X3_test_scaled)

## Build Model

In [75]:
# Initialize model
model = Sequential()

In [76]:
# Add layers
model.add(LSTM(128, input_shape=(reshaped_X1_train_scaled.shape[1],1), return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(LSTM(128))
# model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(2, activation='softmax'))

In [77]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

**Train and predict on X1-Y1 data (recession 1 quarter out)**

In [78]:
# Fit the model to the training data
# Shuffle True/False to randomize the training data rows being fed into the model
model.fit(reshaped_X1_train_scaled, y1_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
127/127 - 18s - loss: 1.2331 - acc: 0.4488
Epoch 2/100
127/127 - 1s - loss: 0.9149 - acc: 0.5748
Epoch 3/100
127/127 - 1s - loss: 0.7144 - acc: 0.7008
Epoch 4/100
127/127 - 1s - loss: 0.6271 - acc: 0.7559
Epoch 5/100
127/127 - 1s - loss: 0.5518 - acc: 0.8189
Epoch 6/100
127/127 - 1s - loss: 0.5122 - acc: 0.8189
Epoch 7/100
127/127 - 1s - loss: 0.4326 - acc: 0.8819
Epoch 8/100
127/127 - 1s - loss: 0.5720 - acc: 0.8110
Epoch 9/100
127/127 - 1s - loss: 0.4895 - acc: 0.8189
Epoch 10/100
127/127 - 1s - loss: 0.5411 - acc: 0.7953
Epoch 11/100
127/127 - 1s - loss: 0.4774 - acc: 0.8189
Epoch 12/100
127/127 - 1s - loss: 0.4550 - acc: 0.8425
Epoch 13/100
127/127 - 1s - loss: 0.4550 - acc: 0.8189
Epoch 14/100
127/127 - 1s - loss: 0.4537 - acc: 0.8268
Epoch 15/100
127/127 - 1s - loss: 0.3812 - acc: 0.8425
Epoch 16/100
127/127 - 1s - loss: 0.4191 - acc: 0.8346
Epoch 17/100
127/127 - 1s - loss: 0.4179 - acc: 0.8425
Epoch 18/100
127/127 - 1s - loss: 0.4250 - acc: 0.8504
Epoch 19/100
127/1

<tensorflow.python.keras.callbacks.History at 0x1a6baa0e48>

In [79]:
# Validate model using test data
model_loss1, model_accuracy1 = model.evaluate(reshaped_X1_test_scaled, y1_test, verbose=2)

43/43 - 3s - loss: 1.1688 - acc: 0.7674


In [28]:
# Make predictions using test data
predictions1 = model.predict_classes(reshaped_X1_test_scaled)
predictions1

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
X1_test

**Train and predict on X2-Y2 data (recession 2 quarters out)**

In [32]:
# Fit the model to the training data
model.fit(reshaped_X2_train_scaled, y2_train, epochs=100, shuffle=False, verbose=2)

Epoch 1/100
135/135 - 7s - loss: 0.4325 - acc: 0.9111
Epoch 2/100
135/135 - 7s - loss: 0.2475 - acc: 0.9185
Epoch 3/100
135/135 - 7s - loss: 0.2704 - acc: 0.9259
Epoch 4/100
135/135 - 7s - loss: 0.1096 - acc: 0.9704
Epoch 5/100
135/135 - 7s - loss: 0.2036 - acc: 0.9185
Epoch 6/100
135/135 - 7s - loss: 0.1546 - acc: 0.9556
Epoch 7/100
135/135 - 7s - loss: 0.1493 - acc: 0.9333
Epoch 8/100
135/135 - 7s - loss: 0.1495 - acc: 0.9556
Epoch 9/100
135/135 - 7s - loss: 0.1225 - acc: 0.9630
Epoch 10/100
135/135 - 7s - loss: 0.0866 - acc: 0.9630
Epoch 11/100
135/135 - 7s - loss: 0.0649 - acc: 0.9926
Epoch 12/100
135/135 - 7s - loss: 0.0541 - acc: 0.9926
Epoch 13/100
135/135 - 7s - loss: 0.0535 - acc: 0.9852
Epoch 14/100
135/135 - 7s - loss: 0.0730 - acc: 0.9852
Epoch 15/100
135/135 - 7s - loss: 0.0545 - acc: 0.9852
Epoch 16/100
135/135 - 7s - loss: 0.0397 - acc: 0.9926
Epoch 17/100
135/135 - 7s - loss: 0.0667 - acc: 0.9778
Epoch 18/100
135/135 - 7s - loss: 0.0630 - acc: 0.9926
Epoch 19/100
135/13

<tensorflow.python.keras.callbacks.History at 0x1a453f3278>

In [33]:
# Validate model using test data
model_loss2, model_accuracy2 = model.evaluate(reshaped_X2_test_scaled, y2_test, verbose=2)

34/34 - 0s - loss: 2.8982 - acc: 0.5588


In [34]:
# Make predictions using test data
predictions2 = model.predict_classes(reshaped_X2_test_scaled)
predictions2

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

**Train and predict on X3-Y3 data (recession 4 quarters out)**

In [52]:
# Fit the model to the training data
model.fit(reshaped_X3_train_scaled, y3_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
83/83 - 5s - loss: 0.7856 - acc: 0.8916
Epoch 2/100
83/83 - 5s - loss: 0.5266 - acc: 0.9398
Epoch 3/100
83/83 - 5s - loss: 0.3128 - acc: 0.9157
Epoch 4/100
83/83 - 4s - loss: 0.3255 - acc: 0.8916
Epoch 5/100
83/83 - 4s - loss: 0.2946 - acc: 0.9036
Epoch 6/100
83/83 - 5s - loss: 0.1655 - acc: 0.9036
Epoch 7/100
83/83 - 5s - loss: 0.2359 - acc: 0.9157
Epoch 8/100
83/83 - 5s - loss: 0.1651 - acc: 0.9277
Epoch 9/100
83/83 - 6s - loss: 0.1412 - acc: 0.9639
Epoch 10/100
83/83 - 5s - loss: 0.1275 - acc: 0.9518
Epoch 11/100
83/83 - 5s - loss: 0.1329 - acc: 0.9518
Epoch 12/100
83/83 - 4s - loss: 0.1595 - acc: 0.9036
Epoch 13/100
83/83 - 5s - loss: 0.1147 - acc: 0.9518
Epoch 14/100
83/83 - 5s - loss: 0.1514 - acc: 0.9157
Epoch 15/100
83/83 - 5s - loss: 0.1236 - acc: 0.9036
Epoch 16/100
83/83 - 6s - loss: 0.0892 - acc: 0.9880
Epoch 17/100
83/83 - 5s - loss: 0.0859 - acc: 0.9759
Epoch 18/100
83/83 - 5s - loss: 0.1016 - acc: 0.9518
Epoch 19/100
83/83 - 5s - loss: 0.0670 - acc: 0.9759
Ep

<tensorflow.python.keras.callbacks.History at 0x1a40d3a2e8>

In [35]:
# Validate model using test data
model_loss3, model_accuracy3 = model.evaluate(reshaped_X3_test_scaled, y3_test, verbose=2)

34/34 - 0s - loss: 2.8791 - acc: 0.5882


In [56]:
# Make predictions using test data
predictions3 = model.predict_classes(reshaped_X3_test_scaled)
predictions3

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
X3_test