### Import dependencies

In [1]:
import pandas as pd
import numpy as np
from functools import reduce
# import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Merge datasets

In [2]:
# Read in data
cpi = pd.read_csv("../resources/cpi_final.csv")
gdp = pd.read_csv("../resources/gdp_final.csv")
gdp_pct = pd.read_csv("../resources/gdp_pct_chg_final.csv")
houst = pd.read_csv("../resources/housing_starts_final.csv")
opg = pd.read_csv("../resources/output_gap_final.csv")
rec_dt = pd.read_csv("../resources/recession_dates_final.csv")
unrate = pd.read_csv("../resources/unemployment_rate_final.csv")
fed_funds = pd.read_csv("../resources/fed_funds_final.csv")
yield10_2 = pd.read_csv("../resources/10YT_minus_2YT_final.csv")

In [3]:
# Combine all data sets into one data frame
dfs = [cpi, gdp, gdp_pct, houst, opg, rec_dt, unrate, fed_funds, yield10_2]
df = reduce(lambda  left,right: pd.merge(left,right,on=['quarter'],how='outer'), dfs)
df.head()

Unnamed: 0,quarter,avg_consumer_price_index,date_x,gdp,date_y,gdp_pct_change,avg_housing_starts,date_x.1,output_gap,date_y.1,target,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter
0,1947Q1,21.7,1947-01-01,243.164,,,,,,1947-01-01,0.0,,,,,,
1,1947Q2,22.01,1947-04-01,245.968,1947-04-01,4.7,,,,1947-04-01,0.0,,,,,,
2,1947Q3,22.49,1947-07-01,249.585,1947-07-01,6.0,,,,1947-07-01,0.0,,,,,,
3,1947Q4,23.126667,1947-10-01,259.745,1947-10-01,17.3,,,,1947-10-01,0.0,,,,,,
4,1948Q1,23.616667,1948-01-01,265.742,1948-01-01,9.6,,,,1948-01-01,0.0,4.4,,,,,


In [4]:
# Drop date columns
df = df.iloc[:,[0,1,3,5,6,8,10,11,12,13,14,15,16]]

In [5]:
# Sort data frame by quarter
df = df.sort_values(by=['quarter'])

In [6]:
# Drop rows with missing values
df = df.dropna()

In [7]:
# Set index to quarter
df = df.set_index('quarter')

In [9]:
# Rename target column
df = df.rename(columns={'target':'recession_actual'})
df.head()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,recession_actual
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1976Q3,57.3,1886.558,7.6,1557.0,-2.199151,7.6,5.283478,0.016956,0.100618,1.096667,0.370833,0.0
1976Q4,58.133333,1934.273,10.5,1691.333333,-2.246705,7.333333,4.874239,-0.077456,0.211941,1.466667,0.337386,0.0
1977Q1,59.2,1988.648,11.7,1844.333333,-1.877175,8.233333,4.660667,-0.043817,0.148254,1.326667,-0.095455,0.0
1977Q2,60.233333,2055.909,14.2,1918.666667,-0.776696,6.933333,5.157473,0.106595,0.332835,1.256667,-0.052764,0.0
1977Q3,61.066667,2118.473,12.7,2009.0,0.186001,6.8,5.816413,0.127764,0.344309,0.826667,-0.342175,0.0


### Shift data with sliding window technique

In [10]:
df['recession_1q_out'] = df['recession_actual'].shift(-1)
df['recession_2q_out'] = df['recession_actual'].shift(-2)
df['recession_4q_out'] = df['recession_actual'].shift(-4)

In [11]:
# Delete missing values
df = df.dropna()
df.tail()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,recession_actual,recession_1q_out,recession_2q_out,recession_4q_out
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017Q1,243.83,19190.431,4.2,1230.666667,-0.861917,4.866667,0.698889,0.55949,0.09883,1.203333,0.071217,0.0,0.0,0.0,0.0
2017Q2,244.065,19356.649,3.5,1169.333333,-0.752038,4.233333,0.947363,0.355527,0.098588,0.97,-0.193906,0.0,0.0,0.0,0.0
2017Q3,245.368333,19611.704,5.4,1175.333333,-0.396155,4.4,1.153696,0.217797,0.024029,0.88,-0.092784,0.0,0.0,0.0,0.0
2017Q4,247.273333,19918.91,6.4,1259.666667,0.033653,3.9,1.202778,0.042543,0.09912,0.673333,-0.234848,0.0,0.0,0.0,0.0
2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,4.333333,1.448966,0.204683,0.083902,0.596667,-0.113861,0.0,0.0,0.0,0.0


In [12]:
# Define y variables
y1 = df['recession_1q_out']
y2 = df['recession_2q_out']
y3 = df['recession_4q_out']

In [13]:
# Drop recession columns
df = df.drop(columns=['recession_actual','recession_1q_out','recession_2q_out','recession_4q_out'])

In [14]:
# Define X
X = df

### Split and scale data

In [15]:
# Split data into training and testing
X1_train, X1_test, y1_train, y1_test=train_test_split(X,y1, train_size=0.8, random_state=42, stratify=y1)
X2_train, X2_test, y2_train, y2_test=train_test_split(X,y2, train_size=0.8, random_state=42, stratify=y2)
X3_train, X3_test, y3_train, y3_test=train_test_split(X,y3, train_size=0.8, random_state=42, stratify=y3)

In [16]:
# Create scaler object
X1_scaler = StandardScaler().fit(X1_train)
X2_scaler = StandardScaler().fit(X2_train)
X3_scaler = StandardScaler().fit(X3_train)

In [17]:
# Scale training data
X1_train_scaled = X1_scaler.transform(X1_train)
X2_train_scaled = X2_scaler.transform(X2_train)
X3_train_scaled = X3_scaler.transform(X3_train)

# Scale testing data
X1_test_scaled = X1_scaler.transform(X1_test)
X2_test_scaled = X2_scaler.transform(X2_test)
X3_test_scaled = X3_scaler.transform(X3_test)

### Reshape data using np.reshape

In [18]:
# Method to reshape data
def reshape_data(obj):
    reshaped_obj = np.reshape(obj, (obj.shape[0], obj.shape[1], 1))
    return reshaped_obj

In [19]:
# Reshape training data
reshaped_X1_train_scaled = reshape_data(X1_train_scaled)
reshaped_X2_train_scaled = reshape_data(X2_train_scaled)
reshaped_X3_train_scaled = reshape_data(X3_train_scaled)

In [20]:
# Reshape testing data
reshaped_X1_test_scaled = reshape_data(X1_test_scaled)
reshaped_X2_test_scaled = reshape_data(X2_test_scaled)
reshaped_X3_test_scaled = reshape_data(X3_test_scaled)

## Build Model

In [35]:
# Initialize model
model = Sequential()

In [36]:
# Add layers
model.add(LSTM(128, input_shape=(reshaped_X1_train_scaled.shape[1],1), return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))

model.add(Dense(2, activation='softmax'))

In [37]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

**Train and predict on X1-Y1 data (recession 1 quarter out)**

In [38]:
# Fit the model to the training data
model.fit(reshaped_X1_train_scaled, y1_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
133/133 - 11s - loss: 0.9671 - acc: 0.5263
Epoch 2/100
133/133 - 1s - loss: 0.6095 - acc: 0.7068
Epoch 3/100
133/133 - 1s - loss: 0.6236 - acc: 0.7368
Epoch 4/100
133/133 - 1s - loss: 0.6444 - acc: 0.6466
Epoch 5/100
133/133 - 1s - loss: 0.6047 - acc: 0.7293
Epoch 6/100
133/133 - 1s - loss: 0.5466 - acc: 0.8045
Epoch 7/100
133/133 - 1s - loss: 0.5312 - acc: 0.7970
Epoch 8/100
133/133 - 1s - loss: 0.5087 - acc: 0.7594
Epoch 9/100
133/133 - 1s - loss: 0.4280 - acc: 0.8120
Epoch 10/100
133/133 - 1s - loss: 0.4167 - acc: 0.8421
Epoch 11/100
133/133 - 1s - loss: 0.4459 - acc: 0.8346
Epoch 12/100
133/133 - 1s - loss: 0.5366 - acc: 0.7895
Epoch 13/100
133/133 - 1s - loss: 0.4497 - acc: 0.7895
Epoch 14/100
133/133 - 1s - loss: 0.3706 - acc: 0.8421
Epoch 15/100
133/133 - 1s - loss: 0.4242 - acc: 0.7970
Epoch 16/100
133/133 - 1s - loss: 0.5177 - acc: 0.7669
Epoch 17/100
133/133 - 1s - loss: 0.4611 - acc: 0.7744
Epoch 18/100
133/133 - 1s - loss: 0.3872 - acc: 0.8045
Epoch 19/100
133/1

<tensorflow.python.keras.callbacks.History at 0x1a460792e8>

In [39]:
# Validate model using test data
model_loss1, model_accuracy1 = model.evaluate(reshaped_X1_test_scaled, y1_test, verbose=2)

34/34 - 2s - loss: 0.7040 - acc: 0.8529


In [40]:
# Make predictions using test data
predictions1 = model.predict_classes(reshaped_X1_test_scaled)
predictions1

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

**Train and predict on X2-Y2 data (recession 2 quarters out)**

In [41]:
# Fit the model to the training data
model.fit(reshaped_X2_train_scaled, y2_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
133/133 - 1s - loss: 0.4118 - acc: 0.8346
Epoch 2/100
133/133 - 1s - loss: 0.3624 - acc: 0.8571
Epoch 3/100
133/133 - 1s - loss: 0.3126 - acc: 0.8872
Epoch 4/100
133/133 - 1s - loss: 0.3029 - acc: 0.8797
Epoch 5/100
133/133 - 1s - loss: 0.3092 - acc: 0.8947
Epoch 6/100
133/133 - 1s - loss: 0.3223 - acc: 0.8797
Epoch 7/100
133/133 - 1s - loss: 0.2287 - acc: 0.8947
Epoch 8/100
133/133 - 1s - loss: 0.2356 - acc: 0.8872
Epoch 9/100
133/133 - 1s - loss: 0.2561 - acc: 0.8872
Epoch 10/100
133/133 - 1s - loss: 0.2694 - acc: 0.8571
Epoch 11/100
133/133 - 1s - loss: 0.2850 - acc: 0.8571
Epoch 12/100
133/133 - 1s - loss: 0.2706 - acc: 0.8797
Epoch 13/100
133/133 - 1s - loss: 0.2943 - acc: 0.8647
Epoch 14/100
133/133 - 1s - loss: 0.2834 - acc: 0.8346
Epoch 15/100
133/133 - 1s - loss: 0.2106 - acc: 0.8947
Epoch 16/100
133/133 - 1s - loss: 0.2434 - acc: 0.8797
Epoch 17/100
133/133 - 1s - loss: 0.2730 - acc: 0.8496
Epoch 18/100
133/133 - 1s - loss: 0.2619 - acc: 0.8797
Epoch 19/100
133/13

<tensorflow.python.keras.callbacks.History at 0x1a460cac50>

In [42]:
# Validate model using test data
model_loss2, model_accuracy2 = model.evaluate(reshaped_X2_test_scaled, y2_test, verbose=2)

34/34 - 0s - loss: 0.6421 - acc: 0.7647


In [43]:
# Make predictions using test data
predictions2 = model.predict_classes(reshaped_X2_test_scaled)
predictions2

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0])

**Train and predict on X3-Y3 data (recession 4 quarters out)**

In [44]:
# Fit the model to the training data
model.fit(reshaped_X3_train_scaled, y3_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
133/133 - 1s - loss: 0.6031 - acc: 0.7669
Epoch 2/100
133/133 - 1s - loss: 0.4961 - acc: 0.8045
Epoch 3/100
133/133 - 1s - loss: 0.4874 - acc: 0.7820
Epoch 4/100
133/133 - 1s - loss: 0.3469 - acc: 0.8195
Epoch 5/100
133/133 - 1s - loss: 0.2964 - acc: 0.8421
Epoch 6/100
133/133 - 1s - loss: 0.2852 - acc: 0.8797
Epoch 7/100
133/133 - 1s - loss: 0.3111 - acc: 0.8872
Epoch 8/100
133/133 - 1s - loss: 0.2386 - acc: 0.9023
Epoch 9/100
133/133 - 1s - loss: 0.2844 - acc: 0.8947
Epoch 10/100
133/133 - 1s - loss: 0.2371 - acc: 0.8797
Epoch 11/100
133/133 - 1s - loss: 0.2716 - acc: 0.9023
Epoch 12/100
133/133 - 1s - loss: 0.2419 - acc: 0.9023
Epoch 13/100
133/133 - 1s - loss: 0.2495 - acc: 0.8947
Epoch 14/100
133/133 - 1s - loss: 0.2152 - acc: 0.9023
Epoch 15/100
133/133 - 1s - loss: 0.2163 - acc: 0.8797
Epoch 16/100
133/133 - 1s - loss: 0.2180 - acc: 0.8872
Epoch 17/100
133/133 - 1s - loss: 0.2411 - acc: 0.9023
Epoch 18/100
133/133 - 1s - loss: 0.2147 - acc: 0.8947
Epoch 19/100
133/13

<tensorflow.python.keras.callbacks.History at 0x1a4611f1d0>

In [45]:
# Validate model using test data
model_loss3, model_accuracy3 = model.evaluate(reshaped_X3_test_scaled, y3_test, verbose=2)

34/34 - 0s - loss: 0.1106 - acc: 0.9706


In [46]:
# Make predictions using test data
predictions3 = model.predict_classes(reshaped_X3_test_scaled)
predictions3

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])