### Import dependencies

In [1]:
import pandas as pd
import numpy as np
from functools import reduce
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import datetime as dt
from sklearn.metrics import confusion_matrix, classification_report

### Merge datasets

In [2]:
# Read in data
cpi = pd.read_csv("resources/cpi_final.csv")
gdp = pd.read_csv("resources/gdp_final.csv")
gdp_pct = pd.read_csv("resources/gdp_pct_chg_final.csv")
houst = pd.read_csv("resources/housing_starts_final.csv")
opg = pd.read_csv("resources/output_gap_final.csv")
rec_dt = pd.read_csv("resources/recession_dates_final.csv")
unrate = pd.read_csv("resources/unemployment_rate_final.csv")
fed_funds = pd.read_csv("resources/fed_funds_final.csv")
yield10_2 = pd.read_csv("resources/10YT_minus_2YT_final.csv")
fred = pd.read_csv("resources/FRED_data.csv")

In [3]:
# Combine all data sets into one data frame
dfs = [cpi, gdp, gdp_pct, houst, opg, rec_dt, unrate, fed_funds, yield10_2, fred]
df = reduce(lambda left,right: pd.merge(left,right,on=['quarter'],how='outer'), dfs)

In [4]:
# Drop date columns
df = df.drop(columns=['date_x','date_y'])

In [5]:
# Sort data frame by quarter
df = df.sort_values(by=['quarter'])

In [6]:
# Drop rows with missing values
df = df.dropna()

In [7]:
# Set index to quarter
df = df.set_index('quarter')

In [8]:
# Rename target column
df = df.rename(columns={'target':'recession_actual'})
df.head()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,recession_actual,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,...,10YT_minus_2YT_percent_change_prev_quarter,real_disp_pers_inc,personal_consumption_exp_excl_food_energy,cpi_US_total,tot_public_debt_as_pct_of_gdp,gross_private_domestic_invest,M2_velocity,median_sls_price_houses_sold_US,nat_rate_of_unemp_long_term,personal_consumption_expenditures
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1976Q3,57.3,1886.558,7.6,1557.0,-2.199151,0.0,7.6,5.283478,0.016956,0.100618,...,0.370833,3.2,6.0,5.518087,33.64333,328.307,1.717,44400.0,6.217,1158.806
1976Q4,58.133333,1934.273,10.5,1691.333333,-2.246705,0.0,7.333333,4.874239,-0.077456,0.211941,...,0.337386,2.6,6.0,5.069403,33.78753,337.65,1.699,45500.0,6.223,1192.408
1977Q1,59.2,1988.648,11.7,1844.333333,-1.877175,0.0,8.233333,4.660667,-0.043817,0.148254,...,-0.095455,0.9,6.2,5.857741,33.65136,360.313,1.689,46300.0,6.227,1228.212
1977Q2,60.233333,2055.909,14.2,1918.666667,-0.776696,0.0,6.933333,5.157473,0.106595,0.332835,...,-0.052764,3.8,6.5,6.847698,32.80422,389.703,1.701,48900.0,6.232,1255.98
1977Q3,61.066667,2118.473,12.7,2009.0,0.186001,0.0,6.8,5.816413,0.127764,0.344309,...,-0.342175,5.7,6.6,6.682162,32.98791,414.134,1.713,48800.0,6.235,1286.905


### Shift data with sliding window technique

In [9]:
df['recession_1q_out'] = df['recession_actual'].shift(-1)
df['recession_2q_out'] = df['recession_actual'].shift(-2)
df['recession_4q_out'] = df['recession_actual'].shift(-4)

In [10]:
# Create three datasets -- 1 for each model (recession 1Qtr out, 2Qtrs out, 4Qtrs out)
df_q1 = df.drop(columns=['recession_2q_out','recession_4q_out','recession_actual'])
df_q2 = df.drop(columns=['recession_4q_out','recession_1q_out','recession_actual'])
df_q4 = df.drop(columns=['recession_1q_out','recession_2q_out','recession_actual'])

In [11]:
# Delete missing values
df_q1 = df_q1.dropna()
df_q2 = df_q2.dropna()
df_q4 = df_q4.dropna()

In [12]:
# Define y variables
y1 = df_q1['recession_1q_out']
y2 = df_q2['recession_2q_out']
y3 = df_q4['recession_4q_out']

In [13]:
# Drop target
df_q1 = df_q1.drop(columns=['recession_1q_out'])
df_q2 = df_q2.drop(columns=['recession_2q_out'])
df_q4 = df_q4.drop(columns=['recession_4q_out'])

In [14]:
# Define X
X_q1 = df_q1
X_q2 = df_q2
X_q4 = df_q4

### Split and scale data

In [15]:
# Split data into training and testing
X1_train, X1_test, y1_train, y1_test=train_test_split(X_q1, y1, train_size=0.8, random_state=42, stratify=y1)
X2_train, X2_test, y2_train, y2_test=train_test_split(X_q2, y2, train_size=0.8, random_state=42, stratify=y2)
X3_train, X3_test, y3_train, y3_test=train_test_split(X_q4, y3, train_size=0.8, random_state=42, stratify=y3)

In [16]:
# Create scaler object
X1_scaler = StandardScaler().fit(X1_train)
X2_scaler = StandardScaler().fit(X2_train)
X3_scaler = StandardScaler().fit(X3_train)

# X full scaler object
X1_full_scaler = StandardScaler().fit(X_q1)
X2_full_scaler = StandardScaler().fit(X_q2)
X3_full_scaler = StandardScaler().fit(X_q4)

In [17]:
# Scale training data
X1_train_scaled = X1_scaler.transform(X1_train)
X2_train_scaled = X2_scaler.transform(X2_train)
X3_train_scaled = X3_scaler.transform(X3_train)

# Scale testing data
X1_test_scaled = X1_scaler.transform(X1_test)
X2_test_scaled = X2_scaler.transform(X2_test)
X3_test_scaled = X3_scaler.transform(X3_test)

# Scale full X data (no splits)
X1_full_scaled = X1_full_scaler.transform(X_q1)
X2_full_scaled = X2_full_scaler.transform(X_q2)
X3_full_scaled = X3_full_scaler.transform(X_q4)

### Reshape data to fit LSTM format

In [18]:
# Method to reshape data
def reshape_data(obj):
    reshaped_obj = np.reshape(obj, (obj.shape[0], obj.shape[1], 1))
    return reshaped_obj

In [19]:
# Reshape training data
reshaped_X1_train_scaled = reshape_data(X1_train_scaled)
reshaped_X2_train_scaled = reshape_data(X2_train_scaled)
reshaped_X3_train_scaled = reshape_data(X3_train_scaled)

In [20]:
# Reshape testing data
reshaped_X1_test_scaled = reshape_data(X1_test_scaled)
reshaped_X2_test_scaled = reshape_data(X2_test_scaled)
reshaped_X3_test_scaled = reshape_data(X3_test_scaled)

In [21]:
# Reshape X_full
reshaped_X1_full = reshape_data(X1_full_scaled)
reshaped_X2_full = reshape_data(X2_full_scaled)
reshaped_X3_full = reshape_data(X3_full_scaled)

# Build Model

In [22]:
# Initialize model
model = Sequential()

In [23]:
# Add layers
model.add(LSTM(128, input_shape=(reshaped_X1_train_scaled.shape[1],1), return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())  # Normalize activation outputs

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(2, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [24]:
# Compile model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

### Train and predict on X1-Y1 data (recession 1 quarter out)

In [25]:
# Fit the model to the training data
model.fit(reshaped_X1_train_scaled, y1_train, validation_split=0.2, epochs=100, shuffle=True, verbose=2)

Train on 108 samples, validate on 28 samples
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/100
108/108 - 14s - loss: 1.2851 - acc: 0.3519 - val_loss: 0.6892 - val_acc: 0.6429
Epoch 2/100
108/108 - 1s - loss: 0.7912 - acc: 0.5833 - val_loss: 0.6841 - val_acc: 0.6429
Epoch 3/100
108/108 - 1s - loss: 0.8677 - acc: 0.5741 - val_loss: 0.6818 - val_acc: 0.6429
Epoch 4/100
108/108 - 1s - loss: 0.6519 - acc: 0.7037 - val_loss: 0.6801 - val_acc: 0.6429
Epoch 5/100
108/108 - 1s - loss: 0.6304 - acc: 0.7130 - val_loss: 0.6784 - val_acc: 0.6429
Epoch 6/100
108/108 - 1s - loss: 0.5673 - acc: 0.7407 - val_loss: 0.6791 - val_acc: 0.6429
Epoch 7/100
108/108 - 1s - loss: 0.4648 - acc: 0.8056 - val_loss: 0.6803 - val_acc: 0.6429
Epoch 8/100
108/108 - 1s - loss: 0.4089 - acc: 0.8704 - val_loss: 0.6797 - val_acc: 0.6429
Epoch 9/100
108/108 - 1s - loss: 0.4715 - acc: 0.7778 - val_loss: 0.6804 - val_acc: 0.6429
Epoch 10/100
108/108 - 1s - loss: 0.4449 

108/108 - 1s - loss: 0.1807 - acc: 0.9537 - val_loss: 1.6939 - val_acc: 0.6786
Epoch 87/100
108/108 - 1s - loss: 0.2401 - acc: 0.9074 - val_loss: 1.6681 - val_acc: 0.6786
Epoch 88/100
108/108 - 1s - loss: 0.1885 - acc: 0.9352 - val_loss: 1.6711 - val_acc: 0.6786
Epoch 89/100
108/108 - 1s - loss: 0.1413 - acc: 0.9352 - val_loss: 1.6573 - val_acc: 0.6786
Epoch 90/100
108/108 - 1s - loss: 0.1945 - acc: 0.9352 - val_loss: 1.5825 - val_acc: 0.7143
Epoch 91/100
108/108 - 1s - loss: 0.1729 - acc: 0.9352 - val_loss: 1.5766 - val_acc: 0.7143
Epoch 92/100
108/108 - 1s - loss: 0.1884 - acc: 0.9352 - val_loss: 1.5388 - val_acc: 0.6786
Epoch 93/100
108/108 - 1s - loss: 0.2434 - acc: 0.9352 - val_loss: 1.6082 - val_acc: 0.6786
Epoch 94/100
108/108 - 1s - loss: 0.1670 - acc: 0.9167 - val_loss: 1.6294 - val_acc: 0.7143
Epoch 95/100
108/108 - 1s - loss: 0.1222 - acc: 0.9630 - val_loss: 1.6474 - val_acc: 0.7143
Epoch 96/100
108/108 - 1s - loss: 0.1484 - acc: 0.9352 - val_loss: 1.6099 - val_acc: 0.7143
E

<tensorflow.python.keras.callbacks.History at 0x1a3ce55a58>

In [26]:
# Evaluate model using test data
model_loss1, model_accuracy1 = model.evaluate(reshaped_X1_test_scaled, y1_test, verbose=2)

34/34 - 0s - loss: 1.0003 - acc: 0.8529


In [27]:
# Make predictions using test data
predictions1 = model.predict_classes(reshaped_X1_test_scaled)

In [28]:
# Compare results
one_qtr_out = pd.DataFrame({"Predicted":predictions1, "Actual":y1_test})
# one_qtr_out

In [29]:
# Save model
name1 = f"shuffled-1q-out-{int(dt.datetime.now())}"
model.save(f"models/{name1}.h5")

#### Predict on full X1

In [30]:
pred_X1_full = model.predict_classes(reshaped_X1_full)
X1_full_results = pd.DataFrame({"Predicted":pred_X1_full, "Actual":y1})
X1_full_results.to_csv(f"resources/predictions/X1_full_shuffled_{int(dt.datetime.now())}.csv")
# X1_full_results.loc[X1_full_results["Actual"]==1]

#### Confusion Matrix on X1-Y1 data (recession 1 quarter out)

In [31]:
# Create confusion matrix on X1 model
con_mat = confusion_matrix(y1_test, predictions1)
print(con_mat)

[[29  0]
 [ 5  0]]


In [32]:
# Score model
print(classification_report(y1_test, predictions1))

              precision    recall  f1-score   support

         0.0       0.85      1.00      0.92        29
         1.0       0.00      0.00      0.00         5

    accuracy                           0.85        34
   macro avg       0.43      0.50      0.46        34
weighted avg       0.73      0.85      0.79        34



  'precision', 'predicted', average, warn_for)


### Train and predict on X2-Y2 data (recession 2 quarters out)

In [33]:
# Fit the model to the training data
model.fit(reshaped_X2_train_scaled, y2_train, validation_split=0.2, epochs=100, shuffle=True, verbose=2)

Train on 108 samples, validate on 27 samples
Epoch 1/100
108/108 - 2s - loss: 0.4598 - acc: 0.8796 - val_loss: 1.5380 - val_acc: 0.6296
Epoch 2/100
108/108 - 2s - loss: 0.3847 - acc: 0.9167 - val_loss: 1.4471 - val_acc: 0.6667
Epoch 3/100
108/108 - 2s - loss: 0.4034 - acc: 0.8796 - val_loss: 1.4798 - val_acc: 0.6667
Epoch 4/100
108/108 - 1s - loss: 0.3588 - acc: 0.9074 - val_loss: 1.4635 - val_acc: 0.6667
Epoch 5/100
108/108 - 2s - loss: 0.4300 - acc: 0.8889 - val_loss: 1.5156 - val_acc: 0.6667
Epoch 6/100
108/108 - 2s - loss: 0.2173 - acc: 0.9259 - val_loss: 1.6702 - val_acc: 0.6296
Epoch 7/100
108/108 - 1s - loss: 0.2427 - acc: 0.9167 - val_loss: 1.7026 - val_acc: 0.6296
Epoch 8/100
108/108 - 2s - loss: 0.2810 - acc: 0.9074 - val_loss: 1.6614 - val_acc: 0.6667
Epoch 9/100
108/108 - 2s - loss: 0.2784 - acc: 0.8889 - val_loss: 1.5880 - val_acc: 0.6667
Epoch 10/100
108/108 - 2s - loss: 0.1753 - acc: 0.9352 - val_loss: 1.4921 - val_acc: 0.6667
Epoch 11/100
108/108 - 2s - loss: 0.2404 - a

Epoch 90/100
108/108 - 3s - loss: 0.0392 - acc: 0.9907 - val_loss: 2.1268 - val_acc: 0.6296
Epoch 91/100
108/108 - 3s - loss: 0.0303 - acc: 1.0000 - val_loss: 2.1386 - val_acc: 0.6296
Epoch 92/100
108/108 - 3s - loss: 0.0637 - acc: 0.9815 - val_loss: 2.1945 - val_acc: 0.6296
Epoch 93/100
108/108 - 2s - loss: 0.0940 - acc: 0.9630 - val_loss: 1.8379 - val_acc: 0.6667
Epoch 94/100
108/108 - 2s - loss: 0.1112 - acc: 0.9815 - val_loss: 1.1909 - val_acc: 0.7407
Epoch 95/100
108/108 - 2s - loss: 0.1741 - acc: 0.9630 - val_loss: 1.0212 - val_acc: 0.7037
Epoch 96/100
108/108 - 3s - loss: 0.0792 - acc: 0.9722 - val_loss: 1.0522 - val_acc: 0.7778
Epoch 97/100
108/108 - 2s - loss: 0.0735 - acc: 0.9815 - val_loss: 1.2720 - val_acc: 0.7037
Epoch 98/100
108/108 - 2s - loss: 0.1179 - acc: 0.9722 - val_loss: 1.5257 - val_acc: 0.6296
Epoch 99/100
108/108 - 2s - loss: 0.0908 - acc: 0.9537 - val_loss: 1.7163 - val_acc: 0.6296
Epoch 100/100
108/108 - 2s - loss: 0.0902 - acc: 0.9537 - val_loss: 1.8089 - val

<tensorflow.python.keras.callbacks.History at 0x1a3d145cc0>

In [34]:
# Evaluate model using test data
model_loss2, model_accuracy2 = model.evaluate(reshaped_X2_test_scaled, y2_test, verbose=2)

34/34 - 0s - loss: 0.7892 - acc: 0.8235


In [35]:
# Make predictions using test data
predictions2 = model.predict_classes(reshaped_X2_test_scaled)

In [37]:
# Compare results
two_qtrs_out = pd.DataFrame({"Predicted":predictions2, "Actual":y2_test})
# two_qtrs_out

In [38]:
# Save model
name2 = f"shuffled-2q-out-{int(dt.datetime.now())}"
model.save(f"models/{name2}.h5")

#### Predict on full X2

In [39]:
pred_X2_full = model.predict_classes(reshaped_X2_full)
X2_full_results = pd.DataFrame({"Predicted":pred_X2_full, "Actual":y2})
X2_full_results.to_csv(f"resources/predictions/X2_full_shuffled_{int(dt.datetime.now())}.csv")
# X2_full_results.loc[X2_full_results["Actual"]==1]

#### Confusion Matrix on X2-Y2 data (recession 2 quarters out)

In [40]:
# Create confusion matrix on X2 model
con_mat = confusion_matrix(y2_test, predictions2)
print(con_mat)

[[27  2]
 [ 4  1]]


In [41]:
# Score model
print(classification_report(y2_test, predictions2))

              precision    recall  f1-score   support

         0.0       0.87      0.93      0.90        29
         1.0       0.33      0.20      0.25         5

    accuracy                           0.82        34
   macro avg       0.60      0.57      0.57        34
weighted avg       0.79      0.82      0.80        34



### Train and predict on X3-Y3 data (recession 4 quarters out)

In [42]:
# Fit the model to the training data
model.fit(reshaped_X3_train_scaled, y3_train, validation_split=0.2, epochs=100, shuffle=True, verbose=2)

Train on 106 samples, validate on 27 samples
Epoch 1/100
106/106 - 2s - loss: 0.5956 - acc: 0.8679 - val_loss: 1.2949 - val_acc: 0.7407
Epoch 2/100
106/106 - 2s - loss: 0.5008 - acc: 0.8679 - val_loss: 0.9691 - val_acc: 0.6667
Epoch 3/100
106/106 - 2s - loss: 0.4562 - acc: 0.8585 - val_loss: 0.9586 - val_acc: 0.6667
Epoch 4/100
106/106 - 1s - loss: 0.2959 - acc: 0.8868 - val_loss: 1.0320 - val_acc: 0.5926
Epoch 5/100
106/106 - 1s - loss: 0.2441 - acc: 0.9057 - val_loss: 1.0507 - val_acc: 0.5556
Epoch 6/100
106/106 - 1s - loss: 0.1599 - acc: 0.9340 - val_loss: 0.9817 - val_acc: 0.5556
Epoch 7/100
106/106 - 1s - loss: 0.1926 - acc: 0.9245 - val_loss: 0.8635 - val_acc: 0.6667
Epoch 8/100
106/106 - 2s - loss: 0.2792 - acc: 0.8585 - val_loss: 0.7312 - val_acc: 0.7407
Epoch 9/100
106/106 - 1s - loss: 0.2775 - acc: 0.9057 - val_loss: 0.6937 - val_acc: 0.7407
Epoch 10/100
106/106 - 1s - loss: 0.2591 - acc: 0.9057 - val_loss: 0.6463 - val_acc: 0.7778
Epoch 11/100
106/106 - 1s - loss: 0.1994 - a

Epoch 90/100
106/106 - 2s - loss: 0.1194 - acc: 0.9434 - val_loss: 1.6039 - val_acc: 0.7778
Epoch 91/100
106/106 - 2s - loss: 0.1170 - acc: 0.9434 - val_loss: 1.6045 - val_acc: 0.7778
Epoch 92/100
106/106 - 2s - loss: 0.1075 - acc: 0.9717 - val_loss: 1.4824 - val_acc: 0.7778
Epoch 93/100
106/106 - 2s - loss: 0.1020 - acc: 0.9623 - val_loss: 1.3270 - val_acc: 0.7778
Epoch 94/100
106/106 - 2s - loss: 0.0995 - acc: 0.9528 - val_loss: 1.2561 - val_acc: 0.8148
Epoch 95/100
106/106 - 2s - loss: 0.1231 - acc: 0.9340 - val_loss: 1.2638 - val_acc: 0.8148
Epoch 96/100
106/106 - 2s - loss: 0.0835 - acc: 0.9717 - val_loss: 1.3043 - val_acc: 0.8148
Epoch 97/100
106/106 - 2s - loss: 0.1069 - acc: 0.9623 - val_loss: 1.5461 - val_acc: 0.7778
Epoch 98/100
106/106 - 2s - loss: 0.0685 - acc: 0.9717 - val_loss: 1.7136 - val_acc: 0.7778
Epoch 99/100
106/106 - 2s - loss: 0.0666 - acc: 0.9717 - val_loss: 1.7564 - val_acc: 0.7778
Epoch 100/100
106/106 - 3s - loss: 0.0847 - acc: 0.9623 - val_loss: 1.7018 - val

<tensorflow.python.keras.callbacks.History at 0x1a4266eb38>

In [43]:
# Validate model using test data
model_loss3, model_accuracy3 = model.evaluate(reshaped_X3_test_scaled, y3_test, verbose=2)

34/34 - 0s - loss: 0.4408 - acc: 0.9412


In [44]:
# Make predictions using test data
predictions3 = model.predict_classes(reshaped_X3_test_scaled)

In [46]:
# Compare results
four_qtrs_out = pd.DataFrame({"Predicted":predictions3, "Actual":y3_test})
# four_qtrs_out

In [47]:
# Save model
name3 = f"shuffled-4q-out-{int(dt.datetime.now())}"
model.save(f"models/{name3}.h5")

#### Predict on full X3

In [48]:
pred_X3_full = model.predict_classes(reshaped_X3_full)
X3_full_results = pd.DataFrame({"Predicted":pred_X3_full, "Actual":y3})
X3_full_results.to_csv(f"resources/predictions/X3_full_shuffled_{int(dt.datetime.now())}.csv")
# X3_full_results.loc[X3_full_results["Actual"]==1]

#### Confusion Matrix on X3-Y3 data (recession 4 quarters out)

In [49]:
# Create confusion matrix on X3 model
con_mat = confusion_matrix(y3_test, predictions3)
print(con_mat)

[[29  0]
 [ 2  3]]


In [50]:
# Score model
print(classification_report(y3_test, predictions3))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97        29
         1.0       1.00      0.60      0.75         5

    accuracy                           0.94        34
   macro avg       0.97      0.80      0.86        34
weighted avg       0.94      0.94      0.93        34

