Notebook focused on optimizing a model to predict a recession within the next four quarters (1 year).

### Import Dependencies

In [3]:
import time

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.models import Sequential, clone_model

### Prep Data

In [4]:
# Load the quarterly data set and use its 'quarter' column as the index
df = pd.read_csv("resources/all_data.csv").set_index('quarter')

In [5]:
# Hold out the 4th- and 3rd-from-last rows (displayed below as 2019Q1 and 2019Q2)
# without the 'recession_actual' column
df_2019 = df.iloc[[-4, -3]].drop(columns=['recession_actual'])
df_2019

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,real_disp_pers_inc,personal_consumption_exp_excl_food_energy,tot_public_debt_as_pct_of_gdp,gross_private_domestic_invest,M2_velocity,median_sls_price_houses_sold_US,personal_consumption_expenditures
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019Q1,253.311333,21098.827,3.9,1213.0,0.848147,4.133333,2.401311,0.083088,0.004646,0.17,-0.271429,4.5,1.6,104.40334,3783.364,1.458,313000.0,14266.25
2019Q2,255.139333,21340.267,4.7,1255.666667,0.828815,3.5,2.397813,-0.001457,0.024002,0.213333,0.254902,2.4,1.6,103.2006,3749.471,1.457,322500.0,14511.176


In [6]:
# Discard every row that has at least one missing value, then preview the last rows
df = df.dropna(axis=0, how='any')
df.tail(5)

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,recession_actual,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,real_disp_pers_inc,personal_consumption_exp_excl_food_energy,tot_public_debt_as_pct_of_gdp,gross_private_domestic_invest,M2_velocity,median_sls_price_houses_sold_US,personal_consumption_expenditures
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,0.0,4.333333,1.448966,0.204683,0.083902,0.596667,-0.113861,6.9,1.8,104.59493,3542.412,1.451,331800.0,13728.357
2018Q2,250.578667,20510.177,7.1,1259.666667,0.589182,0.0,3.833333,1.727176,0.192007,0.075492,0.446667,-0.251397,2.7,2.0,103.33928,3561.592,1.461,315600.0,13939.828
2018Q3,251.828667,20749.752,4.8,1233.0,0.821959,0.0,3.866667,1.923492,0.113663,0.047184,0.253333,-0.432836,3.3,2.0,103.69309,3683.981,1.462,330900.0,14114.559
2018Q4,252.759,20897.804,2.9,1185.0,0.592021,0.0,3.566667,2.217097,0.152641,0.066218,0.233333,-0.078947,2.8,1.9,105.15026,3725.234,1.462,322800.0,14211.92
2019Q1,253.311333,21098.827,3.9,1213.0,0.848147,0.0,4.133333,2.401311,0.083088,0.004646,0.17,-0.271429,4.5,1.6,104.40334,3783.364,1.458,313000.0,14266.25


### Shift Data

In [7]:
# Build the target column: 'recession_actual' moved up 4 rows, so each quarter
# carries the value recorded 4 rows (quarters) later; the last 4 rows become NaN
df = df.assign(recession_4q_out=df['recession_actual'].shift(periods=-4))
df.tail()

Unnamed: 0_level_0,avg_consumer_price_index,gdp,gdp_pct_change,avg_housing_starts,output_gap,recession_actual,avg_unemployment_rate,fed_funds_avg_rate,fed_funds_percent_change_prev_quarter,fed_funds_st_dev_rate,10YT_minus_2YT_avg,10YT_minus_2YT_percent_change_prev_quarter,real_disp_pers_inc,personal_consumption_exp_excl_food_energy,tot_public_debt_as_pct_of_gdp,gross_private_domestic_invest,M2_velocity,median_sls_price_houses_sold_US,personal_consumption_expenditures,recession_4q_out
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2018Q1,249.250333,20163.159,5.0,1320.666667,0.202456,0.0,4.333333,1.448966,0.204683,0.083902,0.596667,-0.113861,6.9,1.8,104.59493,3542.412,1.451,331800.0,13728.357,0.0
2018Q2,250.578667,20510.177,7.1,1259.666667,0.589182,0.0,3.833333,1.727176,0.192007,0.075492,0.446667,-0.251397,2.7,2.0,103.33928,3561.592,1.461,315600.0,13939.828,
2018Q3,251.828667,20749.752,4.8,1233.0,0.821959,0.0,3.866667,1.923492,0.113663,0.047184,0.253333,-0.432836,3.3,2.0,103.69309,3683.981,1.462,330900.0,14114.559,
2018Q4,252.759,20897.804,2.9,1185.0,0.592021,0.0,3.566667,2.217097,0.152641,0.066218,0.233333,-0.078947,2.8,1.9,105.15026,3725.234,1.462,322800.0,14211.92,
2019Q1,253.311333,21098.827,3.9,1213.0,0.848147,0.0,4.133333,2.401311,0.083088,0.004646,0.17,-0.271429,4.5,1.6,104.40334,3783.364,1.458,313000.0,14266.25,


In [8]:
# Keep only the rows in which every column is populated
df = df[df.notna().all(axis=1)]

In [9]:
# Define y: the recession flag shifted 4 quarters ahead
y = df['recession_4q_out']

# Define X: every column except the target and the current-quarter label.
# 'recession_actual' is excluded so the feature columns match df_2019, which
# was saved without that column and is later passed through the fitted scaler.
X = df.drop(columns=['recession_4q_out', 'recession_actual'])

### Split and scale data

In [10]:
# Split data into training and testing (80/20, stratified on the target).
# A fixed random_state makes the shuffled split repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=42
)

In [11]:
# Create the scaler and fit it on the training features only
X_scaler = StandardScaler()
X_scaler.fit(X_train)

In [12]:
# Apply the fitted scaler to the training, testing, and full feature sets
X_train_scaled, X_test_scaled, X_full_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test, X)
)

### Reshape data to fit LSTM format

In [None]:
# Helper that adds the trailing axis used by the LSTM input below
def reshape_data(obj):
    """Return `obj` reshaped from (rows, columns) to (rows, columns, 1).

    Args:
        obj: Array-like exposing a `.shape` with at least two entries.

    Returns:
        The data of `obj` with shape (obj.shape[0], obj.shape[1], 1).
    """
    n_rows, n_cols = obj.shape[0], obj.shape[1]
    return np.reshape(obj, (n_rows, n_cols, 1))

In [None]:
# Reshape the scaled training sets to (rows, columns, 1)
# NOTE(review): X1_/X2_/X3_train_scaled are not assigned in any cell shown above
# (only X_train_scaled is) — confirm where they are meant to come from.
reshaped_X1_train_scaled, reshaped_X2_train_scaled, reshaped_X3_train_scaled = map(
    reshape_data, (X1_train_scaled, X2_train_scaled, X3_train_scaled)
)

In [None]:
# Reshape the scaled testing sets to (rows, columns, 1)
# NOTE(review): X1_/X2_/X3_test_scaled are not assigned in any cell shown above
# (only X_test_scaled is) — confirm where they are meant to come from.
reshaped_X1_test_scaled, reshaped_X2_test_scaled, reshaped_X3_test_scaled = map(
    reshape_data, (X1_test_scaled, X2_test_scaled, X3_test_scaled)
)

In [None]:
# Reshape the scaled full data sets to (rows, columns, 1)
# NOTE(review): X1_/X2_/X3_full_scaled are not assigned in any cell shown above
# (only X_full_scaled is) — confirm where they are meant to come from.
reshaped_X1_full, reshaped_X2_full, reshaped_X3_full = map(
    reshape_data, (X1_full_scaled, X2_full_scaled, X3_full_scaled)
)

### Build Model

In [None]:
# Start from an empty Sequential container; layers are added in the next cell
model = Sequential(layers=None)

In [None]:
# Assemble the network: three LSTM stages (each followed by dropout and batch
# normalisation), one dense hidden layer, and a 2-unit softmax output
n_steps = reshaped_X1_train_scaled.shape[1]

layer_stack = [
    # First LSTM stage; return_sequences=True hands the full sequence to the next LSTM
    LSTM(128, input_shape=(n_steps, 1), return_sequences=True),
    Dropout(0.4),
    BatchNormalization(),  # Normalize activation outputs

    # Second LSTM stage
    LSTM(128, return_sequences=True),
    Dropout(0.4),
    BatchNormalization(),

    # Final LSTM stage
    LSTM(128),
    Dropout(0.4),
    BatchNormalization(),

    # Dense hidden layer
    Dense(32, activation='relu'),
    Dropout(0.4),

    # Output layer: one softmax probability per class
    Dense(2, activation='softmax'),
]

for layer in layer_stack:
    model.add(layer)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

### Train and predict on X1-Y1 data (recession 1 quarter out)

In [None]:
# Train on the X1 training set for 100 epochs, with 20% of it held out for validation
model.fit(
    reshaped_X1_train_scaled,
    y1_train,
    validation_split=0.2,
    epochs=100,
    shuffle=True,
    verbose=2,
)

In [None]:
# Loss and accuracy on the held-out X1 test set
model_loss1, model_accuracy1 = model.evaluate(
    x=reshaped_X1_test_scaled,
    y=y1_test,
    verbose=2,
)

In [None]:
# Make predictions using test data: the predicted class is the index of the
# largest softmax output. This replaces Sequential.predict_classes, which was
# removed in TensorFlow 2.6.
predictions1_class = np.argmax(model.predict(reshaped_X1_test_scaled), axis=-1) # Predicted class

In [None]:
# Put predictions next to the true labels and show only the rows whose actual label is 1
one_qtr_out = pd.DataFrame({"Predicted": predictions1_class, "Actual": y1_test})
one_qtr_out.loc[one_qtr_out["Actual"].eq(1)]

#### Confusion Matrix on X1-Y1 data (recession 1 quarter out)

In [None]:
# Confusion matrix of actual vs. predicted classes on the X1 test set
con_mat = confusion_matrix(y_true=y1_test, y_pred=predictions1_class)
print(con_mat)

In [None]:
# Per-class precision / recall / f1 on the X1 test set
report1 = classification_report(y_true=y1_test, y_pred=predictions1_class)
print(report1)

In [None]:
# Save model
# name1 = f"shuffled-1q-out-{int(time.time())}"
# model.save(f"models/{name1}.h5")

### Predict on 2019

In [None]:
# Scale 2019 data
# NOTE(review): X1_scaler is not assigned in any cell shown above (only X_scaler is) — confirm.
scaled_X1_2019 = X1_scaler.transform(df_2019)

# Reshape 2019 data
reshaped_X1_2019 = reshape_data(scaled_X1_2019)

# Predict on 2019. The model ends in a softmax layer, so model.predict already
# returns one probability per class; Sequential.predict_proba was removed in
# TensorFlow 2.6.
pred_X1_2019 = model.predict(reshaped_X1_2019)
print(f"2019Q1 No Recession Probability: {pred_X1_2019[0][0]}")
print(f"2019Q1 Recession Probability: {pred_X1_2019[0][1]}")
print(f"2019Q2 No Recession Probability: {pred_X1_2019[1][0]}")
print(f"2019Q2 Recession Probability: {pred_X1_2019[1][1]}")

#### Predict on full X1

In [None]:
# Predicted class for every row of the full X1 set: index of the largest softmax
# output (replaces Sequential.predict_classes, removed in TensorFlow 2.6)
pred_X1_full = np.argmax(model.predict(reshaped_X1_full), axis=-1)

# Preview results
X1_full_results = pd.DataFrame({"Predicted":pred_X1_full, "Actual":y1})
X1_full_results.loc[X1_full_results["Actual"]==1]

# Export results for graphing
# X1_full_results.to_csv(f"resources/predictions/X1_S_{int(time.time())}.csv")

### Train and predict on X2-Y2 data (recession 2 quarters out)

In [None]:
# Re-create the network with freshly initialised weights before training on the
# X2-Y2 data; otherwise fit() would continue from the weights left by the
# earlier fit on the X1-Y1 data. clone_model builds new layers (new weights),
# so the clone has to be compiled again with the same settings.
model = clone_model(model)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

# Fit the model to the training data
model.fit(reshaped_X2_train_scaled, y2_train, validation_split=0.2, epochs=100, shuffle=True, verbose=2)

In [None]:
# Loss and accuracy on the held-out X2 test set
model_loss2, model_accuracy2 = model.evaluate(
    x=reshaped_X2_test_scaled,
    y=y2_test,
    verbose=2,
)

In [None]:
# Make predictions using test data: the predicted class is the index of the
# largest softmax output. This replaces Sequential.predict_classes, which was
# removed in TensorFlow 2.6.
predictions2_class = np.argmax(model.predict(reshaped_X2_test_scaled), axis=-1)

In [None]:
# Put predictions next to the true labels and show only the rows whose actual label is 1
two_qtrs_out = pd.DataFrame({"Predicted": predictions2_class, "Actual": y2_test})
two_qtrs_out.loc[two_qtrs_out["Actual"].eq(1)]

#### Confusion Matrix on X2-Y2 data (recession 2 quarters out)

In [None]:
# Confusion matrix of actual vs. predicted classes on the X2 test set
con_mat = confusion_matrix(y_true=y2_test, y_pred=predictions2_class)
print(con_mat)

In [None]:
# Per-class precision / recall / f1 on the X2 test set
report2 = classification_report(y_true=y2_test, y_pred=predictions2_class)
print(report2)

In [None]:
# Save model
# name2 = f"shuffled-2q-out-{int(time.time())}"
# model.save(f"models/{name2}.h5")

### Predict on 2019

In [None]:
# Scale 2019 data
# NOTE(review): X2_scaler is not assigned in any cell shown above (only X_scaler is) — confirm.
scaled_X2_2019 = X2_scaler.transform(df_2019)

# Reshape 2019 data
reshaped_X2_2019 = reshape_data(scaled_X2_2019)

# Predict on 2019. The model ends in a softmax layer, so model.predict already
# returns one probability per class; Sequential.predict_proba was removed in
# TensorFlow 2.6.
pred_X2_2019 = model.predict(reshaped_X2_2019)
print(f"2019Q1 No Recession Probability: {pred_X2_2019[0][0]}")
print(f"2019Q1 Recession Probability: {pred_X2_2019[0][1]}")
print(f"2019Q2 No Recession Probability: {pred_X2_2019[1][0]}")
print(f"2019Q2 Recession Probability: {pred_X2_2019[1][1]}")

#### Predict on full X2

In [None]:
# Predicted class for every row of the full X2 set: index of the largest softmax
# output (replaces Sequential.predict_classes, removed in TensorFlow 2.6)
pred_X2_full = np.argmax(model.predict(reshaped_X2_full), axis=-1)
X2_full_results = pd.DataFrame({"Predicted":pred_X2_full, "Actual":y2})
X2_full_results.loc[X2_full_results["Actual"]==1]
# X2_full_results.to_csv(f"resources/predictions/X2_S_{int(time.time())}.csv")

### Train and predict on X3-Y3 data (recession 4 quarters out)

In [None]:
# Re-create the network with freshly initialised weights before training on the
# X3-Y3 data; otherwise fit() would continue from the weights left by the
# earlier fits on the other horizons. clone_model builds new layers (new
# weights), so the clone has to be compiled again with the same settings.
model = clone_model(model)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=['accuracy'])

# Fit the model to the training data
model.fit(reshaped_X3_train_scaled, y3_train, validation_split=0.2, epochs=100, shuffle=True, verbose=2)

In [None]:
# Loss and accuracy on the held-out X3 test set
model_loss3, model_accuracy3 = model.evaluate(
    x=reshaped_X3_test_scaled,
    y=y3_test,
    verbose=2,
)

In [None]:
# Make predictions using test data: the predicted class is the index of the
# largest softmax output. This replaces Sequential.predict_classes, which was
# removed in TensorFlow 2.6.
predictions3_class = np.argmax(model.predict(reshaped_X3_test_scaled), axis=-1)

In [None]:
# Put predictions next to the true labels and show only the rows whose actual label is 1
four_qtrs_out = pd.DataFrame({"Predicted": predictions3_class, "Actual": y3_test})
four_qtrs_out.loc[four_qtrs_out["Actual"].eq(1)]

#### Confusion Matrix on X3-Y3 data (recession 4 quarters out)

In [None]:
# Confusion matrix of actual vs. predicted classes on the X3 test set
con_mat = confusion_matrix(y_true=y3_test, y_pred=predictions3_class)
print(con_mat)

In [None]:
# Per-class precision / recall / f1 on the X3 test set
report3 = classification_report(y_true=y3_test, y_pred=predictions3_class)
print(report3)

In [None]:
# Save model
# name3 = f"shuffled-4q-out-{int(time.time())}"
# model.save(f"models/{name3}.h5")

### Predict on 2019

In [None]:
# Scale 2019 data
# NOTE(review): X3_scaler is not assigned in any cell shown above (only X_scaler is) — confirm.
scaled_X3_2019 = X3_scaler.transform(df_2019)

# Reshape 2019 data
reshaped_X3_2019 = reshape_data(scaled_X3_2019)

# Predict on 2019. The model ends in a softmax layer, so model.predict already
# returns one probability per class; Sequential.predict_proba was removed in
# TensorFlow 2.6.
pred_X3_2019 = model.predict(reshaped_X3_2019)
print(f"2019Q1 No Recession Probability: {pred_X3_2019[0][0]}")
print(f"2019Q1 Recession Probability: {pred_X3_2019[0][1]}")
print(f"2019Q2 No Recession Probability: {pred_X3_2019[1][0]}")
print(f"2019Q2 Recession Probability: {pred_X3_2019[1][1]}")

#### Predict on full X3

In [None]:
# Predicted class for every row of the full X3 set: index of the largest softmax
# output (replaces Sequential.predict_classes, removed in TensorFlow 2.6)
pred_X3_full = np.argmax(model.predict(reshaped_X3_full), axis=-1)

# Preview results
X3_full_results = pd.DataFrame({"Predicted":pred_X3_full, "Actual":y3})
X3_full_results.loc[X3_full_results["Actual"]==1]

# Export results for graphing
# X3_full_results.to_csv(f"resources/predictions/X3_S_{int(time.time())}.csv")