In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import statsmodels.api as sm
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import ElasticNetCV
from sklearn.naive_bayes import GaussianNB
from sklearn.mixture import GaussianMixture
from xgboost import XGBClassifier
from sklearn.metrics import log_loss
import datetime

In [51]:
recession_data = pd.read_csv("Recession Quarterly Latest.csv",index_col=0, parse_dates=True)

In [52]:
recession_data

Unnamed: 0_level_0,USRECQ,UNEMP,CPI,GDP,STOCKS,TREASURY,FEDFUND
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1854-10-01,1,,,,,,
1855-01-01,0,,,,,,
1855-04-01,0,,,,,,
1855-07-01,0,,,,,,
1855-10-01,0,,,,,,
...,...,...,...,...,...,...,...
2022-01-01,0,3.800000,2.21990,1.60728,-4.08296,1.641774,0.040000
2022-04-01,0,3.600000,2.33140,2.05330,-8.52256,1.828871,0.650000
2022-07-01,0,3.566667,1.35832,1.88314,-2.91799,0.347656,1.420000
2022-10-01,0,3.600000,1.02517,1.60959,-3.38433,-0.360656,1.463333


### Data Preprocessing

In [53]:
recession_data.describe()

Unnamed: 0,USRECQ,UNEMP,CPI,GDP,STOCKS,TREASURY,FEDFUND
count,674.0,301.0,304.0,303.0,599.0,257.0,274.0
mean,0.28635,5.725471,0.872419,1.56434,1.373398,1.329965,0.012725
std,0.452391,1.698062,0.811134,1.322806,7.006181,1.225048,0.854176
min,0.0,2.566667,-2.29019,-8.82764,-31.34328,-2.182139,-3.99
25%,0.0,4.433333,0.399157,1.014995,-2.495115,0.464681,-0.205833
50%,0.0,5.533333,0.766295,1.47711,1.63934,1.369167,0.013333
75%,1.0,6.8,1.142075,2.11695,5.692965,2.389219,0.369167
max,1.0,12.966667,4.06317,8.78811,35.05866,3.608033,6.016667


In [54]:
missing_values = recession_data.isnull().sum()

# Print the result
print(missing_values)

USRECQ        0
UNEMP       373
CPI         370
GDP         371
STOCKS       75
TREASURY    417
FEDFUND     400
dtype: int64


In [55]:
recession_data.index = pd.to_datetime(recession_data.index)
recession_data['QUARTER'] = recession_data.index.to_period('Q').strftime('Q%q')
recession_data = recession_data[recession_data.index >= datetime.datetime(1959, 1, 1)]
recession_data = recession_data.ffill()

In [56]:
missing_values = recession_data.isnull().sum()

# Print the result
print(missing_values)

USRECQ      0
UNEMP       0
CPI         0
GDP         0
STOCKS      0
TREASURY    0
FEDFUND     0
QUARTER     0
dtype: int64


In [57]:
recession_data

Unnamed: 0_level_0,USRECQ,UNEMP,CPI,GDP,STOCKS,TREASURY,FEDFUND,QUARTER
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1959-01-01,0,5.833333,0.17275,2.15692,7.74026,1.158290,0.406667,Q1
1959-04-01,0,5.100000,0.17245,2.41471,4.86404,1.191695,0.513333,Q2
1959-07-01,0,5.266667,0.51647,0.45556,4.23349,0.881416,0.493333,Q3
1959-10-01,0,5.600000,0.60516,0.67919,-0.19093,0.248136,0.413333,Q4
1960-01-01,0,5.133333,0.09080,2.65759,-2.56506,0.520221,-0.056667,Q1
...,...,...,...,...,...,...,...,...
2022-01-01,0,3.800000,2.21990,1.60728,-4.08296,1.641774,0.040000,Q1
2022-04-01,0,3.600000,2.33140,2.05330,-8.52256,1.828871,0.650000,Q2
2022-07-01,0,3.566667,1.35832,1.88314,-2.91799,0.347656,1.420000,Q3
2022-10-01,0,3.600000,1.02517,1.60959,-3.38433,-0.360656,1.463333,Q4


### Model Fitting

In [58]:
lags = [1, 2, 4]  # Lags for next quarters, next 2 quarters, next 3 quarters, next 4 quarters
for lag in lags:
    recession_data[f'UNEMP_lag{lag}'] = recession_data['UNEMP'].shift(lag)
    recession_data[f'CPI_lag{lag}'] = recession_data['CPI'].shift(lag)
    recession_data[f'GDP_lag{lag}'] = recession_data['GDP'].shift(lag)
    recession_data[f'STOCKS_lag{lag}'] = recession_data['STOCKS'].shift(lag)
    recession_data[f'TREASURY_lag{lag}'] = recession_data['TREASURY'].shift(lag)
    recession_data[f'FEDFUND_lag{lag}'] = recession_data['FEDFUND'].shift(lag)
recession_data

Unnamed: 0_level_0,USRECQ,UNEMP,CPI,GDP,STOCKS,TREASURY,FEDFUND,QUARTER,UNEMP_lag1,CPI_lag1,...,GDP_lag2,STOCKS_lag2,TREASURY_lag2,FEDFUND_lag2,UNEMP_lag4,CPI_lag4,GDP_lag4,STOCKS_lag4,TREASURY_lag4,FEDFUND_lag4
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1959-01-01,0,5.833333,0.17275,2.15692,7.74026,1.158290,0.406667,Q1,,,...,,,,,,,,,,
1959-04-01,0,5.100000,0.17245,2.41471,4.86404,1.191695,0.513333,Q2,5.833333,0.17275,...,,,,,,,,,,
1959-07-01,0,5.266667,0.51647,0.45556,4.23349,0.881416,0.493333,Q3,5.100000,0.17245,...,2.15692,7.74026,1.158290,0.406667,,,,,,
1959-10-01,0,5.600000,0.60516,0.67919,-0.19093,0.248136,0.413333,Q4,5.266667,0.51647,...,2.41471,4.86404,1.191695,0.513333,,,,,,
1960-01-01,0,5.133333,0.09080,2.65759,-2.56506,0.520221,-0.056667,Q1,5.600000,0.60516,...,0.45556,4.23349,0.881416,0.493333,5.833333,0.17275,2.15692,7.74026,1.158290,0.406667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-01,0,3.800000,2.21990,1.60728,-4.08296,1.641774,0.040000,Q1,4.200000,2.13250,...,2.18461,5.14990,1.275156,0.020000,6.200000,1.03022,2.80651,10.53875,1.284754,-0.010000
2022-04-01,0,3.600000,2.33140,2.05330,-8.52256,1.828871,0.650000,Q2,3.800000,2.21990,...,3.39145,3.71596,1.479194,-0.010000,5.933333,1.82899,3.28533,7.53339,1.565625,-0.010000
2022-07-01,0,3.566667,1.35832,1.88314,-2.91799,0.347656,1.420000,Q3,3.600000,2.33140,...,1.60728,-4.08296,1.641774,0.040000,5.133333,1.61219,2.18461,5.14990,1.275156,0.020000
2022-10-01,0,3.600000,1.02517,1.60959,-3.38433,-0.360656,1.463333,Q4,3.566667,1.35832,...,2.05330,-8.52256,1.828871,0.650000,4.200000,2.13250,3.39145,3.71596,1.479194,-0.010000


In [59]:
recession_data.dropna(inplace=True)

In [60]:
# Split the data into train and test sets
train_data = recession_data.loc[:'01-10-1999']
test_data = recession_data.loc['01-01-2000':]


### Probit Model

In [61]:
# Fit probit model
model = sm.Probit(train_data['USRECQ'], train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])
result_current = model.fit()

# Print summary
print(result_current.summary())

Optimization terminated successfully.
         Current function value: 0.194713
         Iterations 8
                          Probit Regression Results                           
Dep. Variable:                 USRECQ   No. Observations:                  157
Model:                         Probit   Df Residuals:                      151
Method:                           MLE   Df Model:                            5
Date:                Tue, 25 Apr 2023   Pseudo R-squ.:                  0.5051
Time:                        03:00:42   Log-Likelihood:                -30.570
converged:                       True   LL-Null:                       -61.775
Covariance Type:            nonrobust   LLR p-value:                 3.858e-12
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
UNEMP          0.0600      0.095      0.632      0.527      -0.126       0.246
CPI            0.6475      0.

In [62]:
# Fit probit model
model = sm.Probit(train_data['USRECQ'], train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])
result_next = model.fit()

# Print summary
print(result_next.summary())

Optimization terminated successfully.
         Current function value: 0.156372
         Iterations 9
                          Probit Regression Results                           
Dep. Variable:                 USRECQ   No. Observations:                  157
Model:                         Probit   Df Residuals:                      151
Method:                           MLE   Df Model:                            5
Date:                Tue, 25 Apr 2023   Pseudo R-squ.:                  0.6026
Time:                        03:00:44   Log-Likelihood:                -24.550
converged:                       True   LL-Null:                       -61.775
Covariance Type:            nonrobust   LLR p-value:                 1.213e-14
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
UNEMP_lag1       -0.0388      0.107     -0.363      0.717      -0.249       0.171
CPI_lag1          1.

In [64]:
# Fit probit model
model = sm.Probit(train_data['USRECQ'], train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])
result_next2 = model.fit()

# Print summary
print(result_next2.summary())

Optimization terminated successfully.
         Current function value: 0.235005
         Iterations 8
                          Probit Regression Results                           
Dep. Variable:                 USRECQ   No. Observations:                  157
Model:                         Probit   Df Residuals:                      151
Method:                           MLE   Df Model:                            5
Date:                Tue, 25 Apr 2023   Pseudo R-squ.:                  0.4027
Time:                        03:00:54   Log-Likelihood:                -36.896
converged:                       True   LL-Null:                       -61.775
Covariance Type:            nonrobust   LLR p-value:                 1.553e-09
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
UNEMP_lag2        0.0954      0.098      0.978      0.328      -0.096       0.287
CPI_lag2          0.

In [65]:
# Fit probit model
model = sm.Probit(train_data['USRECQ'], train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])
result_next4 = model.fit()

# Print summary
print(result_next4.summary())

Optimization terminated successfully.
         Current function value: 0.257320
         Iterations 8
                          Probit Regression Results                           
Dep. Variable:                 USRECQ   No. Observations:                  157
Model:                         Probit   Df Residuals:                      151
Method:                           MLE   Df Model:                            5
Date:                Tue, 25 Apr 2023   Pseudo R-squ.:                  0.3460
Time:                        03:00:59   Log-Likelihood:                -40.399
converged:                       True   LL-Null:                       -61.775
Covariance Type:            nonrobust   LLR p-value:                 4.151e-08
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
UNEMP_lag4       -0.1103      0.092     -1.198      0.231      -0.291       0.070
CPI_lag4          0.

In [66]:
probs = test_data.iloc[:, [0]].copy()
probs

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [67]:
# Make predictions using the trained model
probs['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])
                                             

In [68]:
probs['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

In [69]:
probs['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])

In [70]:
probs['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])

In [71]:
probs.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0.0192745,0.01020821,0.01622367,0.040771
2018-07-01,0,0.05615698,0.01593732,0.02868011,0.057744
2018-10-01,0,0.2573427,0.009493728,0.05344748,0.061985
2019-01-01,0,0.1442057,0.3972194,0.06553651,0.06013
2019-04-01,0,0.1424645,0.08470136,0.3696785,0.087242
2019-07-01,0,0.1872528,0.05453553,0.3038136,0.102362
2019-10-01,0,0.4729204,0.1117348,0.2283504,0.176259
2020-01-01,1,0.9575103,0.2947115,0.3837647,0.25587
2020-04-01,1,1.0,0.9306465,0.3109282,0.319781
2020-07-01,0,2.12496e-26,1.0,0.7738634,0.415277


**Log-loss error**

In [72]:
df_logloss = pd.DataFrame(columns=['model name', 'log loss current quarter', 'log loss next quarter', 'log loss next 2 quarters', 'log loss next 4 quarters'])


In [73]:
# Calculate log-loss for each time frame
logloss_curr_quarter_probit = log_loss(test_data['USRECQ'], probs['prob_curr_quarter'])
logloss_next_quarter_probit = log_loss(test_data['USRECQ'], probs['prob_next_quarter'])
logloss_next2_quarter_probit = log_loss(test_data['USRECQ'], probs['prob_next2_quarter'])
logloss_next4_quarter_probit = log_loss(test_data['USRECQ'], probs['prob_next4_quarter'])


In [74]:
df_logloss.loc[0] = ['Probit', logloss_curr_quarter_probit, logloss_next_quarter_probit, logloss_next2_quarter_probit, logloss_next4_quarter_probit]

In [75]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619


### SVM Model

In [76]:
probs_svm = test_data.iloc[:, [0]].copy()
probs_svm

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [77]:
# Fit SVM modeland Make predictions using the trained model
model = SVC(kernel='linear', probability=True)

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_svm['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])


result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_svm['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])


result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_svm['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])


result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_svm['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])


In [78]:
probs_svm.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0,0,0,0
2018-07-01,0,0,0,0,0
2018-10-01,0,0,0,0,0
2019-01-01,0,0,0,0,0
2019-04-01,0,0,0,0,0
2019-07-01,0,0,0,0,0
2019-10-01,0,0,0,0,0
2020-01-01,1,1,0,0,0
2020-04-01,1,1,1,0,0
2020-07-01,0,0,1,1,0


**Log-loss Error**

In [79]:
# Calculate log-loss for each time frame
logloss_curr_quarter_svm = log_loss(test_data['USRECQ'], probs_svm['prob_curr_quarter'])
logloss_next_quarter_svm = log_loss(test_data['USRECQ'], probs_svm['prob_next_quarter'])
logloss_next2_quarter_svm = log_loss(test_data['USRECQ'], probs_svm['prob_next2_quarter'])
logloss_next4_quarter_svm = log_loss(test_data['USRECQ'], probs_svm['prob_next4_quarter'])


In [80]:
df_logloss.loc[1] = ['SVM', logloss_curr_quarter_svm, logloss_next_quarter_svm, logloss_next2_quarter_svm, logloss_next4_quarter_svm]

In [81]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625


### KNN Model

In [82]:
probs_knn = test_data.iloc[:, [0]].copy()
probs_knn

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [83]:
# Fit KNN model
k = 5
model = KNeighborsClassifier(n_neighbors=k)

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_knn['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])

result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_knn['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_knn['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])


result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_knn['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [84]:
probs_knn.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0,0,0,0
2018-07-01,0,0,0,0,0
2018-10-01,0,0,0,0,0
2019-01-01,0,0,0,0,0
2019-04-01,0,0,0,0,0
2019-07-01,0,0,0,0,0
2019-10-01,0,0,0,0,0
2020-01-01,1,0,0,0,0
2020-04-01,1,1,0,0,0
2020-07-01,0,0,1,0,0


**Log-loss Error**

In [85]:
# Calculate log-loss for each time frame
logloss_curr_quarter_knn = log_loss(test_data['USRECQ'], probs_knn['prob_curr_quarter'])
logloss_next_quarter_knn = log_loss(test_data['USRECQ'], probs_knn['prob_next_quarter'])
logloss_next2_quarter_knn = log_loss(test_data['USRECQ'], probs_knn['prob_next2_quarter'])
logloss_next4_quarter_knn = log_loss(test_data['USRECQ'], probs_knn['prob_next4_quarter'])

In [86]:
df_logloss.loc[2] = ['KNN', logloss_curr_quarter_knn, logloss_next_quarter_knn, logloss_next2_quarter_knn, logloss_next4_quarter_knn]

In [87]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625
2,KNN,3.713847,4.828018,4.456625,4.085232


### Elastic Net

In [88]:
probs_els = test_data.iloc[:, [0]].copy()
probs_els

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [89]:
# Define the elastic net model
model = ElasticNetCV(cv=5, random_state=0)

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_els['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])

result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_els['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_els['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])


result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_els['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])


In [90]:
probs_els.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,-0.013894,0.040105,0.031383,0.074823
2018-07-01,0,0.016668,0.056976,0.064735,0.10724
2018-10-01,0,0.110228,0.007862,0.093112,0.118725
2019-01-01,0,0.07469,0.241625,0.072182,0.119248
2019-04-01,0,0.096696,0.116635,0.244341,0.131674
2019-07-01,0,0.106326,0.10475,0.191027,0.133724
2019-10-01,0,0.204563,0.137716,0.169412,0.174906
2020-01-01,1,0.348828,0.212987,0.203818,0.203774
2020-04-01,1,1.63817,0.395358,0.194925,0.236308
2020-07-01,0,-0.522141,1.381898,0.338936,0.25007


**Log-loss Error**

In [91]:
# Calculate log-loss for each time frame
logloss_curr_quarter_els = log_loss(test_data['USRECQ'], probs_els['prob_curr_quarter'])
logloss_next_quarter_els = log_loss(test_data['USRECQ'], probs_els['prob_next_quarter'])
logloss_next2_quarter_els = log_loss(test_data['USRECQ'], probs_els['prob_next2_quarter'])
logloss_next4_quarter_els = log_loss(test_data['USRECQ'], probs_els['prob_next4_quarter'])

In [92]:
df_logloss.loc[3] = ['Elastic Net', logloss_curr_quarter_els, logloss_next_quarter_els, logloss_next2_quarter_els, logloss_next4_quarter_els]

In [93]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625
2,KNN,3.713847,4.828018,4.456625,4.085232
3,Elastic Net,0.225719,0.577494,0.63704,0.266494


### Naive Bayes Model

In [94]:
probs_nb = test_data.iloc[:, [0]].copy()
probs_nb

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [95]:
# Fit Naive Bayes model
model = GaussianNB()

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_nb['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])

result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_nb['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_nb['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])

result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_nb['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])


In [96]:
probs_nb.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0,0,0,0
2018-07-01,0,0,0,0,0
2018-10-01,0,0,0,0,0
2019-01-01,0,0,0,0,0
2019-04-01,0,0,0,0,0
2019-07-01,0,0,0,0,0
2019-10-01,0,0,0,0,0
2020-01-01,1,0,0,0,0
2020-04-01,1,1,0,0,0
2020-07-01,0,0,1,0,0


**Log-loss Error**

In [97]:
# Calculate log-loss for each time frame
logloss_curr_quarter_nb = log_loss(test_data['USRECQ'], probs_nb['prob_curr_quarter'])
logloss_next_quarter_nb = log_loss(test_data['USRECQ'], probs_nb['prob_next_quarter'])
logloss_next2_quarter_nb = log_loss(test_data['USRECQ'], probs_nb['prob_next2_quarter'])
logloss_next4_quarter_nb = log_loss(test_data['USRECQ'], probs_nb['prob_next4_quarter'])

In [98]:
df_logloss.loc[4] = ['Naive Bayes', logloss_curr_quarter_nb, logloss_next_quarter_nb, logloss_next2_quarter_nb, logloss_next4_quarter_nb]

In [99]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625
2,KNN,3.713847,4.828018,4.456625,4.085232
3,Elastic Net,0.225719,0.577494,0.63704,0.266494
4,Naive Bayes,2.599701,3.342479,3.342462,4.828018


### Gaussian Mixture Models

In [100]:
probs_gmm = test_data.iloc[:, [0]].copy()
probs_gmm

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [101]:
# Fit Gaussian Mixture model
model = GaussianMixture(n_components=2, covariance_type='full')

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_gmm['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])

result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_gmm['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_gmm['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])


result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_gmm['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])



In [102]:
probs_gmm.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0,1,0,0
2018-07-01,0,0,1,0,0
2018-10-01,0,0,1,0,0
2019-01-01,0,0,1,0,0
2019-04-01,0,0,1,0,0
2019-07-01,0,0,1,0,0
2019-10-01,0,0,1,0,0
2020-01-01,1,0,1,0,0
2020-04-01,1,1,1,0,0
2020-07-01,0,1,0,0,0


**Log-loss Error**

In [103]:
# Calculate log-loss for each time frame
logloss_curr_quarter_gmm = log_loss(test_data['USRECQ'], probs_gmm['prob_curr_quarter'])
logloss_next_quarter_gmm = log_loss(test_data['USRECQ'], probs_gmm['prob_next_quarter'])
logloss_next2_quarter_gmm = log_loss(test_data['USRECQ'], probs_gmm['prob_next2_quarter'])
logloss_next4_quarter_gmm = log_loss(test_data['USRECQ'], probs_gmm['prob_next4_quarter'])

In [104]:
df_logloss.loc[5] = ['Gaussian Model', logloss_curr_quarter_gmm, logloss_next_quarter_gmm, logloss_next2_quarter_gmm, logloss_next4_quarter_gmm]

In [105]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625
2,KNN,3.713847,4.828018,4.456625,4.085232
3,Elastic Net,0.225719,0.577494,0.63704,0.266494
4,Naive Bayes,2.599701,3.342479,3.342462,4.828018
5,Gaussian Model,7.05643,25.254718,9.656148,10.398926


### XGBoost Model

In [106]:
probs_XGBoost = test_data.iloc[:, [0]].copy()
probs_XGBoost

Unnamed: 0_level_0,USRECQ
DATE,Unnamed: 1_level_1
2000-01-01,0
2000-04-01,0
2000-07-01,0
2000-10-01,0
2001-01-01,0
...,...
2022-01-01,0
2022-04-01,0
2022-07-01,0
2022-10-01,0


In [107]:
# Fit XGBoost model
model = XGBClassifier()

result_current = model.fit(train_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']], train_data['USRECQ'])
probs_XGBoost['prob_curr_quarter'] = result_current.predict(test_data[['UNEMP', 'CPI', 'GDP', 'STOCKS', 'TREASURY', 'FEDFUND']])

result_next = model.fit(train_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']], train_data['USRECQ'])
probs_XGBoost['prob_next_quarter'] = result_next.predict(test_data[['UNEMP_lag1', 'CPI_lag1', 'GDP_lag1', 'STOCKS_lag1', 'TREASURY_lag1', 'FEDFUND_lag1']])

result_next2 = model.fit( train_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']], train_data['USRECQ'])
probs_XGBoost['prob_next2_quarter'] = result_next2.predict(test_data[['UNEMP_lag2', 'CPI_lag2', 'GDP_lag2', 'STOCKS_lag2', 'TREASURY_lag2', 'FEDFUND_lag2']])


result_next4 = model.fit(train_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']], train_data['USRECQ'])
probs_XGBoost['prob_next4_quarter'] = result_next4.predict(test_data[['UNEMP_lag4', 'CPI_lag4', 'GDP_lag4', 'STOCKS_lag4', 'TREASURY_lag4', 'FEDFUND_lag4']])


In [108]:
probs_XGBoost.tail(20)

Unnamed: 0_level_0,USRECQ,prob_curr_quarter,prob_next_quarter,prob_next2_quarter,prob_next4_quarter
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-01,0,0,0,0,0
2018-07-01,0,0,0,0,0
2018-10-01,0,0,0,0,0
2019-01-01,0,0,1,0,0
2019-04-01,0,0,0,0,0
2019-07-01,0,0,0,0,0
2019-10-01,0,0,0,0,0
2020-01-01,1,1,0,1,0
2020-04-01,1,1,1,0,0
2020-07-01,0,0,1,0,0


**Log-loss Error**

In [109]:
# Calculate log-loss for each time frame
logloss_curr_quarter_XGBoost = log_loss(test_data['USRECQ'], probs_XGBoost['prob_curr_quarter'])
logloss_next_quarter_XGBoost = log_loss(test_data['USRECQ'], probs_XGBoost['prob_next_quarter'])
logloss_next2_quarter_XGBoost = log_loss(test_data['USRECQ'], probs_XGBoost['prob_next2_quarter'])
logloss_next4_quarter_XGBoost = log_loss(test_data['USRECQ'], probs_XGBoost['prob_next4_quarter'])

In [110]:
df_logloss.loc[6] = ['XGBoost', logloss_curr_quarter_XGBoost, logloss_next_quarter_XGBoost, logloss_next2_quarter_XGBoost, logloss_next4_quarter_XGBoost]

In [111]:
df_logloss

Unnamed: 0,model name,log loss current quarter,log loss next quarter,log loss next 2 quarters,log loss next 4 quarters
0,Probit,0.176972,0.504961,0.533439,0.250619
1,SVM,2.228325,2.228325,4.085257,4.456625
2,KNN,3.713847,4.828018,4.456625,4.085232
3,Elastic Net,0.225719,0.577494,0.63704,0.266494
4,Naive Bayes,2.599701,3.342479,3.342462,4.828018
5,Gaussian Model,7.05643,25.254718,9.656148,10.398926
6,XGBoost,1.856949,4.456685,3.713881,4.08524
