In [1]:
# Setup: core libraries and a fixed NumPy seed.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's legacy global RNG. Most sklearn calls further down also pass
# random_state=42 explicitly rather than relying on this global seed.
np.random.seed(42)

In [19]:
# Import data: read the raw monthly dataset from the sibling `data` directory.

from pathlib import Path
cwd = Path.cwd()
# The data folder is resolved relative to the notebook's working directory
# (i.e. <parent of cwd>/data), so the kernel must be started from this folder.
datapath = cwd.parent.joinpath('data/').resolve()

df = pd.read_csv(datapath.joinpath('RecessionTimingRawData2024.csv'))

In [20]:
# Set up columns

# --- Target -----------------------------------------------------------------
# rectime = number of rows (months) until the next recession month.
# Recession months (rec == 1) are 0 and every expansion month (rec == 0) is one
# more than the row after it, so the countdown is filled in backwards from each
# recession start. Rows with a missing `rec`, and expansion rows that have no
# later recession in the data, stay NaN.
df["rectime"] = np.where(df.rec == 1, 0.0, np.nan)
while True:
    # Each pass extends the countdown one more month back in time. Stop as soon
    # as a pass fills nothing new; a fixed pass count would silently leave NaN
    # targets in any expansion longer than that count.
    n_missing = df.rectime.isna().sum()
    df.loc[df.rec == 0, "rectime"] = df.rectime.shift(-1) + 1
    if df.rectime.isna().sum() == n_missing:
        break
# df["lnrectime"] = np.log(df.rectime)
df["sqrtrectime"] = np.sqrt(df.rectime)

# --- Macro series: levels, lags (l{i}_) and i-month changes (s{i}_) ----------
for i in (0, 1, 3, 6, 12):
    for var in ("ur",):
        if i > 0:
            df[f"s{i}_{var}"] = df[var].diff(i)
        # For i == 0 this creates l0_ur, a plain copy of ur.
        df[f"l{i}_{var}"] = df[var].shift(i)
    for var in ('emp', 'sp500', 'ip'):
        if i == 0:
            df[f"ln{var}"] = np.log(df[var])
        else:
            # ln{var} was created on the i == 0 pass, so the log is taken once
            # and reused for every lag/difference.
            df[f"s{i}_ln{var}"] = df[f"ln{var}"].diff(i)
            df[f"l{i}_ln{var}"] = df[f"ln{var}"].shift(i)

# --- Interest rates: lags 1-6 of each series --------------------------------
for i in (1, 2, 3, 4, 5, 6):
    for var in ('r10yr', 'rff', 'rbaa', 'r3mo', 'r1yr', 'r3yr', 'r5yr'):
        df[f"l{i}_{var}"] = df[var].shift(i)

# --- Spreads ----------------------------------------------------------------
df["rbaar10yr"] = df.rbaa - df.r10yr
df["r10yrr3yr"] = df.r10yr - df.r3yr
df["s3_r10yrr3yr"] = df.r10yrr3yr.diff(3)
# df["s12_rff"] = df.rff.diff(i) # not enough history...

df.head(24)

Unnamed: 0,period,r10yr,rff,rbaa,r3mo,r1yr,r3yr,r5yr,rec,rec12mo,...,l6_r10yr,l6_rff,l6_rbaa,l6_r3mo,l6_r1yr,l6_r3yr,l6_r5yr,rbaar10yr,r10yrr3yr,s3_r10yrr3yr
0,4/1/1953,2.83,,3.65,2.19,2.36,2.51,2.62,0.0,1.0,...,,,,,,,,0.82,0.32,
1,5/1/1953,3.05,,3.78,2.16,2.48,2.72,2.87,0.0,1.0,...,,,,,,,,0.73,0.33,
2,6/1/1953,3.11,,3.86,2.11,2.45,2.74,2.94,0.0,1.0,...,,,,,,,,0.75,0.37,
3,7/1/1953,2.93,,3.86,2.04,2.38,2.62,2.75,0.0,1.0,...,,,,,,,,0.93,0.31,-0.01
4,8/1/1953,2.95,,3.85,2.04,2.28,2.58,2.8,1.0,1.0,...,,,,,,,,0.9,0.37,0.04
5,9/1/1953,2.87,,3.88,1.79,2.2,2.56,2.71,1.0,1.0,...,,,,,,,,1.01,0.31,-0.06
6,10/1/1953,2.66,,3.82,1.38,1.79,2.23,2.43,1.0,1.0,...,2.83,,3.65,2.19,2.36,2.51,2.62,1.16,0.43,0.12
7,11/1/1953,2.68,,3.75,1.44,1.67,2.2,2.42,1.0,1.0,...,3.05,,3.78,2.16,2.48,2.72,2.87,1.07,0.48,0.11
8,12/1/1953,2.59,,3.74,1.6,1.66,2.07,2.32,1.0,1.0,...,3.11,,3.86,2.11,2.45,2.74,2.94,1.15,0.52,0.21
9,1/1/1954,2.48,,3.71,1.18,1.41,1.89,2.17,1.0,1.0,...,2.93,,3.86,2.04,2.38,2.62,2.75,1.23,0.59,0.16


In [21]:
# List every column now on df (raw inputs plus the engineered lags/changes/spreads).
df.columns

Index(['period', 'r10yr', 'rff', 'rbaa', 'r3mo', 'r1yr', 'r3yr', 'r5yr', 'rec',
       'rec12mo', 'r10yrff', 'emp', 'ur', 'sp500', 'ip', 'cycle', 'r10yrffma',
       's6_lnsp500', 's6_lnemp', 's6_lnip', 'rectime', 'sqrtrectime', 'l0_ur',
       'lnemp', 'lnsp500', 'lnip', 's1_ur', 'l1_ur', 's1_lnemp', 'l1_lnemp',
       's1_lnsp500', 'l1_lnsp500', 's1_lnip', 'l1_lnip', 's3_ur', 'l3_ur',
       's3_lnemp', 'l3_lnemp', 's3_lnsp500', 'l3_lnsp500', 's3_lnip',
       'l3_lnip', 's6_ur', 'l6_ur', 'l6_lnemp', 'l6_lnsp500', 'l6_lnip',
       's12_ur', 'l12_ur', 's12_lnemp', 'l12_lnemp', 's12_lnsp500',
       'l12_lnsp500', 's12_lnip', 'l12_lnip', 'l1_r10yr', 'l1_rff', 'l1_rbaa',
       'l1_r3mo', 'l1_r1yr', 'l1_r3yr', 'l1_r5yr', 'l2_r10yr', 'l2_rff',
       'l2_rbaa', 'l2_r3mo', 'l2_r1yr', 'l2_r3yr', 'l2_r5yr', 'l3_r10yr',
       'l3_rff', 'l3_rbaa', 'l3_r3mo', 'l3_r1yr', 'l3_r3yr', 'l3_r5yr',
       'l4_r10yr', 'l4_rff', 'l4_rbaa', 'l4_r3mo', 'l4_r1yr', 'l4_r3yr',
       'l4_r5yr', 'l5_r10yr'

In [22]:
# Select subset of data

# Earlier feature sets (growth rates / spreads), kept for reference:
# xlist = ['r10yrffma','r10yr','rff','rbaa','r3mo','r1yr','r3yr','r5yr','ur','s6_lnsp500','s6_lnemp','s6_lnip','s6_ur','s12_ur','s12_lnemp','s12_lnsp500','s12_lnip']
# xlist = ['r10yrffma','rbaar10yr','r10yrr3yr','r10yr','rff','rbaa','r3mo','r1yr','r3yr','r5yr','ur','s3_ur','s3_lnemp','s3_lnsp500','s6_lnsp500','s6_lnemp','s6_lnip','s6_ur','s12_ur','s12_lnemp','s12_lnsp500','s12_lnip']
# xlist = ['r10yrffma','s6_lnsp500','s6_lnemp']
# xlist = ['r10yrffma','s6_lnsp500','s6_lnemp','s3_ur','s6_lnip','rbaar10yr','r10yrr3yr']

# Current feature set: each series' level followed by its lags.
# Macro series use lags 1/3/6/12; rate series use lags 1-6, except rff which
# stops at lag 5 (l6_rff is deliberately left out).
xlist = []
for series in ('lnemp', 'ur', 'lnsp500', 'lnip'):
    xlist += [series] + [f"l{lag}_{series}" for lag in (1, 3, 6, 12)]
for series, max_lag in (('r10yr', 6), ('rff', 5), ('rbaa', 6), ('r3yr', 6)):
    xlist += [series] + [f"l{lag}_{series}" for lag in range(1, max_lag + 1)]

keep_cols = ["period", "sqrtrectime"] + xlist

# Hold-out rows: everything from index 779 onward is out-of-sample; the part of
# it up to and including index 804 is the validation window.
# (An earlier version used rec12mo as the target and 791 as the cut-off.)
df2_oos = df.loc[df.index >= 779, keep_cols]  # 791
df2_valid = df2_oos.loc[df2_oos.index <= 804]

# In-sample rows: r10yrffma and rec both present, and index below 779.
missing_inputs = np.isnan(df.r10yrffma) | np.isnan(df.rec)
df2 = df.loc[~missing_inputs, keep_cols]
df2 = df2.loc[df2.index < 779]
df2

Unnamed: 0,period,sqrtrectime,lnemp,l1_lnemp,l3_lnemp,l6_lnemp,l12_lnemp,ur,l1_ur,l3_ur,...,l4_rbaa,l5_rbaa,l6_rbaa,r3yr,l1_r3yr,l2_r3yr,l3_r3yr,l4_r3yr,l5_r3yr,l6_r3yr
20,12/1/1954,5.744563,10.806308,10.803242,10.797246,10.797430,10.813821,5.0,5.3,6.1,...,3.49,3.50,3.49,1.81,1.75,1.72,1.60,1.49,1.47,1.53
21,1/1/1955,5.656854,10.809647,10.806308,10.798391,10.796203,10.809101,4.9,5.0,5.7,...,3.47,3.49,3.50,1.99,1.81,1.75,1.72,1.60,1.49,1.47
22,2/1/1955,5.567764,10.812633,10.809647,10.803242,10.796018,10.807341,4.7,4.9,5.3,...,3.46,3.47,3.49,2.12,1.99,1.81,1.75,1.72,1.60,1.49
23,3/1/1955,5.477226,10.819018,10.812633,10.806308,10.797246,10.802775,4.6,4.7,5.0,...,3.45,3.46,3.47,2.26,2.12,1.99,1.81,1.75,1.72,1.60
24,4/1/1955,5.385165,10.824726,10.819018,10.809647,10.798391,10.803222,4.7,4.6,4.9,...,3.45,3.45,3.46,2.37,2.26,2.12,1.99,1.81,1.75,1.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,10/1/2017,5.385165,11.899181,11.898181,11.896636,11.892546,11.884965,4.2,4.3,4.3,...,4.37,4.55,4.57,1.68,1.51,1.48,1.54,1.49,1.48,1.44
775,11/1/2017,5.291503,11.900736,11.899181,11.897555,11.894023,11.885785,4.2,4.2,4.4,...,4.39,4.37,4.55,1.81,1.68,1.51,1.48,1.54,1.49,1.48
776,12/1/2017,5.196152,11.901726,11.900736,11.898181,11.895381,11.887313,4.1,4.2,4.3,...,4.31,4.39,4.37,1.96,1.81,1.68,1.51,1.48,1.54,1.49
777,1/1/2018,5.099020,11.902715,11.901726,11.899181,11.896636,11.888866,4.0,4.1,4.2,...,4.30,4.31,4.39,2.15,1.96,1.81,1.68,1.51,1.48,1.54


In [23]:
# Confirm df2 holds only period, the sqrtrectime target and the xlist features.
df2.columns

Index(['period', 'sqrtrectime', 'lnemp', 'l1_lnemp', 'l3_lnemp', 'l6_lnemp',
       'l12_lnemp', 'ur', 'l1_ur', 'l3_ur', 'l6_ur', 'l12_ur', 'lnsp500',
       'l1_lnsp500', 'l3_lnsp500', 'l6_lnsp500', 'l12_lnsp500', 'lnip',
       'l1_lnip', 'l3_lnip', 'l6_lnip', 'l12_lnip', 'r10yr', 'l1_r10yr',
       'l2_r10yr', 'l3_r10yr', 'l4_r10yr', 'l5_r10yr', 'l6_r10yr', 'rff',
       'l1_rff', 'l2_rff', 'l3_rff', 'l4_rff', 'l5_rff', 'rbaa', 'l1_rbaa',
       'l2_rbaa', 'l3_rbaa', 'l4_rbaa', 'l5_rbaa', 'l6_rbaa', 'r3yr',
       'l1_r3yr', 'l2_r3yr', 'l3_r3yr', 'l4_r3yr', 'l5_r3yr', 'l6_r3yr'],
      dtype='object')

In [None]:
# Scatter plot of s6_lnsp500 vs r10yrffma, colored by the rec12mo flag

# df2 only carries period, sqrtrectime and the xlist features, so the plotted
# columns are taken from df, restricted to the same rows as df2.
plot_cols = ['s6_lnsp500', 'r10yrffma', 'rec12mo']

fig, ax = plt.subplots()
sc = df.loc[df2.index, plot_cols].plot.scatter(
    x='s6_lnsp500', y='r10yrffma', c='rec12mo', colormap='winter', ax=ax) # rainbow
ax.set_title('s6_lnsp500 vs r10yrffma (color = rec12mo)');

In [None]:
# axes = pd.plotting.scatter_matrix(df2, alpha = 0.2, figsize = (9, 9), range_padding = 0.15)

In [25]:
from sklearn.model_selection import train_test_split

# In-sample features and target (square root of rectime).
X, y = df2[xlist], df2["sqrtrectime"]

# Shuffled 80/20 split with a fixed seed.
# NOTE(review): rows are consecutive months and the features include lags of
# the same series, so test rows are interleaved in time with training rows --
# confirm a random split (rather than a chronological one) is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)  # 0.33

# Validation window (subset of the out-of-sample rows).
X_valid, y_valid = df2_valid[xlist], df2_valid["sqrtrectime"]

# Full out-of-sample rows.
X_oos, y_oos = df2_oos[xlist], df2_oos["sqrtrectime"]

In [100]:
# Scale

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply it to every split.
ss = StandardScaler().fit(X_train)


def _rescale(frame):
    """Return `frame` standardized by `ss`, keeping its columns and index."""
    return pd.DataFrame(ss.transform(frame), columns=frame.columns, index=frame.index)


# NOTE: the X_* names are overwritten in place, so re-running this cell without
# re-running the split cell first would re-fit the scaler on already-scaled data.
X_train, X_test, X_valid, X_oos = (
    _rescale(part) for part in (X_train, X_test, X_valid, X_oos))

print(ss.scale_)

[0.3361544  0.33657984 0.33739686 0.33855491 0.34081369 1.53128309
 1.53250761 1.54078805 1.55516289 1.59296429 1.28260219 1.28274542
 1.28130758 1.27920557 1.27957573 0.49844093 0.49937029 0.5013119
 0.50424319 0.51027171 2.82774033 2.82177552 2.80879993 2.80096874
 2.79220041 2.78444926 2.77177976 3.65506856 3.6571577  3.63253738
 3.6333993  3.64233478 3.61989316 2.90454886 2.89688208 2.88527778
 2.87835553 2.87265362 2.86685834 2.8548704  3.15976171 3.15416314
 3.14063869 3.12861439 3.12466697 3.11345437 3.10016277]


In [None]:
# Models: Logistic, Random Forest, SVC, Voting Classifier, MLP

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

# These are classifiers, but y_train / y_test now hold the continuous
# sqrtrectime regression target, which sklearn classifiers reject with a
# ValueError. Use the rec12mo flag for the same rows as the class label instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on these rows -- the int
# cast fails loudly if any value is missing; confirm against the raw data.
y_train_cls = df.loc[X_train.index, "rec12mo"].astype(int)
y_test_cls = df.loc[X_test.index, "rec12mo"].astype(int)

log_clf = LogisticRegression(random_state=42,max_iter=1000)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(random_state=42, probability=True)
voting_clf = VotingClassifier(estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)], voting='soft') # hard
mlp_clf = MLPClassifier(hidden_layer_sizes=[50,3,2],random_state=42,max_iter=2000,activation='logistic',alpha=0.001)

# Every model, including voting_clf, is fitted exactly once inside the loop
# (voting_clf fits its own clones of the three base estimators).
for clf in (log_clf, rnd_clf, svm_clf, voting_clf, mlp_clf):
    clf.fit(X_train, y_train_cls)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test_cls, y_pred))

In [140]:
# nn diff parameters?

# Earlier MLPClassifier trials (trailing number = score noted at the time).
# without StandardScaler
# mlp_clf2 = MLPClassifier(hidden_layer_sizes=[10],random_state=42,max_iter=2000,activation='relu',solver='sgd',alpha=0.1) # 0.8846
# mlp_clf2 = MLPClassifier(hidden_layer_sizes=[22],random_state=42,max_iter=2000,activation='relu',solver='sgd',alpha=0.1) # 0.9615
# mlp_clf2 = MLPClassifier(hidden_layer_sizes=[22,11,5],random_state=42,max_iter=2000,activation='relu',solver='sgd',alpha=0.01) # 1.0
# with StandardScaler
# mlp_clf2 = MLPClassifier(hidden_layer_sizes=[20,10],random_state=42,max_iter=2500,activation='logistic',solver='lbfgs',alpha=1) # 0.9615

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error

# Despite the `clf` in its name, mlp_clf2 is a regressor for sqrtrectime:
# two tanh hidden layers (10 and 7 units) trained with adam.
mlp_clf2 = MLPRegressor(
    hidden_layer_sizes=[10, 7],
    activation='tanh',
    solver='adam',
    alpha=0.01,
    max_iter=2500,
    random_state=42,
)
mlp_clf2.fit(X_train, y_train)

model_name = type(mlp_clf2).__name__

# Mean absolute error on the random test split ...
y_pred = mlp_clf2.predict(X_test)
print(model_name, mean_absolute_error(y_test, y_pred))

# ... and on the held-out validation window.
y_pred_valid = mlp_clf2.predict(X_valid)
print(model_name, mean_absolute_error(y_valid, y_pred_valid))

MLPRegressor 0.99908828921751
MLPRegressor 0.7141391200061361


In [None]:
# Dump the fitted network parameters: first the weights (coefs_), then the
# biases (intercepts_).
for fitted_params in (mlp_clf2.coefs_, mlp_clf2.intercepts_):
    print(fitted_params)

In [None]:
# try nn

# from sklearn.neural_network import MLPClassifier
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import StandardScaler

# clf_mlp = MLPClassifier(hidden_layer_sizes=[50,50,50], random_state=42)
# pipeline = make_pipeline(Standard)


In [None]:
# feature importances (from random forest)

# Requires rnd_clf to be fitted already; importances are paired with the
# X_train column names in column order.
for feature, importance in zip(X_train.columns, rnd_clf.feature_importances_):
    print(feature, ":", round(importance, 2))

In [None]:
# Model: Decision Tree

from sklearn.tree import DecisionTreeClassifier

# A classifier needs class labels, but y_train / y_test hold the continuous
# sqrtrectime target (sklearn raises a ValueError on it). Use the rec12mo flag
# for the same rows instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on these rows -- the int
# cast fails loudly on missing values; confirm against the raw data.
y_train_cls = df.loc[X_train.index, "rec12mo"].astype(int)
y_test_cls = df.loc[X_test.index, "rec12mo"].astype(int)

tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train_cls)
y_pred = tree_clf.predict(X_test)
print(tree_clf.__class__.__name__, accuracy_score(y_test_cls, y_pred))

In [None]:
# Model: Bagging ensembles (of Decision Trees)

from sklearn.ensemble import BaggingClassifier

# A classifier needs class labels, but y_train / y_test hold the continuous
# sqrtrectime target (sklearn raises a ValueError on it). Use the rec12mo flag
# for the same rows instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on these rows -- the int
# cast fails loudly on missing values; confirm against the raw data.
y_train_cls = df.loc[X_train.index, "rec12mo"].astype(int)
y_test_cls = df.loc[X_test.index, "rec12mo"].astype(int)

# 500 trees, each trained on a bootstrap sample of 350 training rows.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=500,
    max_samples=350, bootstrap=True, n_jobs=-1, random_state=42) # 100
bag_clf.fit(X_train, y_train_cls)
y_pred = bag_clf.predict(X_test)
print(bag_clf.__class__.__name__, accuracy_score(y_test_cls, y_pred))

In [None]:
# AdaBoost

from sklearn.ensemble import AdaBoostClassifier

# A classifier needs class labels, but y_train / y_test hold the continuous
# sqrtrectime target (sklearn raises a ValueError on it). Use the rec12mo flag
# for the same rows instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on these rows -- the int
# cast fails loudly on missing values; confirm against the raw data.
y_train_cls = df.loc[X_train.index, "rec12mo"].astype(int)
y_test_cls = df.loc[X_test.index, "rec12mo"].astype(int)

# `algorithm` is left at the library default: "SAMME.R" was deprecated in
# scikit-learn 1.4 and is rejected by later releases, while older releases
# already default to it.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2), n_estimators=200, # 2 200
    learning_rate=0.5, random_state=42)
ada_clf.fit(X_train, y_train_cls)
y_pred = ada_clf.predict(X_test)
print(ada_clf.__class__.__name__, accuracy_score(y_test_cls, y_pred))

In [None]:
# Random forest parameters

# A classifier needs class labels, but y_train / y_test hold the continuous
# sqrtrectime target (sklearn raises a ValueError on it). Use the rec12mo flag
# for the same rows instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on these rows -- the int
# cast fails loudly on missing values; confirm against the raw data.
y_train_cls = df.loc[X_train.index, "rec12mo"].astype(int)
y_test_cls = df.loc[X_test.index, "rec12mo"].astype(int)

# Larger forest with capped tree size (at most 32 leaves per tree).
rnd_clf2 = RandomForestClassifier(n_estimators=500, max_leaf_nodes=32, n_jobs=-1, random_state=42)
rnd_clf2.fit(X_train, y_train_cls)
y_pred = rnd_clf2.predict(X_test)
display(accuracy_score(y_test_cls, y_pred))

In [None]:
# Train/test/validate error

# Class labels for the three splits. y_train / y_test / y_valid hold the
# continuous sqrtrectime target, which accuracy_score cannot compare against
# classifier output, so the rec12mo flag for the same rows is used instead.
# NOTE(review): assumes rec12mo is a complete 0/1 flag on all of these rows
# (including the validation window) -- the int cast fails loudly otherwise.
y_train_cls, y_test_cls, y_valid_cls = (
    df.loc[part.index, "rec12mo"].astype(int) for part in (X_train, X_test, X_valid))

# Classifiers: accuracy on each split.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf, tree_clf, bag_clf, ada_clf, rnd_clf2, mlp_clf):
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    y_pred_valid = clf.predict(X_valid)
    print(clf.__class__.__name__,"- train:", round(accuracy_score(y_train_cls, y_pred_train),4), "- test:", round(accuracy_score(y_test_cls, y_pred_test),4), "- validate:", round(accuracy_score(y_valid_cls, y_pred_valid),4))

# mlp_clf2 is an MLPRegressor for sqrtrectime, so accuracy does not apply;
# report mean absolute error against the continuous target instead.
y_pred_train = mlp_clf2.predict(X_train)
y_pred_test = mlp_clf2.predict(X_test)
y_pred_valid = mlp_clf2.predict(X_valid)
print(mlp_clf2.__class__.__name__,"- train MAE:", round(mean_absolute_error(y_train, y_pred_train),4), "- test MAE:", round(mean_absolute_error(y_test, y_pred_test),4), "- validate MAE:", round(mean_absolute_error(y_valid, y_pred_valid),4))

In [None]:
# Predictions on random set of test data

# mlp_clf2 is an MLPRegressor, which provides predict() but not predict_proba(),
# so the test-set predictions are built with predict() -- the same way the
# validation and out-of-sample predictions are built. The log_clf probability
# columns are left out because log_clf is a classifier and cannot be fitted on
# the continuous sqrtrectime target.
# pred_test = log_clf.predict_proba(X_test)
# dfpred_test = pd.DataFrame(pred_test, index = X_test.index.values)
pred_test2 = mlp_clf2.predict(X_test) # svm_clf mlp_clf2
dfpred_test2 = pd.DataFrame(pred_test2, index = X_test.index.values)
# Column 0 of the result is the predicted sqrtrectime, next to the actual
# target and the (scaled) features.
xb_test = pd.concat([y_test,X_test,dfpred_test2], axis=1) # dfpred_test,
xb_test #.tail()

In [137]:
# Predictions on validation data

# pred_valid = log_clf.predict_proba(X_valid)
# dfpred_valid = pd.DataFrame(pred_valid, index = df2_valid.index.values)

# Predicted sqrtrectime for the validation window, from the scaled features.
pred_valid2 = mlp_clf2.predict(X_valid) # svm_clf mlp_clf2

# Re-attach the original row labels so the predictions line up with df2_valid;
# the prediction ends up in the unnamed column 0.
valid_rows = df2_valid.index.to_numpy()
dfpred_valid2 = pd.DataFrame(pred_valid2, index=valid_rows)

xb_valid = pd.concat([df2_valid, dfpred_valid2], axis="columns") # dfpred_valid,
xb_valid

Unnamed: 0,period,sqrtrectime,lnemp,l1_lnemp,l3_lnemp,l6_lnemp,l12_lnemp,ur,l1_ur,l3_ur,...,l5_rbaa,l6_rbaa,r3yr,l1_r3yr,l2_r3yr,l3_r3yr,l4_r3yr,l5_r3yr,l6_r3yr,0
779,3/1/2018,4.898979,11.906858,11.905332,11.901726,11.898181,11.891198,4.0,4.1,4.1,...,4.32,4.3,2.42,2.36,2.15,1.96,1.81,1.68,1.51,4.928425
780,4/1/2018,4.795832,11.907842,11.906858,11.902715,11.899181,11.892546,4.0,4.0,4.0,...,4.27,4.32,2.52,2.42,2.36,2.15,1.96,1.81,1.68,4.770818
781,5/1/2018,4.690416,11.910056,11.907842,11.905332,11.900736,11.894023,3.8,4.0,4.1,...,4.22,4.27,2.66,2.52,2.42,2.36,2.15,1.96,1.81,5.009122
782,6/1/2018,4.582576,11.911487,11.910056,11.906858,11.901726,11.895381,4.0,3.8,4.0,...,4.26,4.22,2.65,2.66,2.52,2.42,2.36,2.15,1.96,4.519772
783,7/1/2018,4.472136,11.911856,11.911487,11.907842,11.902715,11.896636,3.8,4.0,4.0,...,4.51,4.26,2.7,2.65,2.66,2.52,2.42,2.36,2.15,4.647552
784,8/1/2018,4.358899,11.913539,11.911856,11.910056,11.905332,11.897555,3.8,3.8,3.8,...,4.64,4.51,2.71,2.7,2.65,2.66,2.52,2.42,2.36,4.937905
785,9/1/2018,4.242641,11.914121,11.913539,11.911487,11.906858,11.898181,3.7,3.8,4.0,...,4.67,4.64,2.84,2.71,2.7,2.65,2.66,2.52,2.42,4.973149
786,10/1/2018,4.123106,11.915219,11.914121,11.911856,11.907842,11.899181,3.8,3.7,3.8,...,4.83,4.67,2.94,2.84,2.71,2.7,2.65,2.66,2.52,4.520352
787,11/1/2018,4.0,11.915867,11.915219,11.913539,11.910056,11.900736,3.8,3.8,3.8,...,4.83,4.83,2.91,2.94,2.84,2.71,2.7,2.65,2.66,4.071597
788,12/1/2018,3.872983,11.917083,11.915867,11.914121,11.911487,11.901726,3.9,3.8,3.7,...,4.79,4.83,2.67,2.91,2.94,2.84,2.71,2.7,2.65,3.350024


In [138]:
# Predictions on out-of-sample data

# pred_oos = log_clf.predict_proba(X_oos)
# dfpred_oos = pd.DataFrame(pred_oos, index = df2_oos.index.values)

# Predicted sqrtrectime for every out-of-sample row, from the scaled features.
pred_oos2 = mlp_clf2.predict(X_oos) # svm_clf mlp_clf2

# Re-attach the original row labels so the predictions line up with df2_oos;
# the prediction ends up in the unnamed column 0.
oos_rows = df2_oos.index.to_numpy()
dfpred_oos2 = pd.DataFrame(pred_oos2, index=oos_rows)

xb_oos = pd.concat([df2_oos, dfpred_oos2], axis="columns") # dfpred_oos,
xb_oos.tail(48) # 18

Unnamed: 0,period,sqrtrectime,lnemp,l1_lnemp,l3_lnemp,l6_lnemp,l12_lnemp,ur,l1_ur,l3_ur,...,l5_rbaa,l6_rbaa,r3yr,l1_r3yr,l2_r3yr,l3_r3yr,l4_r3yr,l5_r3yr,l6_r3yr,0
805,5/1/2020,,11.798405,11.778523,11.933667,11.929436,11.922648,13.2,14.8,3.5,...,3.88,3.94,0.22,0.28,0.5,1.31,1.52,1.63,1.61,-4.593033
806,6/1/2020,,11.832506,11.798405,11.924359,11.930266,11.924001,11.0,13.2,4.4,...,3.77,3.88,0.22,0.22,0.28,0.5,1.31,1.52,1.63,-4.634816
807,7/1/2020,,11.843954,11.832506,11.778523,11.931932,11.924598,10.2,11.0,14.8,...,3.61,3.77,0.17,0.22,0.22,0.28,0.5,1.31,1.52,4.868452
808,8/1/2020,,11.854911,11.843954,11.798405,11.933667,11.926061,8.4,10.2,13.2,...,4.29,3.61,0.16,0.17,0.22,0.22,0.28,0.5,1.31,5.104881
809,9/1/2020,,11.862314,11.854911,11.832506,11.924359,11.927403,7.8,8.4,11.0,...,4.13,4.29,0.16,0.16,0.17,0.22,0.22,0.28,0.5,4.473707
810,10/1/2020,,11.867048,11.862314,11.843954,11.778523,11.928064,6.8,7.8,10.2,...,3.95,4.13,0.19,0.16,0.16,0.17,0.22,0.22,0.28,10.159817
811,11/1/2020,,11.868927,11.867048,11.854911,11.798405,11.929436,6.7,6.8,8.4,...,3.64,3.95,0.22,0.19,0.16,0.16,0.17,0.22,0.22,9.967505
812,12/1/2020,,11.867224,11.868927,11.862314,11.832506,11.930266,6.7,6.7,7.8,...,3.31,3.64,0.19,0.22,0.19,0.16,0.16,0.17,0.22,9.424374
813,1/1/2021,,11.870012,11.867224,11.867048,11.843954,11.931932,6.4,6.7,6.8,...,3.27,3.31,0.2,0.19,0.22,0.19,0.16,0.16,0.17,8.562769
814,2/1/2021,,11.873693,11.870012,11.868927,11.854911,11.933667,6.2,6.4,6.7,...,3.36,3.27,0.21,0.2,0.19,0.22,0.19,0.16,0.16,7.331202


In [None]:
# get plot to accept more than two X vars in classifier (but still only charting 2)
# x add more vars to classifier
# x see if way to play with svm parameters? higher dimensions?
# x play with random forest/bagging parameters? (see other notebook)
# x change from rec to rec12mo?
# x predict values for full df (look at months leading up to covid rec)
# x any other features? IP, more interest rates, 3/6/9/12mo growth rates?

# more growth rates (3 and 9) and do interest rate spreads instead of levels (maybe keep rff level too...)
# x calc training/test/(valid?) errors
# x should i keep the 2020 recession as out of sample too? just the rec12mo period, or also the expansion before?

# look at diff ensemble regressions to use (see other notebook and google results)
# sqrt rectime?

# try training a model on each expansion/recession time period window, then average all predictions together?
# try training a model for expansion/prerecession/recession?

# standardscaler: might want to select X subset, scale, then calc lags, etc. (but then how to scale new cols after...?)

# rectime
# monthly or daily?
# linear or sqrt?
# standardscaler?
# split test data into each expansion/recession time period window (rolling cross validation?)
# continue to leave last exp/rec (covid) for validation
# loop over activations, alphas, neurons, solvers...
