In [16]:
import numpy as np
import pandas as pd
from datetime import datetime

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
import warnings
warnings.filterwarnings('ignore')

In [17]:
def read_csv(file):
    """Load a CSV into a DataFrame, using its first column as the index.

    Parameters
    ----------
    file : str or file-like
        Path (or open buffer) handed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table; ``parse_dates=True`` asks pandas to try to parse
        the index column as dates.
    """
    return pd.read_csv(file, index_col=0, parse_dates=True)

In [18]:
def f1_eval(y_pred, dtrain):
    """Custom evaluation metric: F1 *error* (``1 - F1``), lower is better.

    Parameters
    ----------
    y_pred : array-like
        Raw predictions; they are rounded to the nearest integer before
        scoring.
    dtrain : object exposing ``get_label()``
        Source of the true labels.
        NOTE(review): presumably an xgboost ``DMatrix`` - confirm against caller.

    Returns
    -------
    tuple
        ``('f1_err', error)`` where ``error`` is ``1 - f1_score``.
    """
    labels = dtrain.get_label()
    hard_predictions = np.round(y_pred)
    return 'f1_err', 1 - f1_score(labels, hard_predictions)

## SVM Without PCA and with Feature Engineering


In [19]:
def scale(df, df_test):
    """Standardise two DataFrames with a scaler fitted on the first one only.

    The ``StandardScaler`` is fitted on ``df`` and then applied to both
    ``df`` and ``df_test``, so no statistics are computed from ``df_test``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame used to fit the scaler (and scaled itself).
    df_test : pandas.DataFrame
        Frame scaled with the statistics learnt from ``df``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_scaled, df_test_scaled)``, each keeping the index and column
        labels of its input.
    """
    fitted = preprocessing.StandardScaler().fit(df)

    def _as_frame(frame):
        # Re-wrap the transformed array so the original labels are kept.
        return pd.DataFrame(data=fitted.transform(frame),
                            index=frame.index,
                            columns=frame.columns)

    return _as_frame(df), _as_frame(df_test)

In [None]:
def _f1_error(tp, fp, fn):
    """Return ``1 - F1`` computed from confusion-matrix counts.

    Precision and recall fall back to 0 when their denominator is 0 (no
    predicted positives / no actual positives), so the result is 1.0 rather
    than NaN in those degenerate cases.
    """
    precision = tp/(tp+fp) if (tp+fp) else 0.0
    recall = tp/(tp+fn) if (tp+fn) else 0.0
    if precision+recall == 0:
        return 1.0
    f_score = 2*precision*recall/(precision+recall)
    return 1-f_score


def _report(title, y_true, y_pred):
    """Print the confusion matrix and F1 error for one data split."""
    print(title)
    # labels=[False, True] forces a 2x2 matrix even when a class is absent
    # from the split, so the four-way unpacking below cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[False, True]).ravel()
    print("    ","True", "False")
    print("True ", " ", tp, "  ", fp)
    print("False", " ",fn,"  ", tn)
    print("_______________________________________")
    # The reported value is 1 - F1 (0.0 is a perfect fit), so label it as an error.
    print("F1 error (1 - F1)", _f1_error(tp, fp, fn))
    print("---------------------------------------")


files = ['CompleteWeeklyIndexes', 'MeanWeeklyImputed', 'KNNWeeklyImputed', 'MovingAverageWeeklyImputed', 'RegressionWeeklyImputed', 'MIIWeeklyImputed']

# The target does not depend on the imputation variant: load and split it once
# instead of re-reading the CSV on every iteration.
# NOTE(review): parse_dates=True without index_col leaves the 'date' column
# unparsed, so the .loc slices on y compare date *strings*; this presumably
# relies on sorted ISO-formatted dates in the file - confirm.
y = pd.read_csv('sp500_target_regimes.csv', parse_dates=True)
y.index = y['date'].values
y = y[['regime']]
y = y['regime']=='BEAR'  # True for BEAR rows, False otherwise
y = pd.DataFrame (y, columns = ['regime'])

y_train = y.loc['2000-01-01':'2015-01-01']
y_test = y.loc['2015-01-02':]

for file in files:

    df = read_csv(file+'_training_FE.csv')
    print(file)
    X_test = read_csv(file + '_test_FE.csv')

    # Only the non-imputed data set is NaN-filled here.
    if "Complete" in file:
        df = df.fillna(0)
        X_test = X_test.fillna(0)

    X_train = df.loc['2000-01-01':'2015-01-01']
    X_test = X_test.loc['2015-01-02':]

    # Scaler statistics come from the training window only.
    X_train, X_test = scale(X_train, X_test)
    param_test1 = {
        'gamma':np.logspace(-2, 10, 13),
        'C':np.logspace(-9, 3, 13),
    }

    # Forward-chaining folds: each validation fold lies after its training fold.
    tscv = TimeSeriesSplit(n_splits=3)

    # `iid` is intentionally not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where it raises a TypeError. TimeSeriesSplit yields
    # equally sized test folds, so it did not change the averaged score.
    gsearch1 = GridSearchCV(estimator = svm.SVC(kernel='rbf', C=1, gamma=1, class_weight={1:2}),
                            param_grid = param_test1,
                            scoring='f1',
                            n_jobs=4,
                            cv=tscv)

    # Fit on the 1-D label column; a one-column DataFrame triggers a
    # column-vector conversion warning in scikit-learn.
    gsearch1.fit(X_train, y_train['regime'])
    print(gsearch1.best_params_, gsearch1.best_score_)

    # GridSearchCV (refit=True by default) has already refitted an SVC with the
    # best parameters on the full training window, so reuse it.
    model = gsearch1.best_estimator_

    y_train_pred = model.predict(X_train)
    print("_______________________________________")
    _report("______________Training_________________", y_train['regime'], y_train_pred)

    y_pred = model.predict(X_test)
    _report("_______________Testing_________________", y_test['regime'], y_pred)


CompleteWeeklyIndexes
{'C': 10.0, 'gamma': 0.01} 0.3971608609459077
_______________________________________
______________Training_________________
     True False
True    372    0
False   0    410
_______________________________________
F1 score 0.0
---------------------------------------
_______________Testing_________________
     True False
True    65    201
False   0    6
_______________________________________
F1 score 0.607250755287009
---------------------------------------
MeanWeeklyImputed
{'C': 10.0, 'gamma': 0.01} 0.3927076794308432
_______________________________________
______________Training_________________
     True False
True    372    0
False   0    410
_______________________________________
F1 score 0.0
---------------------------------------
_______________Testing_________________
     True False
True    65    201
False   0    6
_______________________________________
F1 score 0.607250755287009
---------------------------------------
KNNWeeklyImputed
{'C': 1.0, 'g