# Predicting Economic Recessions Using Technical Indicators

This notebook contains the code for validating and training models for recession prediction. For a detailed explanation of each model, please refer to the [paper](https://github.com/michaelyip1433/Sample-Codes/blob/master/1.%20Predicting%20Economic%20Recessions%20Using%20Technical%20Indicators/Writing%20Sample.pdf). Diagrams are drawn in a [separate notebook](https://github.com/michaelyip1433/Sample-Codes/blob/master/1.%20Predicting%20Economic%20Recessions%20Using%20Technical%20Indicators/Diagrams.ipynb). We first start with some imports.

In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import pickle
import warnings
%matplotlib inline

### 1. Data Cleaning
Load the dataset for the project, and convert the column "Date" to datetime by ```pd.to_datetime```.

In [3]:
# Location of the input CSV. This is an absolute path, so edit it to point at
# wherever Data.csv lives on your machine before running the notebook.
csv_path = "/Users/cheukheiyip/Desktop/1. ML paper/Data.csv"

# Read the raw table and parse the "Date" column (ISO year-month-day) into datetimes.
data = pd.read_csv(csv_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d')
)

Since the data are observed daily, the terms “month” and “quarter” have to be carefully defined. In this paper, an effective month is defined to be 21 data points, and an effective quarter is defined to be 63 data points.

In [4]:
# An effective month is 21 consecutive daily data points;
# an effective quarter is three effective months, i.e. 63 data points.
eff_month_size = 21
eff_quarter_size = 3 * eff_month_size

Obtain the lagged variables as predictors, and then drop the original variables. 

In [5]:
# Spread lag: the spread shifted down by four effective quarters (one year)
data['Spread_lag_1Y'] = data['Spread'].shift(4 * eff_quarter_size)

# RSI lag: for every index and every RSI window, add a copy of the column
# shifted by one effective quarter (..._lag_1Q) and by two (..._lag_2Q).
# The loop order reproduces the column order index -> lag -> window.
for index_name in ('NYA', 'SPX', 'INDU'):
    for lag_quarters in (1, 2):
        for window in (100, 200, 300):
            source_col = '{}_RSI_{}'.format(index_name, window)
            lagged_col = '{}_lag_{}Q'.format(source_col, lag_quarters)
            data[lagged_col] = data[source_col].shift(eff_quarter_size * lag_quarters)


In [6]:
# DMI lag: for every index, window and DMI measure (PosDMI, NegDMI, Trend),
# add a copy of the column shifted by one effective quarter (..._lag_1Q)
# and by two effective quarters (..._lag_2Q).
# The loop order reproduces the column order index -> lag -> window -> measure.
for index_name in ('NYA', 'SPX', 'INDU'):
    for lag_quarters in (1, 2):
        for window in (100, 200, 300):
            for measure in ('PosDMI', 'NegDMI', 'Trend'):
                source_col = '{}_{}_{}'.format(index_name, measure, window)
                lagged_col = '{}_lag_{}Q'.format(source_col, lag_quarters)
                data[lagged_col] = data[source_col].shift(eff_quarter_size * lag_quarters)

In [7]:
# Unlagged indicator columns that exist once per index (SPX, NYA, INDU).
raw_indicator_suffixes = [
    'RSI_100', 'RSI_200', 'RSI_300',
    'PosDMI_100', 'NegDMI_100', 'Trend_100',
    'PosDMI_200', 'NegDMI_200', 'Trend_200',
    'PosDMI_300', 'NegDMI_300', 'Trend_300',
    'K_100_50', 'D_100_50_50', 'K_200_100',
    'D_200_100_50', 'K_300_200', 'D_300_200_200',
]

# Full list of original (contemporaneous) columns to remove: every
# index/suffix combination plus the unlagged yield spread.
raw_columns = [
    '{}_{}'.format(index_name, suffix)
    for index_name in ('SPX', 'NYA', 'INDU')
    for suffix in raw_indicator_suffixes
]
raw_columns.append('Spread')

# Only the lagged predictors (and whatever other columns the CSV carries) remain.
data = data.drop(raw_columns, axis=1)

Standardise predictors to enhance performance. 

In [8]:
# Standardise predictors: columns at positions 2..74 are rescaled to zero mean
# and unit standard deviation. The mean and standard deviation are computed
# over the whole column, i.e. over every row of `data`.
for col_pos in range(2, 75):
    column = data.iloc[:, col_pos]
    data.iloc[:, col_pos] = (column - column.mean()) / column.std()

### 2. Preparation for Model Evaluation and Training

Here we define some functions to evaluate and train models so that we can do it efficiently for different sets of predictors. 

#### Model Evaluation

In [9]:
def model_evaluation(y_true,y_predict, full_result = False):
    """Score binary (0/1) predictions and return the F1 score of class 1.

    Parameters
    ----------
    y_true : array-like of 0/1
        Observed labels.
    y_predict : array-like of 0/1
        Predicted labels, same length as ``y_true``.
    full_result : bool, default False
        When True, also print accuracy, precision, recall and F1.

    Returns
    -------
    float
        F1 score of the positive class. Any ratio whose denominator is zero
        (for example precision when nothing was predicted positive) is
        reported as 0.0 instead of NaN.
    """
    # Pin the label order so the matrix is always 2x2, laid out as
    # [[TN, FP], [FN, TP]], even when one class is absent from the inputs.
    M = confusion_matrix(y_true, y_predict, labels=[0, 1])
    true_neg, false_pos = M[0]
    false_neg, true_pos = M[1]

    def ratio(numerator, denominator):
        # Guard against 0/0, which would otherwise produce NaN
        return numerator / denominator if denominator > 0 else 0.0

    accuracy = ratio(true_neg + true_pos, M.sum())
    precision = ratio(true_pos, true_pos + false_pos)
    recall = ratio(true_pos, true_pos + false_neg)
    f1 = ratio(2 * recall * precision, recall + precision)

    # Hide the full results if only the F1 score is needed
    if full_result:
        print('Accuracy =', accuracy)
        print('Precision =', precision)
        print('Recall =', recall)
        print('F1 =', f1)

    return f1

#### Type A and Type B Models

Type A and Type B models are trained together by the function ```f```. Note that data points with missing values are dropped only after the boundaries of the training set, the validation set and the out-of-sample set are defined in each period. This is to ensure that all these sets are defined according to the definition of effective month and quarter, without being affected by missing values. That is, to make sure we are really predicting one or two effective quarters ahead.

In [10]:
def f(predictor,horizon, single_predictor = False):
    """Walk-forward SVM experiment producing Type A and Type B predictions.

    Starting at row 7701 and advancing one effective quarter at a time, each
    period fits an ``SVC`` for every candidate ``C`` on the training slice,
    keeps the candidate with the highest F1 on the validation slice, and uses
    it to predict the next effective quarter. The selected model is the one
    fitted on the training slice only; the validation slice is used solely to
    choose ``C``.

    Type A keeps every daily prediction. Type B condenses each quarter to a
    single label: 1 when at least half of the daily values are 1, else 0.

    Relies on the notebook-level ``data`` frame, ``eff_quarter_size`` and
    ``model_evaluation``.

    Parameters
    ----------
    predictor : str or list of str
        Column name(s) of ``data`` used as model inputs.
    horizon : int
        Number of effective quarters between the last row available for
        training/validation and the first row being predicted.
    single_predictor : bool, default False
        Pass True when ``predictor`` is a single column name, so that the
        values are reshaped to the 2-D layout ``SVC`` expects.

    Returns
    -------
    tuple
        ``(prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B)``.
    """
    # Kept as in the original: this silences warnings for the whole session.
    warnings.filterwarnings('ignore')
    forecast_start = 7701  # Data point 7701 is the first data point in year 2000

    # Number of data points for validation (before removing data points with missing values)
    valid_num = eff_quarter_size * 4 * 12

    # The set of hyperparameters for validation
    C_list = [0.01,0.05,0.1,0.2,0.4,0.6,0.8,1,10]

    def complete_rows(start, stop):
        # Slice by position first and drop incomplete rows afterwards, so the
        # window boundaries are not shifted by missing values. Keyword
        # arguments are required by pandas >= 2.0. Rows are dropped when ANY
        # column is missing, not only the selected predictors.
        return data[start:stop].dropna(axis=0, how="any")

    def design_matrix(frame):
        # A single column must be reshaped to (n_samples, 1) for scikit-learn
        if single_predictor:
            return frame[predictor].values.reshape(-1, 1)
        return frame[predictor]

    # Daily ground truth and dates over the whole out-of-sample span
    data_test = complete_rows(forecast_start, None)
    y_test_A = data_test['Recession']
    date_A = data_test['Date']

    y_test_B = np.empty(0)
    date_B = np.empty(0)
    prediction_svm_A = np.empty(0)
    prediction_svm_B = np.empty(0)

    last_row = int(data.index[-1])
    for first_prediction in range(forecast_start, last_row + 1, eff_quarter_size):

        # Number of data points we actually observe when making prediction
        in_sample_size = first_prediction - eff_quarter_size * horizon

        data_in = complete_rows(None, in_sample_size - valid_num)              # training
        data_valid = complete_rows(in_sample_size - valid_num, in_sample_size)  # validation
        data_out = complete_rows(first_prediction, first_prediction + eff_quarter_size)  # prediction

        # A window with no complete rows has nothing to predict; skip it
        # instead of failing on the first-date lookup below.
        if data_out.empty:
            continue

        x_in, y_in = design_matrix(data_in), data_in['Recession']
        x_valid, y_valid = design_matrix(data_valid), data_valid['Recession']
        x_out, y_out = design_matrix(data_out), data_out['Recession']

        # Validation: keep the fitted model with the best F1 so it does not
        # have to be refitted. probability=True is omitted because only
        # predict() is used and it makes fit() slower.
        best_model = None
        best_f1 = 0
        for c in C_list:
            candidate = SVC(C = c, gamma = 'auto')
            candidate.fit(x_in, y_in)
            f1 = model_evaluation(y_valid, candidate.predict(x_valid))

            if f1 > best_f1:
                best_f1 = f1
                best_model = candidate

        if best_model is None:
            # No candidate reached a positive validation F1 (previously this
            # ended in SVC(C=None) and crashed); fall back to C = 1.
            best_model = SVC(C = 1, gamma = 'auto')
            best_model.fit(x_in, y_in)

        window_prediction = best_model.predict(x_out)

        # Type A: every daily prediction
        prediction_svm_A = np.append(prediction_svm_A, window_prediction)

        # Type B: one label per quarter, 1 when at least half of the days are 1
        prediction_svm_B = np.append(prediction_svm_B, 1 if np.mean(window_prediction) >= 0.5 else 0)
        y_test_B = np.append(y_test_B, 1 if np.mean(y_out) >= 0.5 else 0)
        date_B = np.append(date_B, data_out['Date'].iloc[0])

    return prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B

#### Type C model 

The Type C model is trained separately by the function ```g```. The model is updated (retrained) every effective month.

In [11]:
def g(predictor, horizon, single_predictor = False):
    """Walk-forward SVM experiment producing Type C (monthly) predictions.

    Starting at row 7701 and advancing one effective month at a time, each
    period fits an ``SVC`` for every candidate ``C`` on the training slice,
    keeps the candidate with the highest F1 on the validation slice, and uses
    it to predict the next effective month. The selected model is the one
    fitted on the training slice only; the validation slice is used solely to
    choose ``C``. Each month is condensed to a single label: 1 when at least
    half of the daily values are 1, else 0.

    Relies on the notebook-level ``data`` frame, ``eff_month_size``,
    ``eff_quarter_size`` and ``model_evaluation``.

    Parameters
    ----------
    predictor : str or list of str
        Column name(s) of ``data`` used as model inputs.
    horizon : int
        Number of effective quarters between the last row available for
        training/validation and the first row being predicted.
    single_predictor : bool, default False
        Pass True when ``predictor`` is a single column name, so that the
        values are reshaped to the 2-D layout ``SVC`` expects.

    Returns
    -------
    tuple
        ``(prediction_svm_C, y_test_C, date_C)``.
    """
    # Kept as in the original: this silences warnings for the whole session.
    warnings.filterwarnings('ignore')
    forecast_start = 7701  # Data point 7701 is the first data point in year 2000

    # Number of data points for validation (before removing data points with missing values)
    valid_num = eff_quarter_size * 4 * 12

    # The set of hyperparameters for validation
    C_list = [0.01,0.05,0.1,0.2,0.4,0.6,0.8,1,10]

    def complete_rows(start, stop):
        # Slice by position first and drop incomplete rows afterwards, so the
        # window boundaries are not shifted by missing values. Keyword
        # arguments are required by pandas >= 2.0. Rows are dropped when ANY
        # column is missing, not only the selected predictors.
        return data[start:stop].dropna(axis=0, how="any")

    def design_matrix(frame):
        # A single column must be reshaped to (n_samples, 1) for scikit-learn
        if single_predictor:
            return frame[predictor].values.reshape(-1, 1)
        return frame[predictor]

    y_test_C = np.empty(0)
    prediction_svm_C = np.empty(0)

    # Prepare the series of time for plotting graphs
    date_C = np.empty(0)

    last_row = int(data.index[-1])
    for first_prediction in range(forecast_start, last_row + 1, eff_month_size):

        # Number of data points we actually observe when making prediction
        in_sample_size = first_prediction - eff_quarter_size * horizon

        data_in = complete_rows(None, in_sample_size - valid_num)              # training
        data_valid = complete_rows(in_sample_size - valid_num, in_sample_size)  # validation
        data_out = complete_rows(first_prediction, first_prediction + eff_month_size)  # prediction

        # A window with no complete rows has nothing to predict; skip it
        # instead of failing on the first-date lookup below.
        if data_out.empty:
            continue

        x_in, y_in = design_matrix(data_in), data_in['Recession']
        x_valid, y_valid = design_matrix(data_valid), data_valid['Recession']
        x_out, y_out = design_matrix(data_out), data_out['Recession']

        # Validation: keep the fitted model with the best F1 so it does not
        # have to be refitted. probability=True is omitted because only
        # predict() is used and it makes fit() slower.
        best_model = None
        best_f1 = 0
        for c in C_list:
            candidate = SVC(C = c, gamma = 'auto')
            candidate.fit(x_in, y_in)
            f1 = model_evaluation(y_valid, candidate.predict(x_valid))

            if f1 > best_f1:
                best_f1 = f1
                best_model = candidate

        if best_model is None:
            # No candidate reached a positive validation F1 (previously this
            # ended in SVC(C=None) and crashed); fall back to C = 1.
            best_model = SVC(C = 1, gamma = 'auto')
            best_model.fit(x_in, y_in)

        window_prediction = best_model.predict(x_out)

        # Type C: one label per month, 1 when at least half of the days are 1
        prediction_svm_C = np.append(prediction_svm_C, 1 if np.mean(window_prediction) >= 0.5 else 0)
        y_test_C = np.append(y_test_C, 1 if np.mean(y_out) >= 0.5 else 0)
        date_C = np.append(date_C, data_out['Date'].iloc[0])

    return prediction_svm_C, y_test_C, date_C

### 3. Model Evaluation and Training

We train models with different sets of predictors one by one. The results are then saved as dictionaries in pickle files so that we do not need to train again after closing the notebook.

#### Baseline Model

In [12]:
# Baseline: the one-year-lagged spread is the only predictor, predicting four effective quarters ahead
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f(
    'Spread_lag_1Y', horizon=4, single_predictor=True)

In [13]:
model_evaluation(y_test_A,prediction_svm_A,True) # Performance of Type A model

Accuracy = 0.8881260096930533
Precision = 0.5120967741935484
Recall = 0.44876325088339225
F1 = 0.47834274952919026


0.47834274952919026

In [14]:
model_evaluation(y_test_B,prediction_svm_B,True) # Performance of Type B model

Accuracy = 0.9113924050632911
Precision = 0.625
Recall = 0.5555555555555556
F1 = 0.5882352941176471


0.5882352941176471

In [20]:
# Save the results in a dictionary for easier reference
baseline_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
                'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [21]:
# Save the dictionary into a pickle
with open('Baseline_A_B.pickle', 'wb') as file:
    pickle.dump(baseline_A_B, file)

In [25]:
# Baseline Type C run: same single predictor and four-quarter horizon, retrained every effective month
prediction_svm_C, y_test_C, date_C = g(
    'Spread_lag_1Y', horizon=4, single_predictor=True)

In [26]:
model_evaluation(y_test_C,prediction_svm_C,True) # Performance of Type C model

Accuracy = 0.8940677966101694
Precision = 0.5652173913043478
Recall = 0.4642857142857143
F1 = 0.5098039215686274


0.5098039215686274

In [28]:
# Another dictionary for Type C model
baseline_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [29]:
with open('Baseline_C.pickle', 'wb') as file:
    pickle.dump(baseline_C, file)

#### Yield Spread (-4Q), RSI (-2Q)

In [126]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_RSI_100_lag_2Q','NYA_RSI_200_lag_2Q','NYA_RSI_300_lag_2Q',
    'SPX_RSI_100_lag_2Q','SPX_RSI_200_lag_2Q','SPX_RSI_300_lag_2Q','INDU_RSI_100_lag_2Q',
    'INDU_RSI_200_lag_2Q','INDU_RSI_300_lag_2Q'], 2)

In [127]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9111470113085622
Precision = 0.6060606060606061
Recall = 0.6360424028268551
F1 = 0.6206896551724138


0.6206896551724138

In [128]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9240506329113924
Precision = 0.6363636363636364
Recall = 0.7777777777777778
F1 = 0.7000000000000001


0.7000000000000001

In [33]:
RSI_2_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
             'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [34]:
with open('RSI(-2Q)_A_B.pickle', 'wb') as file:
    pickle.dump(RSI_2_A_B, file)

In [95]:
prediction_svm_C, y_test_C, date_C = g([
    'Spread_lag_1Y','NYA_RSI_100_lag_2Q','NYA_RSI_200_lag_2Q','NYA_RSI_300_lag_2Q',
    'SPX_RSI_100_lag_2Q','SPX_RSI_200_lag_2Q','SPX_RSI_300_lag_2Q','INDU_RSI_100_lag_2Q',
    'INDU_RSI_200_lag_2Q','INDU_RSI_300_lag_2Q'], 2)

In [96]:
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9152542372881356
Precision = 0.6428571428571429
Recall = 0.6428571428571429
F1 = 0.6428571428571429


0.6428571428571429

In [37]:
RSI_2_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [38]:
with open('RSI(-2Q)_C.pickle', 'wb') as file:
    pickle.dump(RSI_2_C, file)

#### Yield Spread (-4Q), DMI (-2Q)

In [16]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_PosDMI_100_lag_2Q','NYA_NegDMI_100_lag_2Q','NYA_Trend_100_lag_2Q',
    'NYA_PosDMI_200_lag_2Q','NYA_NegDMI_200_lag_2Q','NYA_Trend_200_lag_2Q',
    'NYA_PosDMI_300_lag_2Q','NYA_NegDMI_300_lag_2Q','NYA_Trend_300_lag_2Q',
    'SPX_PosDMI_100_lag_2Q','SPX_NegDMI_100_lag_2Q','SPX_Trend_100_lag_2Q',
    'SPX_PosDMI_200_lag_2Q','SPX_NegDMI_200_lag_2Q','SPX_Trend_200_lag_2Q',
    'SPX_PosDMI_300_lag_2Q','SPX_NegDMI_300_lag_2Q','SPX_Trend_300_lag_2Q',
    'INDU_PosDMI_100_lag_2Q','INDU_NegDMI_100_lag_2Q','INDU_Trend_100_lag_2Q',
    'INDU_PosDMI_200_lag_2Q','INDU_NegDMI_200_lag_2Q','INDU_Trend_200_lag_2Q',
    'INDU_PosDMI_300_lag_2Q','INDU_NegDMI_300_lag_2Q','INDU_Trend_300_lag_2Q'], 2)

In [17]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.8974151857835219
Precision = 0.5573122529644269
Recall = 0.49823321554770317
F1 = 0.5261194029850746


0.5261194029850746

In [18]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.8987341772151899
Precision = 0.5714285714285714
Recall = 0.4444444444444444
F1 = 0.5


0.5

In [41]:
DMI_2_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
             'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [42]:
with open('DMI(-2Q)_A_B.pickle', 'wb') as file:
    pickle.dump(DMI_2_A_B, file)

In [100]:
prediction_svm_C, y_test_C, date_C = g(['Spread_lag_1Y',
                'NYA_PosDMI_100_lag_2Q','NYA_NegDMI_100_lag_2Q','NYA_Trend_100_lag_2Q',
                'NYA_PosDMI_200_lag_2Q','NYA_NegDMI_200_lag_2Q','NYA_Trend_200_lag_2Q',
                'NYA_PosDMI_300_lag_2Q','NYA_NegDMI_300_lag_2Q','NYA_Trend_300_lag_2Q',
                'SPX_PosDMI_100_lag_2Q','SPX_NegDMI_100_lag_2Q','SPX_Trend_100_lag_2Q',
                'SPX_PosDMI_200_lag_2Q','SPX_NegDMI_200_lag_2Q','SPX_Trend_200_lag_2Q',
                'SPX_PosDMI_300_lag_2Q','SPX_NegDMI_300_lag_2Q','SPX_Trend_300_lag_2Q',
                'INDU_PosDMI_100_lag_2Q','INDU_NegDMI_100_lag_2Q','INDU_Trend_100_lag_2Q',
                'INDU_PosDMI_200_lag_2Q','INDU_NegDMI_200_lag_2Q','INDU_Trend_200_lag_2Q',
                'INDU_PosDMI_300_lag_2Q','INDU_NegDMI_300_lag_2Q','INDU_Trend_300_lag_2Q'], 2)

In [101]:
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9067796610169492
Precision = 0.625
Recall = 0.5357142857142857
F1 = 0.576923076923077


0.576923076923077

In [45]:
DMI_2_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [46]:
with open('DMI(-2Q)_C.pickle', 'wb') as file:
    pickle.dump(DMI_2_C, file)

#### Yield Spread (-4Q), RSI (-2Q), DMI (-2Q)

In [22]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_RSI_100_lag_2Q','NYA_RSI_200_lag_2Q','NYA_RSI_300_lag_2Q',
    'SPX_RSI_100_lag_2Q','SPX_RSI_200_lag_2Q','SPX_RSI_300_lag_2Q','INDU_RSI_100_lag_2Q',
    'INDU_RSI_200_lag_2Q','INDU_RSI_300_lag_2Q','NYA_PosDMI_100_lag_2Q',
    'NYA_NegDMI_100_lag_2Q','NYA_Trend_100_lag_2Q','NYA_PosDMI_200_lag_2Q',
    'NYA_NegDMI_200_lag_2Q','NYA_Trend_200_lag_2Q','NYA_PosDMI_300_lag_2Q',
    'NYA_NegDMI_300_lag_2Q','NYA_Trend_300_lag_2Q','SPX_PosDMI_100_lag_2Q',
    'SPX_NegDMI_100_lag_2Q','SPX_Trend_100_lag_2Q','SPX_PosDMI_200_lag_2Q',
    'SPX_NegDMI_200_lag_2Q','SPX_Trend_200_lag_2Q','SPX_PosDMI_300_lag_2Q',
    'SPX_NegDMI_300_lag_2Q','SPX_Trend_300_lag_2Q','INDU_PosDMI_100_lag_2Q',
    'INDU_NegDMI_100_lag_2Q','INDU_Trend_100_lag_2Q','INDU_PosDMI_200_lag_2Q',
    'INDU_NegDMI_200_lag_2Q','INDU_Trend_200_lag_2Q','INDU_PosDMI_300_lag_2Q',
    'INDU_NegDMI_300_lag_2Q','INDU_Trend_300_lag_2Q'], 2)

In [23]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.8737883683360258
Precision = 0.4594222833562586
Recall = 0.5901060070671378
F1 = 0.5166279969064191


0.5166279969064191

In [24]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.8607594936708861
Precision = 0.4
Recall = 0.4444444444444444
F1 = 0.4210526315789474


0.4210526315789474

In [49]:
RSI_2_DMI_2_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
             'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [50]:
with open('RSI(-2Q)_DMI(-2Q)_A_B.pickle', 'wb') as file:
    pickle.dump(RSI_2_DMI_2_A_B, file)

In [104]:
prediction_svm_C, y_test_C, date_C = g([
    'Spread_lag_1Y','NYA_RSI_100_lag_2Q','NYA_RSI_200_lag_2Q','NYA_RSI_300_lag_2Q',
    'SPX_RSI_100_lag_2Q','SPX_RSI_200_lag_2Q','SPX_RSI_300_lag_2Q','INDU_RSI_100_lag_2Q',
    'INDU_RSI_200_lag_2Q','INDU_RSI_300_lag_2Q','NYA_PosDMI_100_lag_2Q',
    'NYA_NegDMI_100_lag_2Q','NYA_Trend_100_lag_2Q','NYA_PosDMI_200_lag_2Q',
    'NYA_NegDMI_200_lag_2Q','NYA_Trend_200_lag_2Q','NYA_PosDMI_300_lag_2Q',
    'NYA_NegDMI_300_lag_2Q','NYA_Trend_300_lag_2Q','SPX_PosDMI_100_lag_2Q',
    'SPX_NegDMI_100_lag_2Q','SPX_Trend_100_lag_2Q','SPX_PosDMI_200_lag_2Q',
    'SPX_NegDMI_200_lag_2Q','SPX_Trend_200_lag_2Q','SPX_PosDMI_300_lag_2Q',
    'SPX_NegDMI_300_lag_2Q','SPX_Trend_300_lag_2Q','INDU_PosDMI_100_lag_2Q',
    'INDU_NegDMI_100_lag_2Q','INDU_Trend_100_lag_2Q','INDU_PosDMI_200_lag_2Q',
    'INDU_NegDMI_200_lag_2Q','INDU_Trend_200_lag_2Q','INDU_PosDMI_300_lag_2Q',
    'INDU_NegDMI_300_lag_2Q','INDU_Trend_300_lag_2Q'], 2)

In [105]:
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.8940677966101694
Precision = 0.5454545454545454
Recall = 0.6428571428571429
F1 = 0.5901639344262296


0.5901639344262296

In [53]:
RSI_2_DMI_2_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [54]:
with open('RSI(-2Q)_DMI(-2Q)_C.pickle', 'wb') as file:
    pickle.dump(RSI_2_DMI_2_C, file)

#### Yield Spread (-4Q), RSI (-1Q)

In [29]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_RSI_100_lag_1Q','NYA_RSI_200_lag_1Q','NYA_RSI_300_lag_1Q',
    'SPX_RSI_100_lag_1Q','SPX_RSI_200_lag_1Q','SPX_RSI_300_lag_1Q','INDU_RSI_100_lag_1Q',
    'INDU_RSI_200_lag_1Q','INDU_RSI_300_lag_1Q'], 1)

In [30]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9505250403877221
Precision = 0.8282208588957055
Recall = 0.715547703180212
F1 = 0.7677725118483413


0.7677725118483413

In [31]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9620253164556962
Precision = 0.875
Recall = 0.7777777777777778
F1 = 0.823529411764706


0.823529411764706

In [58]:
RSI_1_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
             'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [59]:
with open('RSI(-1Q)_A_B.pickle', 'wb') as file:
    pickle.dump(RSI_1_A_B, file)

In [13]:
prediction_svm_C, y_test_C, date_C = g([
    'Spread_lag_1Y','NYA_RSI_100_lag_1Q','NYA_RSI_200_lag_1Q','NYA_RSI_300_lag_1Q',
    'SPX_RSI_100_lag_1Q','SPX_RSI_200_lag_1Q','SPX_RSI_300_lag_1Q',
    'INDU_RSI_100_lag_1Q','INDU_RSI_200_lag_1Q','INDU_RSI_300_lag_1Q'], 1)

In [14]:
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9576271186440678
Precision = 0.875
Recall = 0.75
F1 = 0.8076923076923077


0.8076923076923077

In [62]:
RSI_1_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [63]:
with open('RSI(-1Q)_C.pickle', 'wb') as file:
    pickle.dump(RSI_1_C, file)

#### Yield Spread (-4Q), DMI (-1Q)

In [38]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_PosDMI_100_lag_1Q','NYA_NegDMI_100_lag_1Q','NYA_Trend_100_lag_1Q',
    'NYA_PosDMI_200_lag_1Q','NYA_NegDMI_200_lag_1Q','NYA_Trend_200_lag_1Q',
    'NYA_PosDMI_300_lag_1Q','NYA_NegDMI_300_lag_1Q','NYA_Trend_300_lag_1Q',
    'SPX_PosDMI_100_lag_1Q','SPX_NegDMI_100_lag_1Q','SPX_Trend_100_lag_1Q',
    'SPX_PosDMI_200_lag_1Q','SPX_NegDMI_200_lag_1Q','SPX_Trend_200_lag_1Q',
    'SPX_PosDMI_300_lag_1Q','SPX_NegDMI_300_lag_1Q','SPX_Trend_300_lag_1Q',
    'INDU_PosDMI_100_lag_1Q','INDU_NegDMI_100_lag_1Q','INDU_Trend_100_lag_1Q',
    'INDU_PosDMI_200_lag_1Q','INDU_NegDMI_200_lag_1Q','INDU_Trend_200_lag_1Q',
    'INDU_PosDMI_300_lag_1Q','INDU_NegDMI_300_lag_1Q','INDU_Trend_300_lag_1Q'], 1)

In [39]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9198303715670436
Precision = 0.6251851851851852
Recall = 0.7455830388692579
F1 = 0.6800966962127317


0.6800966962127317

In [40]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9493670886075949
Precision = 0.7272727272727273
Recall = 0.8888888888888888
F1 = 0.7999999999999999


0.7999999999999999

In [66]:
DMI_1_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
             'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [67]:
with open('DMI(-1Q)_A_B.pickle', 'wb') as file:
    pickle.dump(DMI_1_A_B, file)

In [19]:
prediction_svm_C, y_test_C, date_C = g(['Spread_lag_1Y',
                'NYA_PosDMI_100_lag_1Q','NYA_NegDMI_100_lag_1Q','NYA_Trend_100_lag_1Q',
                'NYA_PosDMI_200_lag_1Q','NYA_NegDMI_200_lag_1Q','NYA_Trend_200_lag_1Q',
                'NYA_PosDMI_300_lag_1Q','NYA_NegDMI_300_lag_1Q','NYA_Trend_300_lag_1Q',
                'SPX_PosDMI_100_lag_1Q','SPX_NegDMI_100_lag_1Q','SPX_Trend_100_lag_1Q',
                'SPX_PosDMI_200_lag_1Q','SPX_NegDMI_200_lag_1Q','SPX_Trend_200_lag_1Q',
                'SPX_PosDMI_300_lag_1Q','SPX_NegDMI_300_lag_1Q','SPX_Trend_300_lag_1Q',
                'INDU_PosDMI_100_lag_1Q','INDU_NegDMI_100_lag_1Q','INDU_Trend_100_lag_1Q',
                'INDU_PosDMI_200_lag_1Q','INDU_NegDMI_200_lag_1Q','INDU_Trend_200_lag_1Q',
                'INDU_PosDMI_300_lag_1Q','INDU_NegDMI_300_lag_1Q','INDU_Trend_300_lag_1Q'], 1)

In [20]:
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9364406779661016
Precision = 0.696969696969697
Recall = 0.8214285714285714
F1 = 0.7540983606557378


0.7540983606557378

In [70]:
DMI_1_C = {'prediction_C':prediction_svm_C,'actual_C':y_test_C, 'date_C':date_C}

In [71]:
with open('DMI(-1Q)_C.pickle', 'wb') as file:
    pickle.dump(DMI_1_C, file)

#### Yield Spread (-4Q), RSI (-1Q), DMI (-1Q)

In [44]:
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f([
    'Spread_lag_1Y','NYA_RSI_100_lag_1Q','NYA_RSI_200_lag_1Q','NYA_RSI_300_lag_1Q',
    'SPX_RSI_100_lag_1Q','SPX_RSI_200_lag_1Q','SPX_RSI_300_lag_1Q','INDU_RSI_100_lag_1Q',
    'INDU_RSI_200_lag_1Q','INDU_RSI_300_lag_1Q','NYA_PosDMI_100_lag_1Q',
    'NYA_NegDMI_100_lag_1Q','NYA_Trend_100_lag_1Q','NYA_PosDMI_200_lag_1Q',
    'NYA_NegDMI_200_lag_1Q','NYA_Trend_200_lag_1Q','NYA_PosDMI_300_lag_1Q',
    'NYA_NegDMI_300_lag_1Q','NYA_Trend_300_lag_1Q','SPX_PosDMI_100_lag_1Q',
    'SPX_NegDMI_100_lag_1Q','SPX_Trend_100_lag_1Q','SPX_PosDMI_200_lag_1Q',
    'SPX_NegDMI_200_lag_1Q','SPX_Trend_200_lag_1Q','SPX_PosDMI_300_lag_1Q',
    'SPX_NegDMI_300_lag_1Q','SPX_Trend_300_lag_1Q','INDU_PosDMI_100_lag_1Q',
    'INDU_NegDMI_100_lag_1Q','INDU_Trend_100_lag_1Q','INDU_PosDMI_200_lag_1Q',
    'INDU_NegDMI_200_lag_1Q','INDU_Trend_200_lag_1Q','INDU_PosDMI_300_lag_1Q',
    'INDU_NegDMI_300_lag_1Q','INDU_Trend_300_lag_1Q'], 1)

In [45]:
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9478998384491115
Precision = 0.7347560975609756
Recall = 0.8515901060070671
F1 = 0.7888707037643208


0.7888707037643208

In [46]:
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9620253164556962
Precision = 0.8
Recall = 0.8888888888888888
F1 = 0.8421052631578948


0.8421052631578948

In [74]:
RSI_1_DMI_1_A_B = {'prediction_A':prediction_svm_A, 'prediction_B':prediction_svm_B, 
                   'actual_A':y_test_A, 'actual_B':y_test_B, 'date_A':date_A, 'date_B':date_B}

In [75]:
with open('RSI(-1Q)_DMI(-1Q)_A_B.pickle', 'wb') as file:
    pickle.dump(RSI_1_DMI_1_A_B, file)

In [24]:
# Run g on the yield spread (lagged 4 quarters) plus every RSI and DMI series
# lagged one quarter. The feature names are listed in their original order,
# one index/window group per row.
# NOTE(review): the meaning of the trailing 1 is set by g's definition -- confirm.
prediction_svm_C, y_test_C, date_C = g(
    [
        'Spread_lag_1Y',
        # RSI (-1Q)
        'NYA_RSI_100_lag_1Q', 'NYA_RSI_200_lag_1Q', 'NYA_RSI_300_lag_1Q',
        'SPX_RSI_100_lag_1Q', 'SPX_RSI_200_lag_1Q', 'SPX_RSI_300_lag_1Q',
        'INDU_RSI_100_lag_1Q', 'INDU_RSI_200_lag_1Q', 'INDU_RSI_300_lag_1Q',
        # DMI (-1Q), NYA
        'NYA_PosDMI_100_lag_1Q', 'NYA_NegDMI_100_lag_1Q', 'NYA_Trend_100_lag_1Q',
        'NYA_PosDMI_200_lag_1Q', 'NYA_NegDMI_200_lag_1Q', 'NYA_Trend_200_lag_1Q',
        'NYA_PosDMI_300_lag_1Q', 'NYA_NegDMI_300_lag_1Q', 'NYA_Trend_300_lag_1Q',
        # DMI (-1Q), SPX
        'SPX_PosDMI_100_lag_1Q', 'SPX_NegDMI_100_lag_1Q', 'SPX_Trend_100_lag_1Q',
        'SPX_PosDMI_200_lag_1Q', 'SPX_NegDMI_200_lag_1Q', 'SPX_Trend_200_lag_1Q',
        'SPX_PosDMI_300_lag_1Q', 'SPX_NegDMI_300_lag_1Q', 'SPX_Trend_300_lag_1Q',
        # DMI (-1Q), INDU
        'INDU_PosDMI_100_lag_1Q', 'INDU_NegDMI_100_lag_1Q', 'INDU_Trend_100_lag_1Q',
        'INDU_PosDMI_200_lag_1Q', 'INDU_NegDMI_200_lag_1Q', 'INDU_Trend_200_lag_1Q',
        'INDU_PosDMI_300_lag_1Q', 'INDU_NegDMI_300_lag_1Q', 'INDU_Trend_300_lag_1Q',
    ],
    1)

In [25]:
# Compare prediction_svm_C with y_test_C. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9576271186440678
Precision = 0.78125
Recall = 0.8928571428571429
F1 = 0.8333333333333334


0.8333333333333334

In [78]:
# Bundle the C predictions, actual labels and dates into one dict.
# prediction_svm_C/y_test_C/date_C are reassigned by every later g(...) call,
# so this cell must run right after the matching training cell.
RSI_1_DMI_1_C = {
    'prediction_C': prediction_svm_C,
    'actual_C': y_test_C,
    'date_C': date_C,
}

In [79]:
# Serialize the RSI_1_DMI_1_C dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('RSI(-1Q)_DMI(-1Q)_C.pickle', 'wb') as file:
    pickle.dump(RSI_1_DMI_1_C, file)

#### Yield Spread (-4Q), RSI (-2Q), RSI (-1Q)

In [67]:
# Run f on the yield spread (lagged 4 quarters) plus every RSI series at both
# the one-quarter and two-quarter lags. The feature names are listed in their
# original order, one index/lag group per row.
# NOTE(review): the meaning of the trailing 1 is set by f's definition -- confirm.
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f(
    [
        'Spread_lag_1Y',
        # NYA: RSI (-1Q), then RSI (-2Q)
        'NYA_RSI_100_lag_1Q', 'NYA_RSI_200_lag_1Q', 'NYA_RSI_300_lag_1Q',
        'NYA_RSI_100_lag_2Q', 'NYA_RSI_200_lag_2Q', 'NYA_RSI_300_lag_2Q',
        # SPX: RSI (-1Q), then RSI (-2Q)
        'SPX_RSI_100_lag_1Q', 'SPX_RSI_200_lag_1Q', 'SPX_RSI_300_lag_1Q',
        'SPX_RSI_100_lag_2Q', 'SPX_RSI_200_lag_2Q', 'SPX_RSI_300_lag_2Q',
        # INDU: RSI (-1Q), then RSI (-2Q)
        'INDU_RSI_100_lag_1Q', 'INDU_RSI_200_lag_1Q', 'INDU_RSI_300_lag_1Q',
        'INDU_RSI_100_lag_2Q', 'INDU_RSI_200_lag_2Q', 'INDU_RSI_300_lag_2Q',
    ],
    1)

In [68]:
# Compare prediction_svm_A with y_test_A. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9460823909531503
Precision = 0.7655417406749556
Recall = 0.7614840989399293
F1 = 0.7635075287865368


0.7635075287865368

In [69]:
# Compare prediction_svm_B with y_test_B. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9493670886075949
Precision = 0.7777777777777778
Recall = 0.7777777777777778
F1 = 0.7777777777777778


0.7777777777777778

In [82]:
# Bundle the A and B predictions, actual labels and dates into one dict.
# The prediction_svm_*/y_test_*/date_* names are reassigned by every later
# f(...) call in this notebook, so this cell must run right after the matching
# training cell or it will capture another model's results.
RSI_2_RSI_1_A_B = {
    'prediction_A': prediction_svm_A,
    'prediction_B': prediction_svm_B,
    'actual_A': y_test_A,
    'actual_B': y_test_B,
    'date_A': date_A,
    'date_B': date_B,
}

In [83]:
# Serialize the RSI_2_RSI_1_A_B dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('RSI(-2Q)_RSI(-1Q)_A_B.pickle', 'wb') as file:
    pickle.dump(RSI_2_RSI_1_A_B, file)

In [28]:
# Run g on the yield spread (lagged 4 quarters) plus every RSI series at both
# the one-quarter and two-quarter lags. The feature names are listed in their
# original order, one index/lag group per row.
# NOTE(review): the meaning of the trailing 1 is set by g's definition -- confirm.
prediction_svm_C, y_test_C, date_C = g(
    [
        'Spread_lag_1Y',
        # NYA: RSI (-1Q), then RSI (-2Q)
        'NYA_RSI_100_lag_1Q', 'NYA_RSI_200_lag_1Q', 'NYA_RSI_300_lag_1Q',
        'NYA_RSI_100_lag_2Q', 'NYA_RSI_200_lag_2Q', 'NYA_RSI_300_lag_2Q',
        # SPX: RSI (-1Q), then RSI (-2Q)
        'SPX_RSI_100_lag_1Q', 'SPX_RSI_200_lag_1Q', 'SPX_RSI_300_lag_1Q',
        'SPX_RSI_100_lag_2Q', 'SPX_RSI_200_lag_2Q', 'SPX_RSI_300_lag_2Q',
        # INDU: RSI (-1Q), then RSI (-2Q)
        'INDU_RSI_100_lag_1Q', 'INDU_RSI_200_lag_1Q', 'INDU_RSI_300_lag_1Q',
        'INDU_RSI_100_lag_2Q', 'INDU_RSI_200_lag_2Q', 'INDU_RSI_300_lag_2Q',
    ],
    1)

In [29]:
# Compare prediction_svm_C with y_test_C. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9491525423728814
Precision = 0.7857142857142857
Recall = 0.7857142857142857
F1 = 0.7857142857142857


0.7857142857142857

In [86]:
# Bundle the C predictions, actual labels and dates into one dict.
# prediction_svm_C/y_test_C/date_C are reassigned by every later g(...) call,
# so this cell must run right after the matching training cell.
RSI_2_RSI_1_C = {
    'prediction_C': prediction_svm_C,
    'actual_C': y_test_C,
    'date_C': date_C,
}

In [87]:
# Serialize the RSI_2_RSI_1_C dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('RSI(-2Q)_RSI(-1Q)_C.pickle', 'wb') as file:
    pickle.dump(RSI_2_RSI_1_C, file)

#### Yield Spread (-4Q), DMI (-2Q), DMI (-1Q)

In [58]:
# Run f on the yield spread (lagged 4 quarters) plus every DMI series
# (PosDMI, NegDMI, Trend) at both the one-quarter and two-quarter lags.
# The feature names are listed in their original order, one window per row.
# NOTE(review): the meaning of the trailing 1 is set by f's definition -- confirm.
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f(
    [
        'Spread_lag_1Y',
        # NYA, DMI (-1Q)
        'NYA_PosDMI_100_lag_1Q', 'NYA_NegDMI_100_lag_1Q', 'NYA_Trend_100_lag_1Q',
        'NYA_PosDMI_200_lag_1Q', 'NYA_NegDMI_200_lag_1Q', 'NYA_Trend_200_lag_1Q',
        'NYA_PosDMI_300_lag_1Q', 'NYA_NegDMI_300_lag_1Q', 'NYA_Trend_300_lag_1Q',
        # NYA, DMI (-2Q)
        'NYA_PosDMI_100_lag_2Q', 'NYA_NegDMI_100_lag_2Q', 'NYA_Trend_100_lag_2Q',
        'NYA_PosDMI_200_lag_2Q', 'NYA_NegDMI_200_lag_2Q', 'NYA_Trend_200_lag_2Q',
        'NYA_PosDMI_300_lag_2Q', 'NYA_NegDMI_300_lag_2Q', 'NYA_Trend_300_lag_2Q',
        # SPX, DMI (-1Q)
        'SPX_PosDMI_100_lag_1Q', 'SPX_NegDMI_100_lag_1Q', 'SPX_Trend_100_lag_1Q',
        'SPX_PosDMI_200_lag_1Q', 'SPX_NegDMI_200_lag_1Q', 'SPX_Trend_200_lag_1Q',
        'SPX_PosDMI_300_lag_1Q', 'SPX_NegDMI_300_lag_1Q', 'SPX_Trend_300_lag_1Q',
        # SPX, DMI (-2Q)
        'SPX_PosDMI_100_lag_2Q', 'SPX_NegDMI_100_lag_2Q', 'SPX_Trend_100_lag_2Q',
        'SPX_PosDMI_200_lag_2Q', 'SPX_NegDMI_200_lag_2Q', 'SPX_Trend_200_lag_2Q',
        'SPX_PosDMI_300_lag_2Q', 'SPX_NegDMI_300_lag_2Q', 'SPX_Trend_300_lag_2Q',
        # INDU, DMI (-1Q)
        'INDU_PosDMI_100_lag_1Q', 'INDU_NegDMI_100_lag_1Q', 'INDU_Trend_100_lag_1Q',
        'INDU_PosDMI_200_lag_1Q', 'INDU_NegDMI_200_lag_1Q', 'INDU_Trend_200_lag_1Q',
        'INDU_PosDMI_300_lag_1Q', 'INDU_NegDMI_300_lag_1Q', 'INDU_Trend_300_lag_1Q',
        # INDU, DMI (-2Q)
        'INDU_PosDMI_100_lag_2Q', 'INDU_NegDMI_100_lag_2Q', 'INDU_Trend_100_lag_2Q',
        'INDU_PosDMI_200_lag_2Q', 'INDU_NegDMI_200_lag_2Q', 'INDU_Trend_200_lag_2Q',
        'INDU_PosDMI_300_lag_2Q', 'INDU_NegDMI_300_lag_2Q', 'INDU_Trend_300_lag_2Q',
    ],
    1)

In [59]:
# Compare prediction_svm_A with y_test_A. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9301292407108239
Precision = 0.6462765957446809
Recall = 0.8586572438162544
F1 = 0.7374810318664643


0.7374810318664643

In [60]:
# Compare prediction_svm_B with y_test_B. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9367088607594937
Precision = 0.6666666666666666
Recall = 0.8888888888888888
F1 = 0.761904761904762


0.761904761904762

In [90]:
# Bundle the A and B predictions, actual labels and dates into one dict.
# The prediction_svm_*/y_test_*/date_* names are reassigned by every later
# f(...) call in this notebook, so this cell must run right after the matching
# training cell or it will capture another model's results.
DMI_2_DMI_1_A_B = {
    'prediction_A': prediction_svm_A,
    'prediction_B': prediction_svm_B,
    'actual_A': y_test_A,
    'actual_B': y_test_B,
    'date_A': date_A,
    'date_B': date_B,
}

In [91]:
# Serialize the DMI_2_DMI_1_A_B dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('DMI(-2Q)_DMI(-1Q)_A_B.pickle', 'wb') as file:
    pickle.dump(DMI_2_DMI_1_A_B, file)

In [13]:
# Run g on the yield spread (lagged 4 quarters) plus every DMI series
# (PosDMI, NegDMI, Trend) at both the one-quarter and two-quarter lags.
# The feature names are listed in their original order, one window per row.
# NOTE(review): the meaning of the trailing 1 is set by g's definition -- confirm.
prediction_svm_C, y_test_C, date_C = g(
    [
        'Spread_lag_1Y',
        # NYA, DMI (-1Q)
        'NYA_PosDMI_100_lag_1Q', 'NYA_NegDMI_100_lag_1Q', 'NYA_Trend_100_lag_1Q',
        'NYA_PosDMI_200_lag_1Q', 'NYA_NegDMI_200_lag_1Q', 'NYA_Trend_200_lag_1Q',
        'NYA_PosDMI_300_lag_1Q', 'NYA_NegDMI_300_lag_1Q', 'NYA_Trend_300_lag_1Q',
        # NYA, DMI (-2Q)
        'NYA_PosDMI_100_lag_2Q', 'NYA_NegDMI_100_lag_2Q', 'NYA_Trend_100_lag_2Q',
        'NYA_PosDMI_200_lag_2Q', 'NYA_NegDMI_200_lag_2Q', 'NYA_Trend_200_lag_2Q',
        'NYA_PosDMI_300_lag_2Q', 'NYA_NegDMI_300_lag_2Q', 'NYA_Trend_300_lag_2Q',
        # SPX, DMI (-1Q)
        'SPX_PosDMI_100_lag_1Q', 'SPX_NegDMI_100_lag_1Q', 'SPX_Trend_100_lag_1Q',
        'SPX_PosDMI_200_lag_1Q', 'SPX_NegDMI_200_lag_1Q', 'SPX_Trend_200_lag_1Q',
        'SPX_PosDMI_300_lag_1Q', 'SPX_NegDMI_300_lag_1Q', 'SPX_Trend_300_lag_1Q',
        # SPX, DMI (-2Q)
        'SPX_PosDMI_100_lag_2Q', 'SPX_NegDMI_100_lag_2Q', 'SPX_Trend_100_lag_2Q',
        'SPX_PosDMI_200_lag_2Q', 'SPX_NegDMI_200_lag_2Q', 'SPX_Trend_200_lag_2Q',
        'SPX_PosDMI_300_lag_2Q', 'SPX_NegDMI_300_lag_2Q', 'SPX_Trend_300_lag_2Q',
        # INDU, DMI (-1Q)
        'INDU_PosDMI_100_lag_1Q', 'INDU_NegDMI_100_lag_1Q', 'INDU_Trend_100_lag_1Q',
        'INDU_PosDMI_200_lag_1Q', 'INDU_NegDMI_200_lag_1Q', 'INDU_Trend_200_lag_1Q',
        'INDU_PosDMI_300_lag_1Q', 'INDU_NegDMI_300_lag_1Q', 'INDU_Trend_300_lag_1Q',
        # INDU, DMI (-2Q)
        'INDU_PosDMI_100_lag_2Q', 'INDU_NegDMI_100_lag_2Q', 'INDU_Trend_100_lag_2Q',
        'INDU_PosDMI_200_lag_2Q', 'INDU_NegDMI_200_lag_2Q', 'INDU_Trend_200_lag_2Q',
        'INDU_PosDMI_300_lag_2Q', 'INDU_NegDMI_300_lag_2Q', 'INDU_Trend_300_lag_2Q',
    ],
    1)

In [14]:
# Compare prediction_svm_C with y_test_C. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9364406779661016
Precision = 0.6857142857142857
Recall = 0.8571428571428571
F1 = 0.7619047619047619


0.7619047619047619

In [94]:
# Bundle the C predictions, actual labels and dates into one dict.
# prediction_svm_C/y_test_C/date_C are reassigned by every later g(...) call,
# so this cell must run right after the matching training cell.
DMI_2_DMI_1_C = {
    'prediction_C': prediction_svm_C,
    'actual_C': y_test_C,
    'date_C': date_C,
}

In [95]:
# Serialize the DMI_2_DMI_1_C dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('DMI(-2Q)_DMI(-1Q)_C.pickle', 'wb') as file:
    pickle.dump(DMI_2_DMI_1_C, file)

#### Comprehensive Model

In [73]:
# Comprehensive model: run f on the yield spread (lagged 4 quarters) plus every
# RSI and DMI series at both the one-quarter and two-quarter lags.
# The feature names are listed in their original order (all RSI groups first,
# then all DMI groups), one group per row.
# NOTE(review): the meaning of the trailing 1 is set by f's definition -- confirm.
prediction_svm_A, prediction_svm_B, y_test_A, y_test_B, date_A, date_B = f(
    [
        'Spread_lag_1Y',
        # NYA: RSI (-1Q), then RSI (-2Q)
        'NYA_RSI_100_lag_1Q', 'NYA_RSI_200_lag_1Q', 'NYA_RSI_300_lag_1Q',
        'NYA_RSI_100_lag_2Q', 'NYA_RSI_200_lag_2Q', 'NYA_RSI_300_lag_2Q',
        # SPX: RSI (-1Q), then RSI (-2Q)
        'SPX_RSI_100_lag_1Q', 'SPX_RSI_200_lag_1Q', 'SPX_RSI_300_lag_1Q',
        'SPX_RSI_100_lag_2Q', 'SPX_RSI_200_lag_2Q', 'SPX_RSI_300_lag_2Q',
        # INDU: RSI (-1Q), then RSI (-2Q)
        'INDU_RSI_100_lag_1Q', 'INDU_RSI_200_lag_1Q', 'INDU_RSI_300_lag_1Q',
        'INDU_RSI_100_lag_2Q', 'INDU_RSI_200_lag_2Q', 'INDU_RSI_300_lag_2Q',
        # NYA, DMI (-1Q)
        'NYA_PosDMI_100_lag_1Q', 'NYA_NegDMI_100_lag_1Q', 'NYA_Trend_100_lag_1Q',
        'NYA_PosDMI_200_lag_1Q', 'NYA_NegDMI_200_lag_1Q', 'NYA_Trend_200_lag_1Q',
        'NYA_PosDMI_300_lag_1Q', 'NYA_NegDMI_300_lag_1Q', 'NYA_Trend_300_lag_1Q',
        # NYA, DMI (-2Q)
        'NYA_PosDMI_100_lag_2Q', 'NYA_NegDMI_100_lag_2Q', 'NYA_Trend_100_lag_2Q',
        'NYA_PosDMI_200_lag_2Q', 'NYA_NegDMI_200_lag_2Q', 'NYA_Trend_200_lag_2Q',
        'NYA_PosDMI_300_lag_2Q', 'NYA_NegDMI_300_lag_2Q', 'NYA_Trend_300_lag_2Q',
        # SPX, DMI (-1Q)
        'SPX_PosDMI_100_lag_1Q', 'SPX_NegDMI_100_lag_1Q', 'SPX_Trend_100_lag_1Q',
        'SPX_PosDMI_200_lag_1Q', 'SPX_NegDMI_200_lag_1Q', 'SPX_Trend_200_lag_1Q',
        'SPX_PosDMI_300_lag_1Q', 'SPX_NegDMI_300_lag_1Q', 'SPX_Trend_300_lag_1Q',
        # SPX, DMI (-2Q)
        'SPX_PosDMI_100_lag_2Q', 'SPX_NegDMI_100_lag_2Q', 'SPX_Trend_100_lag_2Q',
        'SPX_PosDMI_200_lag_2Q', 'SPX_NegDMI_200_lag_2Q', 'SPX_Trend_200_lag_2Q',
        'SPX_PosDMI_300_lag_2Q', 'SPX_NegDMI_300_lag_2Q', 'SPX_Trend_300_lag_2Q',
        # INDU, DMI (-1Q)
        'INDU_PosDMI_100_lag_1Q', 'INDU_NegDMI_100_lag_1Q', 'INDU_Trend_100_lag_1Q',
        'INDU_PosDMI_200_lag_1Q', 'INDU_NegDMI_200_lag_1Q', 'INDU_Trend_200_lag_1Q',
        'INDU_PosDMI_300_lag_1Q', 'INDU_NegDMI_300_lag_1Q', 'INDU_Trend_300_lag_1Q',
        # INDU, DMI (-2Q)
        'INDU_PosDMI_100_lag_2Q', 'INDU_NegDMI_100_lag_2Q', 'INDU_Trend_100_lag_2Q',
        'INDU_PosDMI_200_lag_2Q', 'INDU_NegDMI_200_lag_2Q', 'INDU_Trend_200_lag_2Q',
        'INDU_PosDMI_300_lag_2Q', 'INDU_NegDMI_300_lag_2Q', 'INDU_Trend_300_lag_2Q',
    ],
    1)

In [74]:
# Compare prediction_svm_A with y_test_A. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_A,prediction_svm_A,True)

Accuracy = 0.9545638126009693
Precision = 0.7548579970104634
Recall = 0.892226148409894
F1 = 0.8178137651821862


0.8178137651821862

In [75]:
# Compare prediction_svm_B with y_test_B. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_B,prediction_svm_B,True)

Accuracy = 0.9746835443037974
Precision = 0.8181818181818182
Recall = 1.0
F1 = 0.9


0.9

In [99]:
# Bundle the A and B predictions, actual labels and dates into one dict.
# The prediction_svm_*/y_test_*/date_* names are shared by every model section
# of this notebook, so this cell must run right after the matching f(...)
# training cell or it will capture another model's results.
Comprehensive_A_B = {
    'prediction_A': prediction_svm_A,
    'prediction_B': prediction_svm_B,
    'actual_A': y_test_A,
    'actual_B': y_test_B,
    'date_A': date_A,
    'date_B': date_B,
}

In [100]:
# Serialize the Comprehensive_A_B dict to a pickle file. The path is relative,
# so the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('Comprehensive_A_B.pickle', 'wb') as file:
    pickle.dump(Comprehensive_A_B, file)

In [17]:
# Comprehensive model: run g on the yield spread (lagged 4 quarters) plus every
# RSI and DMI series at both the one-quarter and two-quarter lags.
# The feature names are listed in their original order (all RSI groups first,
# then all DMI groups), one group per row.
# NOTE(review): the meaning of the trailing 1 is set by g's definition -- confirm.
prediction_svm_C, y_test_C, date_C = g(
    [
        'Spread_lag_1Y',
        # NYA: RSI (-1Q), then RSI (-2Q)
        'NYA_RSI_100_lag_1Q', 'NYA_RSI_200_lag_1Q', 'NYA_RSI_300_lag_1Q',
        'NYA_RSI_100_lag_2Q', 'NYA_RSI_200_lag_2Q', 'NYA_RSI_300_lag_2Q',
        # SPX: RSI (-1Q), then RSI (-2Q)
        'SPX_RSI_100_lag_1Q', 'SPX_RSI_200_lag_1Q', 'SPX_RSI_300_lag_1Q',
        'SPX_RSI_100_lag_2Q', 'SPX_RSI_200_lag_2Q', 'SPX_RSI_300_lag_2Q',
        # INDU: RSI (-1Q), then RSI (-2Q)
        'INDU_RSI_100_lag_1Q', 'INDU_RSI_200_lag_1Q', 'INDU_RSI_300_lag_1Q',
        'INDU_RSI_100_lag_2Q', 'INDU_RSI_200_lag_2Q', 'INDU_RSI_300_lag_2Q',
        # NYA, DMI (-1Q)
        'NYA_PosDMI_100_lag_1Q', 'NYA_NegDMI_100_lag_1Q', 'NYA_Trend_100_lag_1Q',
        'NYA_PosDMI_200_lag_1Q', 'NYA_NegDMI_200_lag_1Q', 'NYA_Trend_200_lag_1Q',
        'NYA_PosDMI_300_lag_1Q', 'NYA_NegDMI_300_lag_1Q', 'NYA_Trend_300_lag_1Q',
        # NYA, DMI (-2Q)
        'NYA_PosDMI_100_lag_2Q', 'NYA_NegDMI_100_lag_2Q', 'NYA_Trend_100_lag_2Q',
        'NYA_PosDMI_200_lag_2Q', 'NYA_NegDMI_200_lag_2Q', 'NYA_Trend_200_lag_2Q',
        'NYA_PosDMI_300_lag_2Q', 'NYA_NegDMI_300_lag_2Q', 'NYA_Trend_300_lag_2Q',
        # SPX, DMI (-1Q)
        'SPX_PosDMI_100_lag_1Q', 'SPX_NegDMI_100_lag_1Q', 'SPX_Trend_100_lag_1Q',
        'SPX_PosDMI_200_lag_1Q', 'SPX_NegDMI_200_lag_1Q', 'SPX_Trend_200_lag_1Q',
        'SPX_PosDMI_300_lag_1Q', 'SPX_NegDMI_300_lag_1Q', 'SPX_Trend_300_lag_1Q',
        # SPX, DMI (-2Q)
        'SPX_PosDMI_100_lag_2Q', 'SPX_NegDMI_100_lag_2Q', 'SPX_Trend_100_lag_2Q',
        'SPX_PosDMI_200_lag_2Q', 'SPX_NegDMI_200_lag_2Q', 'SPX_Trend_200_lag_2Q',
        'SPX_PosDMI_300_lag_2Q', 'SPX_NegDMI_300_lag_2Q', 'SPX_Trend_300_lag_2Q',
        # INDU, DMI (-1Q)
        'INDU_PosDMI_100_lag_1Q', 'INDU_NegDMI_100_lag_1Q', 'INDU_Trend_100_lag_1Q',
        'INDU_PosDMI_200_lag_1Q', 'INDU_NegDMI_200_lag_1Q', 'INDU_Trend_200_lag_1Q',
        'INDU_PosDMI_300_lag_1Q', 'INDU_NegDMI_300_lag_1Q', 'INDU_Trend_300_lag_1Q',
        # INDU, DMI (-2Q)
        'INDU_PosDMI_100_lag_2Q', 'INDU_NegDMI_100_lag_2Q', 'INDU_Trend_100_lag_2Q',
        'INDU_PosDMI_200_lag_2Q', 'INDU_NegDMI_200_lag_2Q', 'INDU_Trend_200_lag_2Q',
        'INDU_PosDMI_300_lag_2Q', 'INDU_NegDMI_300_lag_2Q', 'INDU_Trend_300_lag_2Q',
    ],
    1)

In [18]:
# Compare prediction_svm_C with y_test_C. The call prints accuracy, precision,
# recall and F1, and the value shown as the cell output equals the F1 score.
# NOTE(review): the third argument (True) presumably switches the printed
# summary on -- confirm in model_evaluation's definition.
model_evaluation(y_test_C,prediction_svm_C,True)

Accuracy = 0.9703389830508474
Precision = 0.8387096774193549
Recall = 0.9285714285714286
F1 = 0.8813559322033899


0.8813559322033899

In [103]:
# Bundle the C predictions, actual labels and dates into one dict.
# prediction_svm_C/y_test_C/date_C are shared by every model section of this
# notebook, so this cell must run right after the matching g(...) training cell.
Comprehensive_C = {
    'prediction_C': prediction_svm_C,
    'actual_C': y_test_C,
    'date_C': date_C,
}

In [104]:
# Serialize the Comprehensive_C dict to a pickle file. The path is relative, so
# the file is created in the current working directory; 'wb' overwrites any
# existing file of the same name.
# NOTE(review): presumably read back by the separate Diagrams notebook -- confirm.
with open('Comprehensive_C.pickle', 'wb') as file:
    pickle.dump(Comprehensive_C, file)