<a href="https://colab.research.google.com/github/olivier2106/SP/blob/master/VIX30_modeling_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Processing
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time

# Modeling
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa as tsa
import scipy.stats as scs
import scipy.signal as signal
from arch import arch_model

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.svm import SVC

# Stats, Data Viz, analytics
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.fftpack import fft
import pyfolio as pf


# Fonction d'analyse statistiques
def describe_plus(df, quantls = [5,25,50,75,95]):
    """
    Retourne un dataframe présentant les indicateurs statistiques principaux 
    (min, max, mean, std, skew) avec quantiles personnalisés pour chaque colonne du df
    Prend en entrée: le dataframe df
                     les quantiles (en %) : par défaut [5,25,50,75,95]
    """
    df_stats = pd.DataFrame()
    idx = ['min']
    idx.extend([str(q)+'%' for q in quantls])
    idx.extend(['max','mean','std','skew'])
    df_stats = pd.DataFrame(index = idx, columns = df.columns)
    for q in quantls:
        vals = df.quantile(q*0.01).values
        df_stats.loc[str(q)+'%',:] = vals
    df_stats.loc['min',:] = df.min().values
    df_stats.loc['max',:] = df.max().values
    df_stats.loc['mean',:] = df.mean().values
    df_stats.loc['std',:] = df.std().values
    df_stats.loc['skew',:] = df.skew().values
    
    return df_stats

import warnings
warnings.filterwarnings("ignore") 

In [0]:
# to plot time series - trace, autocorrelation, partial autocorrelation, QQ plot
def tsplot(y, lags=None, figsize=(15, 10), style='bmh'):
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    with plt.style.context(style):    
        fig = plt.figure(figsize=figsize)
        #mpl.rcParams['font.family'] = 'Ubuntu Mono'
        layout = (3, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        qq_ax = plt.subplot2grid(layout, (2, 0))
        pp_ax = plt.subplot2grid(layout, (2, 1))
        
        y.plot(ax=ts_ax)
        ts_ax.set_title('Time Series Analysis Plots')
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.05)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.05)
        sm.qqplot(y, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')        
        scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

        plt.tight_layout()
    return

## 1/ Import vix30 and all stocks data

In [0]:
VIX30 = pd.read_csv('./data/VIX30_daily.csv')[['Date','Close','Min','Max','Volume','fitSlope','fitStd']]
VIX = pd.read_csv('./data/VIX_daily.csv')[['Date','Close','Min','Max','Downticks','Upticks','fitSlope','fitStd']]
SPX = pd.read_csv('./data/SPX_daily.csv')[['Date','Close','Min','Max','Downticks','Upticks','fitSlope','fitStd']]
TVIX = pd.read_csv('./data/TVIX_daily.csv')[['Date','Close','Min','Max','Volume','fitSlope','fitStd']]

for df in VIX, SPX:
    df['Volume'] = (df['Downticks'] + df['Upticks']) / 2
    df.drop(['Downticks','Upticks'], axis = 1, inplace = True)

stocks = [VIX30, VIX, SPX, TVIX]

## 2/ Create features according to article

![image.png](attachment:image.png)

In [0]:
# Choose values for moving averages :
n = 15 # Basic day span for MA, VolMA, high, low, momentum
nmin_EMA = 12 # day span for minEMA
nmax_EMA = 26 # day span for maxEMA

def pipe(df):
    
    #### Process date to index
   
    df = df.fillna(df.mean())
    df.index = pd.DatetimeIndex(df['Date'], freq ='infer')
    df.drop('Date', axis = 1, inplace = True)
    
    #### daily return and return sign (-1 if ret < 0 and +1 if ret > 0 )
    
    ret = [np.nan]
    ret_sgn = [np.nan]
    
    for i in range(1, df.shape[0]):
        ret.append((df['Close'][i]/df['Close'][i-1]-1))
        ret_sgn.append(np.sign((df['Close'][i]/df['Close'][i-1]-1)))
        
    df['return'] = ret
    df['retsign'] = ret_sgn
    
    ### Medium value
    df['Medium'] = (df['Max'] + df['Min'] + df['Close'])/3
    
    #### Moving averages
    
    df[str(n) + '_SMA'] = df['Close'].rolling(n, min_periods = 1).mean()
    df[str(n) +'_VolSMA'] = df['Volume'].rolling(n, min_periods = 1).mean()
    df[str(n) + '_std'] = df['Close'].rolling(n, min_periods = 1).std()
    df[str(n) + '_WMA'] = df['Close'].rolling(n, min_periods = 1, win_type = 'triang').mean()
    df[str(n) + '_skew'] = df['Close'].rolling(n, min_periods = 1).apply(lambda x : skew(x))
    df[str(n) + '_high'] = df['Max'].rolling(n, min_periods = 1).max()
    df[str(n) + '_low'] = df['Min'].rolling(n, min_periods = 1).min()
    df[str(n) + '_momentum'] = df['Close'].rolling(n, min_periods = 1).apply(lambda x : x[-1] - x[0])
    
    df['minEMA'] = df['Close'].ewm(span = nmin_EMA, min_periods = 1).mean()
    df['maxEMA'] = df['Close'].ewm(span = nmax_EMA, min_periods = 1).mean()
    
    return df


# Apply pipe to df
stocks = [pipe(df) for df in stocks]

In [0]:
stocks[1]

Unnamed: 0_level_0,Close,Min,Max,fitSlope,fitStd,Volume,return,retsign,Medium,15_SMA,15_VolSMA,15_std,15_WMA,15_skew,15_high,15_low,15_momentum,minEMA,maxEMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-01-03,22.97,22.55,23.09,0.007589,0.001678,176.0,,,22.870000,22.970000,176.000000,,22.970000,0.000000e+00,23.09,22.55,0.00,22.970000,22.970000
2012-01-04,22.22,22.22,23.66,-0.039787,0.002408,159.0,-0.032651,-1.0,22.700000,22.595000,167.500000,0.530330,22.720000,0.000000e+00,23.66,22.22,-0.75,22.563750,22.580577
2012-01-05,21.48,21.38,23.09,-0.044584,0.002558,193.5,-0.033303,-1.0,21.983333,22.223333,176.166667,0.745006,22.471667,8.219538e-03,23.66,21.38,-1.49,22.140762,22.185150
2012-01-06,20.73,20.58,21.71,-0.026366,0.002916,158.5,-0.034916,-1.0,21.006667,21.850000,171.750000,0.963085,22.223000,-6.505834e-15,23.66,20.58,-2.24,21.695440,21.778354
2012-01-09,21.07,21.01,21.77,-0.017390,0.001395,186.0,0.016401,1.0,21.283333,21.694000,174.600000,0.904063,22.046667,4.064376e-01,23.66,20.58,-1.90,21.525510,21.614084
2012-01-10,20.69,20.06,20.69,0.014662,0.001348,175.5,-0.018035,-1.0,20.480000,21.526667,174.750000,0.906569,21.898095,6.332098e-01,23.66,20.06,-2.28,21.322437,21.428997
2012-01-11,21.05,20.99,21.21,-0.001333,0.000669,151.5,0.017400,1.0,21.083333,21.458571,171.428571,0.846964,21.788214,8.671410e-01,23.66,20.06,-1.92,21.261644,21.361594
2012-01-12,20.47,20.47,22.02,-0.031716,0.002698,206.0,-0.027553,-1.0,20.986667,21.335000,175.750000,0.858504,21.687500,9.384622e-01,23.66,20.06,-2.50,21.096440,21.217936
2012-01-13,21.42,21.39,22.28,-0.002358,0.003427,216.5,0.046409,1.0,21.696667,21.344444,180.277778,0.803556,21.556047,9.562900e-01,23.66,20.06,-1.55,21.160452,21.247887
2012-01-17,22.20,20.74,22.22,0.035249,0.003428,210.0,0.036415,1.0,21.720000,21.430000,183.250000,0.804460,21.445510,6.450931e-01,23.66,20.06,-0.77,21.357446,21.379269


In [0]:
# Indicators functions

def stochastic_k(Close, n_low, n_high):
    """
    Stochastic-k indicator (%)
    
    Variables : 
        - n_high : highest high in the last n days
        - n_low : lowest low in the last n days
        - Close : closing price at time t
    """
    
    ind = 100 * (Close - n_low) / (n_high - n_low)
    
    return ind

def RSI(n_upchanges, n_downchanges):
    """
    Relative strength index (RSI) indicator
    
    Variables :
        - n_upchanges : number of daily upchanges (i.e positive returns) in the last n days
        - n_downchanges : number of daily upchanges (i.e negative returns) in the last n days
    """
    
    ind = 100 - 100 / (1 + n_upchanges / n_downchanges)
    
    return ind
    
def MACD(min_EMA, max_EMA):
    """
    Moving average convergence divergence (MACD) indicator
    Oscillating indicator tracking market momentum (buy and sell signals)
    
    Variables :
        - min_EMA and max_EMA : exponential moving averages (min and max)
    """
    ind = min_EMA - max_EMA
    
    return ind

def AD_oscillator(High,Low,Close_tm1):
    """
    Accumulation / Distribution Indicator
    
    Variables :
        - High : highest price at t
        - Low : lowest price at
        - Close : closing price at time t - 1
    """
    if (High - Low) != 0:
        ind = (High - Close_tm1)/(High - Low)
    else: ind = (High - Close_tm1)
    
    return ind

def CCI(Medium, SM, D):
    """
    Commodity channel index
    
    Variables :
        - Medium : (highest + low + close) /3 price at t
        - SM : rolling average of medium in the last n days
        - D : rolling average of abs(medium - n_medium) in the last n days
    """
    ind = (Medium - SM)/(0.0015 * D)
    
    return ind

In [0]:
# Calculating indicators values to inject to df

def indicators(df):
    
    # Stochastic k
    df['stoc_k'] = df.apply(lambda x: stochastic_k(x['Close'],x[str(n)+'_low'],x[str(n)+'_high']), axis = 1)
    
    # Stochastic d
    df['stoc_d'] = df['stoc_k'].rolling(n, min_periods = 1).mean()
    
    #RSI
    sgn_sum = df['retsign'].rolling(n, min_periods = 1).sum()
    winsize = df['retsign'].rolling(n, min_periods = 1).count()
    df['n_upchanges'] = (sgn_sum + winsize) / 2
    df['n_downchanges'] = winsize - df['n_upchanges']
    df['RSI'] = df.apply(lambda x: RSI(x['n_upchanges'], x['n_downchanges']), axis = 1)
    df.drop(['n_upchanges','n_downchanges'], axis = 1, inplace = True)
    
    #MACD
    df['MACD'] = df.apply(lambda x: MACD(x['minEMA'],x['maxEMA']), axis = 1)
    
    #AD_Oscillator
    df['close_tm1'] = df['Close'].shift(1)
    df['AD_osc'] = df.apply(lambda x: AD_oscillator(x['Max'],x['Min'],x['close_tm1']), axis = 1)
    df.drop(['close_tm1'], axis = 1, inplace = True)
    
    #CCI
    df['SM'] = df['Medium'].rolling(n, min_periods = 1).mean()
    df['diff'] = np.abs(df['SM'] - df['Medium'])
    df['D'] = df['diff'].rolling(n, min_periods = 1).mean()
    df['CCI'] = df.apply(lambda x : CCI(x['Medium'],x['SM'],x['D']), axis = 1)
    df.drop(['SM','diff','D'], axis = 1, inplace = True)
    
    df.drop(df.index[0], inplace = True)
    
    #Preparing y (return at t+1)
    df['return'] = df['return'].shift(-1)
    df['retsign'] = df['retsign'].shift(-1)
    
    df.drop(df.index[-1], inplace = True)
    return df
    
stocks = [indicators(df) for df in stocks]

In [0]:
stocks[0].isna().sum()

Close          0
Min            0
Max            0
Volume         0
fitSlope       0
fitStd         0
return         0
retsign        0
Medium         0
15_SMA         0
15_VolSMA      0
15_std         0
15_WMA         0
15_skew        0
15_high        0
15_low         0
15_momentum    0
minEMA         0
maxEMA         0
stoc_k         0
stoc_d         0
RSI            0
MACD           0
AD_osc         0
CCI            0
dtype: int64

In [0]:
stocks[0].columns

Index(['Close', 'Min', 'Max', 'Volume', 'fitSlope', 'fitStd', 'return',
       'retsign', 'Medium', '15_SMA', '15_VolSMA', '15_std', '15_WMA',
       '15_skew', '15_high', '15_low', '15_momentum', 'minEMA', 'maxEMA',
       'stoc_k', 'stoc_d', 'RSI', 'MACD', 'AD_osc', 'CCI'],
      dtype='object')

#### Selecting all relevant columns for modeling :
    - Target : 'retsign'
    - Variables (6) : 'Close','Volume','fitStd','n_SMA','n_VolSMA','n_std'
    - Technical indicators (6) : 'stoc_k', 'stoc_d', 'RSI', 'MACD', 'AD_osc', 'CCI'

In [0]:
stocks[0]

Unnamed: 0_level_0,Close,Min,Max,Volume,fitSlope,fitStd,return,retsign,Medium,15_SMA,...,15_low,15_momentum,minEMA,maxEMA,stoc_k,stoc_d,RSI,MACD,AD_osc,CCI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-07-03,18.610000,18.610000,19.222857,582.253456,-0.010834,0.002925,1.0,1.0,18.814286,18.800000,...,18.610000,-0.380000,18.784167,18.792692,0.000000,0.000000,0.000000,-0.008526,0.379953,-1333.333333
2012-07-05,19.635714,18.857143,19.871429,498.080000,0.005483,0.002181,-1.0,-1.0,19.454762,19.078571,...,18.610000,0.645714,19.116526,19.095582,61.736887,20.578962,50.000000,0.020944,1.243662,857.056876
2012-07-06,19.301429,19.301429,20.397714,508.169143,-0.006336,0.002097,1.0,1.0,19.666857,19.134286,...,18.610000,0.311429,19.174892,19.153128,38.676682,25.103392,33.333333,0.021765,0.695074,1023.594745
2012-07-09,19.467143,19.440000,20.049143,445.149143,-0.006400,0.001057,1.0,1.0,19.652095,19.200857,...,18.610000,0.477143,19.254296,19.225949,47.946300,29.671974,50.000000,0.028347,1.227486,755.913730
2012-07-10,20.364286,19.128571,20.480000,572.814286,0.021728,0.001722,-1.0,-1.0,19.990952,19.394762,...,18.610000,1.374286,19.524082,19.453949,93.812070,40.361990,60.000000,0.070133,0.749471,1229.987778
2012-07-11,19.590000,19.554000,20.470000,634.056000,-0.004303,0.001787,1.0,1.0,19.871333,19.422653,...,18.610000,0.600000,19.538791,19.478145,52.406417,42.082622,50.000000,0.060646,0.115409,775.949423
2012-07-12,19.838571,19.557143,20.622286,633.045143,-0.017336,0.001904,-1.0,-1.0,20.006000,19.474643,...,18.610000,0.848571,19.601351,19.536218,61.053528,44.453986,57.142857,0.065132,0.969152,914.444247
2012-07-13,18.681429,18.681429,19.714286,719.332000,-0.015793,0.001536,1.0,1.0,19.025714,19.386508,...,18.610000,-0.308571,19.419357,19.409520,3.549624,39.909057,50.000000,0.009838,-0.120332,-1098.405137
2012-07-16,18.760000,18.522857,19.141429,565.084571,-0.007886,0.000998,-1.0,-1.0,18.808095,19.323857,...,18.522857,-0.230000,19.294409,19.319892,11.295591,37.047710,55.555556,-0.025483,0.743649,-1263.718890
2012-07-17,18.065714,17.952000,18.904286,854.261143,-0.014437,0.001792,1.0,1.0,18.307333,19.209481,...,17.952000,-0.924286,19.069588,19.157225,4.258506,34.066873,50.000000,-0.087637,0.151515,-1703.372920


In [0]:
# Basic Variables as features
feat1 = ['retsign','Close','Volume','fitStd',str(n)+'_SMA', str(n)+'_VolSMA']

# Technical Indicators as features
feat2 = ['retsign','stoc_k','stoc_d','RSI','MACD','AD_osc','CCI']

# Both features used
feat3 = ['retsign','Close','Volume','fitStd',str(n)+'_SMA', str(n)+'_VolSMA', str(n)+'_std', str(n)+'_std', 'stoc_k','stoc_d','RSI','MACD','AD_osc','CCI']

stocks_feat1 = [df[feat1] for df in stocks]
stocks_feat2 = [df[feat2] for df in stocks]
stocks_feat3 = [df[feat3] for df in stocks]

In [0]:
stocks2[0].columns

Index(['retsign', 'Close', 'Volume', 'fitStd', '15_SMA', '15_VolSMA', '15_std',
       'stoc_k', 'stoc_d', 'RSI', 'MACD', 'AD_osc', 'CCI'],
      dtype='object')

### Modeling Strategy


__3 model types__ : 
- SGD (linear model with SGD optimization)
- SVM
- Random Forest
- Gradient Boosted Trees

In [0]:
def create_train_test_splits(df, train_start, train_stop, test_stop):
    """ Creates rolling 1-day train test splits used for cross-validation of the predictive model:
    
    For example, if the overall train set ranges from 2012-01-01 (train_start) to 2012-12-31 (train_stop)
    and predictions are evaluated for the following month, i.e. 2013-01-01 to 2013-01-31 (test_stop)
    train test splits for cv will be:
    
    1set: train (2012-01-01 to 2012-12-31) -> 1 day test 2013-01-01
    2set: train (2012-01-01 to 2013-01-01) -> 1 day test 2013-01-02
    3set: train (2012-01-01 to 2013-01-02) -> 1 day test 2013-01-03
    etc.
    nset : train (2012-01-01 to 2013-01-30) -> 1 day test 2013-01-31
    
    Returns iterable object containing train/test splits to be inputed in CV
    """

    train_start_idx = df.index.get_loc(train_start)
    train_stop_idx = df.index.get_loc(train_stop)
    test_stop_idx = df.index.get_loc(test_stop)

    train_test_splits = (([*range(train_start_idx,i)],[i]) for i in range(train_stop_idx, test_stop_idx))
    
    print('Train set : from {0} to {1}, {2} samples'.format(train_start, train_stop,
                                                            len(range(train_start_idx, train_stop_idx))))
    print('Test set : from {0} to {1}, {2} samples'.format(df.index[train_stop_idx+1], test_stop,
                                                           len(range(train_stop_idx+1, test_stop_idx))))
    
    return train_test_splits

In [0]:
def model_fit_results(df,clf_model, train_start, train_stop, test_stop):
    
    print('Fitting model ...')
    start = time.time()
    
    scaler = StandardScaler(with_std = True)
    
    y = df['retsign']
    x = df.drop('retsign', axis = 1)
    x = pd.DataFrame(scaler.fit_transform(x), index = x.index, columns = x.columns)
    
    r_scores = []
    train_test_splits = create_train_test_splits(df, train_start, train_stop, test_stop)
    
    for train_index, test_index in train_test_splits:
        
        n_samples = len(train_index)
        alpha = 2/(n_samples+1)
        
        sample_weights = [(1-alpha)**(n_samples-(i+1)) for i in range(0,n_samples)]
        #sample_weights = np.ones(n_samples)  
        clf_results = clf_model.fit(x.iloc[train_index], y.iloc[train_index], sample_weight = sample_weights)
        score = clf_results.score(x.iloc[test_index], y.iloc[test_index])
        r_scores.append(score)
    
    #print(sample_weights)
    end = time.time()
    print("Completed in: {:.0f}min {:.2f}sec ".format((end - start)//60, (end- start)%60))
    print('Accuracy: {:.5f}'.format(np.mean(r_scores)))
    return r_scores, np.mean(r_scores)

### 3/ Modélisation

In [0]:
train_start = '2014-01-02'
train_stop = '2018-12-31'
test_stop = '2019-07-31'

clf1 = SGDClassifier()
clf2 = SVC()
clf3 = RandomForestClassifier(n_estimators = 50)
clf4 = GradientBoostingClassifier(n_estimators = 50)

### 3a) VIX30

In [0]:
results_VIX30 = pd.DataFrame(index = ['SGD','SVM','RF','GBT'], columns = ['feat1','feat2','feat3'])
i = 0
for stocks_dfs in [stocks_feat1,stocks_feat2,stocks_feat3]:
    VIX30 = stocks_dfs[0]

    results_clf1 = model_fit_results(VIX30,clf1, train_start, train_stop, test_stop)
    results_clf2 = model_fit_results(VIX30,clf2, train_start, train_stop, test_stop)
    results_clf3 = model_fit_results(VIX30,clf3, train_start, train_stop, test_stop)
    results_clf4 = model_fit_results(VIX30,clf4, train_start, train_stop, test_stop)
    accuracies = pd.Series([results_clf1[1],results_clf2[1],results_clf3[1],results_clf4[1]], index = ['SGD','SVM','RF','GBT'])
    results_VIX30.iloc[:,i] = accuracies
    i+=1

Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 180 samples
Completed in: 0min 1.61sec 
Accuracy: 0.43646
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 180 samples
Completed in: 0min 11.60sec 
Accuracy: 0.53039
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 180 samples
Completed in: 0min 23.21sec 
Accuracy: 0.55249
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 180 samples
Completed in: 0min 25.19sec 
Accuracy: 0.58564
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 180 samples
Completed in: 0min 1.76sec 
Accuracy: 0.54144
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1527 samples
Test set

In [0]:
results_VIX30

Unnamed: 0,feat1,feat2,feat3
SGD,0.436464,0.541436,0.508287
SVM,0.530387,0.486188,0.541436
RF,0.552486,0.524862,0.519337
GBT,0.585635,0.480663,0.502762


### 3b) VIX

In [0]:
results_VIX = pd.DataFrame(index = ['SGD','SVM','RF','GBT'], columns = ['feat1','feat2','feat3'])
i = 0
for stocks_dfs in [stocks_feat1,stocks_feat2,stocks_feat3]:
    VIX = stocks_dfs[1]

    results_clf1 = model_fit_results(VIX,clf1, train_start, train_stop, test_stop)
    results_clf2 = model_fit_results(VIX,clf2, train_start, train_stop, test_stop)
    results_clf3 = model_fit_results(VIX,clf3, train_start, train_stop, test_stop)
    results_clf4 = model_fit_results(VIX,clf4, train_start, train_stop, test_stop)
    accuracies = pd.Series([results_clf1[1],results_clf2[1],results_clf3[1],results_clf4[1]], index = ['SGD','SVM','RF','GBT'])
    results_VIX.iloc[:,i] = accuracies
    i+=1

Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 1.23sec 
Accuracy: 0.52055
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 6.44sec 
Accuracy: 0.58904
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 16.37sec 
Accuracy: 0.59589
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 16.29sec 
Accuracy: 0.60959
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 1.31sec 
Accuracy: 0.45890
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set 

In [0]:
results_VIX

Unnamed: 0,feat1,feat2,feat3
SGD,0.520548,0.458904,0.541096
SVM,0.589041,0.541096,0.609589
RF,0.59589,0.554795,0.589041
GBT,0.609589,0.561644,0.582192


## 3c) SPX

In [0]:
results_SPX = pd.DataFrame(index = ['SGD','SVM','RF','GBT'], columns = ['feat1','feat2','feat3'])
i = 0
for stocks_dfs in [stocks_feat1,stocks_feat2,stocks_feat3]:
    SPX = stocks_dfs[2]

    results_clf1 = model_fit_results(SPX,clf1, train_start, train_stop, test_stop)
    results_clf2 = model_fit_results(SPX,clf2, train_start, train_stop, test_stop)
    results_clf3 = model_fit_results(SPX,clf3, train_start, train_stop, test_stop)
    results_clf4 = model_fit_results(SPX,clf4, train_start, train_stop, test_stop)
    accuracies = pd.Series([results_clf1[1],results_clf2[1],results_clf3[1],results_clf4[1]], index = ['SGD','SVM','RF','GBT'])
    results_SPX.iloc[:,i] = accuracies
    i+=1

Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 1.08sec 
Accuracy: 0.45890
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 6.58sec 
Accuracy: 0.58219
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 17.87sec 
Accuracy: 0.53425
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 15.80sec 
Accuracy: 0.50685
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set : from 2019-01-02 00:00:00 to 2019-07-31, 145 samples
Completed in: 0min 1.17sec 
Accuracy: 0.56164
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1257 samples
Test set 

In [0]:
results_SPX

Unnamed: 0,feat1,feat2,feat3
SGD,0.458904,0.561644,0.486301
SVM,0.582192,0.547945,0.561644
RF,0.534247,0.513699,0.472603
GBT,0.506849,0.479452,0.493151


## 3d) TVIX

In [0]:
results_TVIX = pd.DataFrame(index = ['SGD','SVM','RF','GBT'], columns = ['feat1','feat2','feat3'])
i = 0
for stocks_dfs in [stocks_feat1,stocks_feat2,stocks_feat3]:
    TVIX = stocks_dfs[3]

    results_clf1 = model_fit_results(TVIX,clf1, train_start, train_stop, test_stop)
    results_clf2 = model_fit_results(TVIX,clf2, train_start, train_stop, test_stop)
    results_clf3 = model_fit_results(TVIX,clf3, train_start, train_stop, test_stop)
    results_clf4 = model_fit_results(TVIX,clf4, train_start, train_stop, test_stop)
    accuracies = pd.Series([results_clf1[1],results_clf2[1],results_clf3[1],results_clf4[1]], index = ['SGD','SVM','RF','GBT'])
    results_TVIX.iloc[:,i] = accuracies
    i+=1

Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 176 samples
Completed in: 0min 1.80sec 
Accuracy: 0.57062
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 176 samples
Completed in: 0min 12.56sec 
Accuracy: 0.59887
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 176 samples
Completed in: 0min 24.63sec 
Accuracy: 0.52542
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 176 samples
Completed in: 0min 26.64sec 
Accuracy: 0.55932
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set : from 2019-01-01 00:00:00 to 2019-07-31, 176 samples
Completed in: 0min 1.91sec 
Accuracy: 0.57627
Fitting model ...
Train set : from 2014-01-02 to 2018-12-31, 1516 samples
Test set

In [0]:
results_TVIX

Unnamed: 0,feat1,feat2,feat3
SGD,0.570621,0.576271,0.553672
SVM,0.59887,0.60452,0.59322
RF,0.525424,0.576271,0.531073
GBT,0.559322,0.542373,0.576271
