# SVM

In [1]:
import time
import datetime
import bz2

from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Load the dataset from CSV; the `date` column is parsed as day-first dates
# and used as the frame's index. The last expression previews the first rows.
df = pd.read_csv(
    'data_ML_19_05.csv',
    index_col=['date'],
    parse_dates=['date'],
    dayfirst=True,
)
df.head()

Unnamed: 0_level_0,close,close_change,open,high,low,volume,bb_bbh,bb_bbl,bb_bbm,ATR_10,...,ROCI_40,ROCI_60,ROCI_120,Vortex_diff,Vortex_neg,Vortex_pos,ichimoku_a,ichimoku_b,ichimoku_bl,ichimoku_cl
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-19,1.15947,1,1.190209,1.192707,1.183859,125293.7,1.224872,1.182937,1.203905,0.005052,...,0.0,0.0,0.0,-0.292063,0.566607,0.274544,1.199552,1.202186,1.202186,1.196918
2015-06-08,1.128,1,1.109748,1.115922,1.103201,178572.05,1.148248,1.085201,1.116725,0.011961,...,3.059706,3.102878,-6.589708,0.134893,0.93098,1.065874,1.113431,1.09936,1.111859,1.115003
2015-06-09,1.12926,1,1.108914,1.115403,1.102083,189387.35,1.147392,1.085082,1.116237,0.012098,...,2.95245,3.084663,-6.40161,0.114822,0.941023,1.055845,1.113217,1.09936,1.112258,1.114176
2015-06-10,1.13099,1,1.108156,1.115101,1.10189,191876.6,1.146536,1.08473,1.115633,0.01221,...,2.897022,3.109756,-6.191681,0.093968,0.952233,1.046201,1.113011,1.09936,1.112656,1.113366
2015-06-11,1.12458,0,1.108085,1.115129,1.102208,190246.55,1.145869,1.084101,1.114985,0.012281,...,2.946539,3.079194,-5.817274,0.076014,0.961502,1.037516,1.112856,1.09936,1.113054,1.112659


In [3]:
# Split sizes: 80% of the rows (rounded down) go to training, the rest to testing.
n_rows = len(df.close)
train_part = int(n_rows * 0.8)
test_part = n_rows - train_part
test_part

373

In [4]:
# Feature matrix: every column except `close_change` (used as the target in
# this notebook) and `close`. `DataFrame.drop` returns a new frame, so `df`
# itself is left untouched and this cell can be re-run safely.
features = df.drop(columns=['close_change', 'close'])

# Positional split: the last `test_part` rows are held out for testing.
# `.iloc` is used (instead of dropping rows by index label) so the split stays
# row-aligned with the positional slicing of the target, even if the date
# index were to contain duplicate labels.
X_train_raw = features.iloc[:-test_part]
X_test_raw = features.iloc[-test_part:]

# Fit the scaler on the training rows ONLY, then apply the same transformation
# to both parts. Fitting a second scaler on the test rows would standardise
# them with different means/variances than the model was trained on.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [5]:
# Target column, split by position: everything before the last `test_part`
# rows is training data; the last `test_part` rows are the test labels
# (converted to a NumPy array).
target = df['close_change']
y_train = target.iloc[:-test_part]
y_test = target.iloc[-test_part:].to_numpy()

In [8]:
# Exhaustive search over SVC regularisation strength `C` and kernel type.
# For every (C, kernel) pair the model is fitted on the training data and
# evaluated with cross-validated ROC AUC; the model with the highest mean
# cross-validation score is kept in `best_model`.
C_list = [100, 50, 20, 10, 5, 2, 1, 0.5, 0.3, 0.2, 0.1, 0.01, 0.005, 0.001]
kernel_list = ['linear', 'poly', 'rbf', 'sigmoid']

scores = []            # one array of per-fold ROC AUC values per (C, kernel), in loop order
best_model = None      # fitted SVC with the best mean CV score seen so far
best_score = -np.inf   # any finite mean score replaces the initial value

for C in C_list:
    for kernel in kernel_list:
        start_time = datetime.datetime.now()

        print("\nC is:", C, "kernel", kernel)
        model = SVC(C=C, kernel=kernel, random_state=241)

        # Fit on the whole training set so the model can be scored below and
        # stored as `best_model`. cross_val_score is given the same estimator
        # and training data and returns one score per fold.
        model.fit(X_train, y_train)

        fold_scores = cross_val_score(estimator=model, X=X_train, y=y_train, scoring="roc_auc")
        scores.append(fold_scores)
        mean_score = np.mean(fold_scores)

        # NOTE: this is the score on the data the model was just fitted on
        # (training score), not a held-out estimate.
        print("score", model.score(X_train, y_train))

        print("Time've passed", datetime.datetime.now() - start_time)
        print("Kross validation score=", fold_scores)
        print("Mean=", mean_score)

        if mean_score > best_score:
            best_model = model
            best_score = mean_score


C is: 100 kernel linear
score 0.6102150537634409
Time've passed 0:00:20.900029
Kross validation score= [0.59305312 0.53369369 0.44301802 0.55419501 0.68444444]
Mean= 0.5616808565559748

C is: 100 kernel poly
score 0.6216397849462365
Time've passed 0:00:02.330004
Kross validation score= [0.54119926 0.40472973 0.34027027 0.37931973 0.50022676]
Mean= 0.4331491492844378

C is: 100 kernel rbf
score 0.6411290322580645
Time've passed 0:00:01.260001
Kross validation score= [0.46371131 0.40761261 0.38869369 0.41814059 0.50176871]
Mean= 0.4359853831395932

C is: 100 kernel sigmoid
score 0.4959677419354839
Time've passed 0:00:00.560001
Kross validation score= [0.56115691 0.49441441 0.53373874 0.36870748 0.59251701]
Mean= 0.5101069112090872

C is: 50 kernel linear
score 0.5934139784946236
Time've passed 0:00:10.101014
Kross validation score= [0.58142992 0.5304955  0.42617117 0.52548753 0.65945578]
Mean= 0.5446079800573651

C is: 50 kernel poly
score 0.6115591397849462
Time've passed 0:00:01.58000

score 0.5235215053763441
Time've passed 0:00:00.560001
Kross validation score= [0.51772762 0.52864865 0.39702703 0.34929705 0.52276644]
Mean= 0.46309335771776644

C is: 0.01 kernel rbf
score 0.5047043010752689
Time've passed 0:00:00.870001
Kross validation score= [0.44632157 0.35418919 0.37148649 0.34566893 0.37673469]
Mean= 0.3788801757562401

C is: 0.01 kernel sigmoid
score 0.5047043010752689
Time've passed 0:00:00.810002
Kross validation score= [0.44591611 0.47211712 0.36711712 0.36975057 0.55659864]
Mean= 0.4422999110747455

C is: 0.005 kernel linear
score 0.5248655913978495
Time've passed 0:00:00.470000
Kross validation score= [0.48650719 0.45045045 0.36918919 0.37451247 0.55768707]
Mean= 0.4476692743561239

C is: 0.005 kernel poly
score 0.5134408602150538
Time've passed 0:00:00.560001
Kross validation score= [0.51475425 0.52887387 0.39126126 0.34675737 0.52226757]
Mean= 0.46078286490301623

C is: 0.005 kernel rbf
score 0.5047043010752689
Time've passed 0:00:00.870001
Kross valida