# Ensembles Classification

In [1]:
import time
import datetime
import bz2

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


# Gradient boosting 

In [2]:
df = pd.read_csv('data_ML_19_05.csv', index_col=['date'], parse_dates=['date'], dayfirst=True)
df.head()

Unnamed: 0_level_0,close,close_change,open,high,low,volume,bb_bbh,bb_bbl,bb_bbm,ATR_10,...,ROCI_40,ROCI_60,ROCI_120,Vortex_diff,Vortex_neg,Vortex_pos,ichimoku_a,ichimoku_b,ichimoku_bl,ichimoku_cl
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-19,1.15947,1,1.190209,1.192707,1.183859,125293.7,1.224872,1.182937,1.203905,0.005052,...,0.0,0.0,0.0,-0.292063,0.566607,0.274544,1.199552,1.202186,1.202186,1.196918
2015-06-08,1.128,1,1.109748,1.115922,1.103201,178572.05,1.148248,1.085201,1.116725,0.011961,...,3.059706,3.102878,-6.589708,0.134893,0.93098,1.065874,1.113431,1.09936,1.111859,1.115003
2015-06-09,1.12926,1,1.108914,1.115403,1.102083,189387.35,1.147392,1.085082,1.116237,0.012098,...,2.95245,3.084663,-6.40161,0.114822,0.941023,1.055845,1.113217,1.09936,1.112258,1.114176
2015-06-10,1.13099,1,1.108156,1.115101,1.10189,191876.6,1.146536,1.08473,1.115633,0.01221,...,2.897022,3.109756,-6.191681,0.093968,0.952233,1.046201,1.113011,1.09936,1.112656,1.113366
2015-06-11,1.12458,0,1.108085,1.115129,1.102208,190246.55,1.145869,1.084101,1.114985,0.012281,...,2.946539,3.079194,-5.817274,0.076014,0.961502,1.037516,1.112856,1.09936,1.113054,1.112659


In [3]:
train_part = int(len(df.close)*0.8)
test_part = len(df.close) - train_part
test_part

373

In [4]:
X_train = df.copy(deep=True)
X_train.drop(['close_change', 'close'],axis=1, inplace= True)
X_train.drop(X_train.index[-test_part:],inplace= True)
X_train = StandardScaler().fit_transform(X_train)

In [5]:
X_test = df.copy(deep=True)
X_test.drop(['close_change', 'close'],axis=1, inplace= True)
X_test.drop(X_test.index[:-test_part],inplace= True)
X_test = StandardScaler().fit_transform(X_test)

In [6]:
y_train =  df['close_change'][:-test_part]
y_test = df['close_change'][-test_part:].to_numpy()

In [7]:
#date_list = df.index[-test_part:].to_list()

In [8]:
learning_rates =  [5, 2, 1, 0.5, 0.3, 0.2, 0.1, 0.05, 0.01, 0.0075 , 0.005, 0.003 ,0.001]
scores= []
best_mode = None
best_score = 0
for learning_rate in learning_rates:
    start_time = datetime.datetime.now()
    
    print("\nLearing rate is:", learning_rate)
    model = GradientBoostingClassifier(learning_rate=learning_rate, n_estimators=250, random_state=241)
    #verbose=True
    
    model.fit(X_train, y_train)
    model.predict(X_test)
    
    scores.append(cross_val_score(estimator=model, X=X_train,y=y_train,scoring="roc_auc"))
    
    print("score", model.score(X_train, y_train))
    
    print("Time've passed", datetime.datetime.now() - start_time)
    print("Kross validation score=", scores[-1])
    print("Mean=", np.mean(scores[-1]))
    
    if np.mean(scores[-1]) > best_score:
        best_model = model
        best_score = np.mean(scores[-1])


Learing rate is: 5
score 0.49126344086021506
Time've passed 0:00:14.921021
Kross validation score= [0.49337748 0.44502252 0.46592342 0.4747619  0.44326531]
Mean= 0.4644701280548017

Learing rate is: 2
score 0.5221774193548387
Time've passed 0:00:15.431022
Kross validation score= [0.50013515 0.51031532 0.62826577 0.46156463 0.52743764]
Mean= 0.525543700410777

Learing rate is: 1
score 1.0
Time've passed 0:00:15.101021
Kross validation score= [0.49445871 0.42817568 0.39256757 0.42099773 0.42331066]
Mean= 0.4319020687804983

Learing rate is: 0.5
score 1.0
Time've passed 0:00:14.760021
Kross validation score= [0.45580484 0.4140991  0.35702703 0.38256236 0.4652381 ]
Mean= 0.41494628362651065

Learing rate is: 0.3
score 0.9899193548387096
Time've passed 0:00:14.880020
Kross validation score= [0.45830518 0.42445946 0.36396396 0.34641723 0.48031746]
Mean= 0.4146926587352321

Learing rate is: 0.2
score 0.967741935483871
Time've passed 0:00:14.990021
Kross validation score= [0.47573996 0.410900

## Random Forest

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold, cross_val_score

In [10]:
cv = KFold(shuffle=False)

In [11]:
Estimators_list = [5, 10, 25, 50, 75, 100, 300, 500]

In [12]:
scores_RF = []
best_mode_RF = None
best_score_RF = 0

for i in Estimators_list:
    print("\nEstimators:", i)
    model_RF = RandomForestClassifier(random_state=1, n_estimators=i)

    model_RF.fit(X_train, y_train)
    model_RF.predict(X_test)
    scores_RF.append(cross_val_score(estimator=model_RF, X=X_train,y=y_train,scoring="roc_auc"))
    
    print("score", model_RF.score(X_train, y_train))
    #print("score", model.score(X_train, y_train))
    
    print("Time've passed", datetime.datetime.now() - start_time)
    print("Kross validation score=", scores_RF[-1])
    print("Mean=", np.mean(scores_RF[-1]))

    
    if np.mean(scores_RF[-1]) > best_score:
        best_model_RF = model_RF
        best_score_RF = np.mean(scores_RF[-1])
    


Estimators: 5
score 0.9327956989247311
Time've passed 0:00:15.190022
Kross validation score= [0.49407578 0.34394144 0.35786036 0.35814059 0.44904762]
Mean= 0.4006131572848696

Estimators: 10
score 0.9657258064516129
Time've passed 0:00:15.640022
Kross validation score= [0.44510519 0.34864865 0.36995495 0.33678005 0.4061678 ]
Mean= 0.3813313287608463

Estimators: 25
score 0.9899193548387096
Time've passed 0:00:16.710024
Kross validation score= [0.46923008 0.3470045  0.35407658 0.33068027 0.45303855]
Mean= 0.39080599561583484

Estimators: 50
score 1.0
Time've passed 0:00:18.850027
Kross validation score= [0.4451953  0.3345045  0.35563063 0.34979592 0.41015873]
Mean= 0.37905701606458464

Estimators: 75
score 1.0
Time've passed 0:00:22.040031
Kross validation score= [0.43222057 0.32968468 0.35121622 0.34988662 0.3852381 ]
Mean= 0.36964923756030665

Estimators: 100
score 1.0
Time've passed 0:00:26.290037
Kross validation score= [0.42920214 0.3309009  0.3534009  0.34655329 0.38884354]
Mean=