**MSFT** - Microsoft Corporation \
**TSLA** - Tesla \
**COKE** - Coca-Cola Consolidated, Inc.  \
**INTC** - Intel Corporation  

In [2]:
# Import libraries
import os
import sys
import requests

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.dates import DateFormatter

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
# from sklearn.metrics import plot_roc_curve
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import mean_squared_error

# COKE

In [5]:
price_data = pd.read_csv("COKE.csv")

In [6]:
price_data.shape

(1243, 25)

In [7]:
price_data.loc[price_data['Target'] == 0.0] = 1.0

In [9]:
# Grab X & Y Columns
X_Cols = price_data[['RSI','r_percent','ROC','MACD']]
Y_Cols = price_data['Target']

In [10]:
X_Cols.describe()

Unnamed: 0,RSI,r_percent,ROC,MACD
count,1243.0,1243.0,1243.0,1243.0
mean,53.29359,-46.796794,0.01168,1.422082
std,19.102157,30.91712,0.084444,6.709661
min,1.0,-100.0,-0.301164,-19.968188
25%,40.689452,-75.674064,-0.027512,-2.637962
50%,52.788734,-45.065541,0.007092,0.907355
75%,66.959667,-17.578418,0.043427,4.898928
max,99.227415,1.0,1.0,35.501855


In [11]:
Y_Cols.value_counts()

Target
 1.0    647
-1.0    596
Name: count, dtype: int64

In [12]:
# Split X and y into train and test
X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, train_size = 0.65)

In [13]:
# Create a Random Forest Classifier
rand_frst_clf = RandomForestClassifier(n_estimators = 100, oob_score = True, criterion = "gini", random_state = 0)

# Fit the data to the model
rand_frst_clf.fit(X_train, y_train)

# Make predictions
y_pred = rand_frst_clf.predict(X_test)

In [14]:
# Print the Accuracy of our Model.
print('Correct Prediction (%): ', accuracy_score(y_test, y_pred, normalize = True) * 100.0)

Correct Prediction (%):  69.72477064220183


In [15]:
mean_squared_error(y_test, y_pred)

1.2110091743119267

## SVM

In [16]:
from sklearn import svm

svm_clf = svm.SVC()

svm_clf.fit(X_train, y_train)

In [17]:
y_pred = svm_clf.predict(X_test)

In [18]:
print('Correct Prediction (%): ', accuracy_score(y_test, y_pred, normalize = True) * 100.0)

Correct Prediction (%):  68.34862385321101


In [19]:
mean_squared_error(y_test, y_pred)

1.2660550458715596

## KNN

In [20]:
from sklearn.neighbors import KNeighborsClassifier

classifier = KNeighborsClassifier(n_neighbors=50)
classifier.fit(X_train, y_train)

In [21]:
y_pred = classifier.predict(X_test)

In [22]:
print('Correct Prediction (%): ', accuracy_score(y_test, y_pred, normalize = True) * 100.0)

Correct Prediction (%):  72.47706422018348


In [23]:
mean_squared_error(y_test, y_pred)

1.1009174311926606

## Logistic Regression

In [24]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()

logreg.fit(X_train,y_train)

y_pred=logreg.predict(X_test)

In [25]:
print('Correct Prediction (%): ', accuracy_score(y_test, y_pred, normalize = True) * 100.0)

Correct Prediction (%):  66.74311926605505


In [26]:
mean_squared_error(y_test, y_pred)

1.3302752293577982

## StratifiedKFold

In [27]:
from sklearn.model_selection import StratifiedKFold, KFold

# Powtórzyć kika razy i zapisać wyniki

skf = StratifiedKFold(n_splits=5) # shuffle default = False 
log = []
knn = []
rfc = []
svm = []

for train_index, test_index in skf.split(X_Cols, Y_Cols):
   # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X_Cols.iloc[train_index], X_Cols.iloc[test_index]
    y_train, y_test = Y_Cols.iloc[train_index], Y_Cols.iloc[test_index]
    # random forest
    rand_frst_clf.fit(X_train, y_train)
    rfc.append(rand_frst_clf.score(X_test, y_test))
    # svm
    svm_clf.fit(X_train, y_train)
    svm.append(svm_clf.score(X_test, y_test))
    # knn
    classifier.fit(X_train, y_train)
    knn.append(classifier.score(X_test, y_test))
    # logistic regression
    logreg.fit(X_train, y_train)
    log.append(logreg.score(X_test, y_test))

In [28]:
print('\nRandom Forest:', rfc)
print('Maximum Accuracy That can be obtained from this model is:',
      max(rfc)*100, '%')
print('Minimum Accuracy:',
      min(rfc)*100, '%')
#
print('\nSVM:', svm)
print('Maximum Accuracy That can be obtained from this model is:',
      max(svm)*100, '%')
print('Minimum Accuracy:',
      min(svm)*100, '%')
#
print('\nKNN:', knn)
print('Maximum Accuracy That can be obtained from this model is:',
      max(knn)*100, '%')
print('Minimum Accuracy:',
      min(knn)*100, '%')
#
print('\nLogistic regression:', log)
print('Maximum Accuracy:',
      max(log)*100, '%')
print('Minimum Accuracy:',
      min(log)*100, '%')


Random Forest: [0.6626506024096386, 0.6666666666666666, 0.6907630522088354, 0.6290322580645161, 0.6532258064516129]
Maximum Accuracy That can be obtained from this model is: 69.07630522088354 %
Minimum Accuracy: 62.903225806451616 %

SVM: [0.6626506024096386, 0.6666666666666666, 0.7630522088353414, 0.6612903225806451, 0.657258064516129]
Maximum Accuracy That can be obtained from this model is: 76.30522088353415 %
Minimum Accuracy: 65.7258064516129 %

KNN: [0.6746987951807228, 0.6947791164658634, 0.7550200803212851, 0.625, 0.6895161290322581]
Maximum Accuracy That can be obtained from this model is: 75.50200803212851 %
Minimum Accuracy: 62.5 %

Logistic regression: [0.6345381526104418, 0.642570281124498, 0.7309236947791165, 0.6653225806451613, 0.6411290322580645]
Maximum Accuracy: 73.09236947791165 %
Minimum Accuracy: 63.45381526104418 %


In [29]:
y_pred_rf=rand_frst_clf.predict(X_test)
y_pred_svm=svm_clf.predict(X_test)
y_pred_knn=classifier.predict(X_test)
y_pred_log=logreg.predict(X_test)

In [30]:
from sklearn import metrics
print("RFC")
print(metrics.accuracy_score(y_test, y_pred_rf))
print("SVM")
print(metrics.accuracy_score(y_test, y_pred_svm))
print("KNN")
print(metrics.accuracy_score(y_test, y_pred_knn))
print("Log")
print(metrics.accuracy_score(y_test, y_pred_log))

RFC
0.6532258064516129
SVM
0.657258064516129
KNN
0.6895161290322581
Log
0.6411290322580645


## Hyperparameter

## KNN

In [None]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier

# Split X and y into train and test
#X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, train_size = 0.65)

# KNN
metrics = ['euclidean','manhattan'] 
#n_neighbors = list(range(100, 150))
n_neighbors = list(range(100, 500, 50))
p = [2,4,6]
weights = ['uniform', 'distance']

random_grid_knn = {'metrics': metrics,
                   'n_neighbors': n_neighbors,
                   'p': p,
                   'weights': weights}

# define evaluation
cv_knn = RepeatedStratifiedKFold(n_splits=3, n_repeats=2)
# define the search
search_knn = BayesSearchCV(estimator= KNeighborsClassifier(), search_spaces=random_grid_knn, n_jobs=-1, cv=cv_knn, verbose = 100)
# perform the search
search_knn.fit(X_train, y_train)
# report the best result
print(search_knn.best_score_)
print(search_knn.best_params_)

Fitting 6 folds for each of 1 candidates, totalling 6 fits


In [10]:
estimator= KNeighborsClassifier()
estimator.get_params().keys()

dict_keys(['algorithm', 'leaf_size', 'metric', 'metric_params', 'n_jobs', 'n_neighbors', 'p', 'weights'])

## SVM

In [8]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from sklearn.svm import SVC
from tqdm import tqdm

# , 'poly'
C = (1e-2, 1e+3, 'log-uniform')

gamma = ['scale', 'auto']

kernel = ['rbf', 'sigmoid']

paramsSVM = {'C': C,
             'gamma': gamma,
             'kernel': kernel,}

# Split X and y into train and test
#X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, train_size = 0.65)

# define evaluation
cv_svm = RepeatedStratifiedKFold(n_splits=5, n_repeats=3)
# define the search
for i in tqdm(paramsSVM):
    search_svm = BayesSearchCV(estimator=SVC(), search_spaces=paramsSVM, n_jobs=-1, cv=cv_svm, verbose = 100)
    #search_svm = RandomizedSearchCV(estimator=SVC(), param_distributions=paramsSVM, n_jobs=-1, cv=cv_svm)
    # perform the search
    search_svm.fit(X_train, y_train)
    # report the best result
    print(search_svm.best_score_)
    print(search_svm.best_params_)


  0%|                                                                                            | 0/3 [00:00<?, ?it/s]

Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for eac



Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits



 33%|███████████████████████████▋                                                       | 1/3 [02:08<04:17, 128.78s/it]

0.5302814201364926
OrderedDict([('C', 165.46537473341982), ('gamma', 'auto'), ('kernel', 'rbf')])
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, total



Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for eac


 67%|███████████████████████████████████████████████████████▎                           | 2/3 [04:11<02:05, 125.27s/it]

0.5302814201364926
OrderedDict([('C', 0.037748639812620256), ('gamma', 'scale'), ('kernel', 'sigmoid')])
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates



Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits




Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for each of 1 candidates, totalling 15 fits
Fitting 15 folds for eac

100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [06:04<00:00, 121.64s/it]

0.5302814201364926
OrderedDict([('C', 0.05136968425913965), ('gamma', 'auto'), ('kernel', 'rbf')])





0.5302814201364926
OrderedDict([('C', 165.46537473341982), ('gamma', 'auto'), ('kernel', 'rbf')])
0.5302814201364926
OrderedDict([('C', 0.037748639812620256), ('gamma', 'scale'), ('kernel', 'sigmoid')])
0.5302814201364926
OrderedDict([('C', 0.05136968425913965), ('gamma', 'auto'), ('kernel', 'rbf')])

II
0.530902538148915
OrderedDict([('C', 0.09288120480023665), ('gamma', 'scale'), ('kernel', 'sigmoid')])
0.532765892186182
OrderedDict([('C', 0.16171819885310468), ('gamma', 'scale'), ('kernel', 'sigmoid')])
0.5302814201364926
OrderedDict([('C', 0.3472933527506412), ('gamma', 'scale'), ('kernel', 'rbf')])

In [8]:
# II
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from sklearn.svm import SVC
from tqdm import tqdm

# , 'poly'
C = (1e-2, 1e+3, 'log-uniform')

gamma = ['scale', 'auto']

kernel = ['rbf', 'sigmoid']

paramsSVM = {'C': C,
             'gamma': gamma,
             'kernel': kernel,}

# Split X and y into train and test
#X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, train_size = 0.65)

# define evaluation
cv_svm = RepeatedStratifiedKFold(n_splits=5, n_repeats=2)
# define the search
for i in tqdm(paramsSVM):
    search_svm = BayesSearchCV(estimator=SVC(), search_spaces=paramsSVM, n_jobs=-1, cv=cv_svm, verbose = 100)
    #search_svm = RandomizedSearchCV(estimator=SVC(), param_distributions=paramsSVM, n_jobs=-1, cv=cv_svm)
    # perform the search
    search_svm.fit(X_train, y_train)
    # report the best result
    print(search_svm.best_score_)
    print(search_svm.best_params_)


  0%|                                                                                            | 0/3 [00:00<?, ?it/s]

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac


 33%|███████████████████████████▋                                                       | 1/3 [09:52<19:44, 592.11s/it]

0.530902538148915
OrderedDict([('C', 0.09288120480023665), ('gamma', 'scale'), ('kernel', 'sigmoid')])
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, 



Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits



 67%|███████████████████████████████████████████████████████▎                           | 2/3 [11:29<05:01, 301.37s/it]

0.532765892186182
OrderedDict([('C', 0.16171819885310468), ('gamma', 'scale'), ('kernel', 'sigmoid')])
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, 



Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits




Fitting 10 folds for each of 1 candidates, totalling 10 fits


100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [13:22<00:00, 267.34s/it]

0.5302814201364926
OrderedDict([('C', 0.3472933527506412), ('gamma', 'scale'), ('kernel', 'rbf')])





## RFC

In [9]:
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from sklearn.svm import SVC
from tqdm import tqdm

# Split X and y into train and test
#X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, train_size = 0.65)

# Random Forest
n_estimators = list(range(100, 1000, 100))

max_features = ['auto', 'sqrt', None, 'log2']

max_depth = list(range(10, 100, 10))
max_depth.append(None)

min_samples_split = [5, 10, 15, 20, 25, 30]

min_samples_leaf = [7, 12, 14, 16 ,20, 24]

bootstrap = [True, False]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

for i in tqdm(random_grid):
    # define evaluation
    cv_rf = RepeatedStratifiedKFold(n_splits=5, n_repeats=5)
    # define the search
    search_rf = BayesSearchCV(estimator=RandomForestClassifier(), search_spaces=random_grid, n_jobs=-1, cv=cv_rf, verbose = 100)
    # perform the search
    search_rf.fit(X_train, y_train)
    # report the best result
    print(search_rf.best_score_)
    print(search_rf.best_params_)


  0%|                                                                                            | 0/6 [00:00<?, ?it/s]

Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for eac


 17%|█████████████▊                                                                     | 1/6 [08:57<44:49, 537.80s/it]

0.7001487615980371
OrderedDict([('bootstrap', True), ('max_depth', 50), ('max_features', 'log2'), ('min_samples_leaf', 16), ('min_samples_split', 25), ('n_estimators', 600)])
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 c



Fitting 25 folds for each of 1 candidates, totalling 25 fits



 33%|███████████████████████████▋                                                       | 2/6 [18:40<37:37, 564.40s/it]

0.6959297599877311
OrderedDict([('bootstrap', True), ('max_depth', 80), ('max_features', 'log2'), ('min_samples_leaf', 24), ('min_samples_split', 30), ('n_estimators', 600)])
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 c



Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits



 50%|█████████████████████████████████████████▌                                         | 3/6 [27:18<27:08, 542.88s/it]

0.6984126984126984
OrderedDict([('bootstrap', True), ('max_depth', 80), ('max_features', 'auto'), ('min_samples_leaf', 20), ('min_samples_split', 20), ('n_estimators', 900)])
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 c


 67%|███████████████████████████████████████████████████████▎                           | 4/6 [43:55<24:04, 722.46s/it]

0.6988635840809754
OrderedDict([('bootstrap', True), ('max_depth', 40), ('max_features', 'log2'), ('min_samples_leaf', 24), ('min_samples_split', 30), ('n_estimators', 200)])
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 c



Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits



 83%|█████████████████████████████████████████████████████████████████████▏             | 5/6 [51:17<10:21, 621.18s/it]

0.69396518671881
OrderedDict([('bootstrap', True), ('max_depth', 20), ('max_features', 'auto'), ('min_samples_leaf', 20), ('min_samples_split', 25), ('n_estimators', 200)])
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 candidates, totalling 25 fits
Fitting 25 folds for each of 1 can

100%|█████████████████████████████████████████████████████████████████████████████████| 6/6 [1:00:45<00:00, 607.66s/it]

0.695423663829461
OrderedDict([('bootstrap', True), ('max_depth', 70), ('max_features', 'sqrt'), ('min_samples_leaf', 24), ('min_samples_split', 5), ('n_estimators', 600)])





0.7001487615980371
OrderedDict([('bootstrap', True), ('max_depth', 50), ('max_features', 'log2'), ('min_samples_leaf', 16), ('min_samples_split', 25), ('n_estimators', 600)])
0.6959297599877311
OrderedDict([('bootstrap', True), ('max_depth', 80), ('max_features', 'log2'), ('min_samples_leaf', 24), ('min_samples_split', 30), ('n_estimators', 600)])
0.6984126984126984
OrderedDict([('bootstrap', True), ('max_depth', 80), ('max_features', 'auto'), ('min_samples_leaf', 20), ('min_samples_split', 20), ('n_estimators', 900)])
0.6988635840809754
OrderedDict([('bootstrap', True), ('max_depth', 40), ('max_features', 'log2'), ('min_samples_leaf', 24), ('min_samples_split', 30), ('n_estimators', 200)])
0.69396518671881
OrderedDict([('bootstrap', True), ('max_depth', 20), ('max_features', 'auto'), ('min_samples_leaf', 20), ('min_samples_split', 25), ('n_estimators', 200)])
0.695423663829461
OrderedDict([('bootstrap', True), ('max_depth', 70), ('max_features', 'sqrt'), ('min_samples_leaf', 24), ('min_samples_split', 5), ('n_estimators', 600)])

In [20]:
rf_random.best_params_

{'n_estimators': 350,
 'min_samples_split': 30,
 'min_samples_leaf': 24,
 'max_features': 'sqrt',
 'max_depth': 10,
 'bootstrap': True}

In [22]:
y_pred = rf_random.predict(X_test)
# ACCURACY

# Once the predictions have been made, then grab the accuracy score.
print('Correct Prediction (%): ', accuracy_score(y_test, rf_random.predict(X_test), normalize = True) * 100.0)

# CLASSIFICATION REPORT

# Traget names
target_names = ['Down Day', 'Up Day']

# Build a classifcation report
report = classification_report(y_true = y_test, y_pred = y_pred, target_names = target_names, output_dict = True)

# Add it to a dataframe, transpose it for readability
report_df = pd.DataFrame(report).transpose()
display(report_df)
print('\n')

# FEATURE IMPORTANCE

feature_imp = pd.Series(rand_frst_clf.feature_importances_, index=X_Cols.columns).sort_values(ascending=False)
display(feature_imp)

Correct Prediction (%):  68.57798165137615


Unnamed: 0,precision,recall,f1-score,support
Down Day,0.665049,0.668293,0.666667,205.0
Up Day,0.704348,0.701299,0.70282,231.0
accuracy,0.68578,0.68578,0.68578,0.68578
macro avg,0.684698,0.684796,0.684743,436.0
weighted avg,0.68587,0.68578,0.685821,436.0






NameError: name 'rand_frst_clf' is not defined

## Inne

In [None]:
# Define the traget names
target_names = ['Up', 'Down']

# Build a classifcation report
report = classification_report(y_true = y_test, y_pred = y_pred,target_names = target_names, output_dict = True)

# Add it to a data frame, transpose it for readability.
report_df = pd.DataFrame(report).transpose()
report_df

In [None]:
from sklearn.metrics import confusion_matrix, plot_confusion_matrix

# Create confusion matrix
rf_matrix = confusion_matrix(y_test, y_pred)

true_negatives = rf_matrix[0][0]
false_negatives = rf_matrix[1][0]
true_positives = rf_matrix[1][1]
false_positives = rf_matrix[0][1]

# Calculate the same metrics up above
accuracy = (true_negatives + true_positives) / (true_negatives + true_positives + false_negatives + false_positives)
percision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
specificity = true_negatives / (true_negatives + false_positives)

print('Accuracy: {}'.format(float(accuracy)))
print('Percision: {}'.format(float(percision)))
print('Recall: {}'.format(float(recall)))
print('Specificity: {}'.format(float(specificity)))

# Create graph to plot confusion matrix (%)
disp = plot_confusion_matrix(rand_frst_clf, X_test, y_test, display_labels = ['Down Day', 'Up Day'], normalize = 'true', cmap=plt.cm.Blues)
disp.ax_.set_title('Confusion Matrix - Normalized')
plt.show()

In [None]:
feature_imp = pd.Series(rand_frst_clf.feature_importances_, index=X_Cols.columns).sort_values(ascending = False)
feature_imp

In [None]:
# Create an ROC Curve plot
rfc_disp = plot_roc_curve(rand_frst_clf, X_test, y_test, alpha = 0.9)
plt.show()

In [None]:
# Print out-of-bag error score
print('Random Forest Out-Of-Bag Error Score: {}'.format(rand_frst_clf.oob_score_))

In [None]:
# ROC CURVE

fig, ax = plt.subplots()

# Create an ROC Curve plot
rfc_disp = plot_roc_curve(rand_frst_clf, X_test, y_test, alpha = 0.8, name='ROC Curve', lw=1, ax=ax)

# Add our Chance Line
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="ROC Curve Random Forest")

# Add the legend to the plot
ax.legend(loc="lower right")

plt.show()