<h1 align="center">S2R Analytics</h1>
<h2 align="center">Profitability of Client X projects: run 1</h2>

# Table of Contents

* [Part 6](#part6): Classification
    * [6.0](#6_0): Data splitting
    * [6.1](#6_1): Models
<br />
<br />
* [Part 7](#part7): Fine-tuning
* [Part 8](#part8): Ensemble learning
* [Part 9](#part9): Evaluation of the final model

## Notebook Setup

In [None]:
# Essentials
import pandas as pd
from pandas import Series, DataFrame
from pandas.api.types import CategoricalDtype
pd.options.display.max_columns = None
import sqlite3
import pyodbc
import numpy as np; np.random.seed(1)

# Image creation and display
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.patches as mpatches
from matplotlib import pyplot
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.ticker import FuncFormatter
from yellowbrick.model_selection import FeatureImportances

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_transformer

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import SVC

# Metrics of accuracy
from numpy import mean
from numpy import std
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from pycm import *
import imbalanced_ensemble as imbens
from imbalanced_ensemble.ensemble.base import sort_dict_by_key
from collections import Counter

# Fine-tuning and enseble learning
from pprint import pprint
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.base import clone
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RandomizedSearchCV

# Other
import itertools as it
import io
import os
os.sys.path
import sys
import glob
import concurrent.futures
from __future__ import print_function
import binascii
import struct
from PIL import Image
import scipy
import scipy.misc
import scipy.cluster
import datetime, time
import functools, operator
from datetime import datetime
from numpy.random import seed
from numpy.random import randn
from numpy import percentile

In [None]:
df = pd.read_csv('../../ETL/csv-files/active_redef.csv')

In [None]:
df

## Part 6: <a class="anchor" id="part6"></a> Classification

### 6.0 <a class="anchor" id="6_0"></a> Data splitting

In [None]:
# Choose dependent variables
Y = df[['Redefined Class']]

# Drop the dependent variables from the feature data set
X = df.drop(columns = ['Redefined Class'])

# Scale the explanatory variables
X1 = pd.DataFrame(StandardScaler().fit_transform(X))
X1.columns = X.columns
X = X1

# Split data set into train and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, random_state=1, stratify = Y)

print(f'No. of training data: {X_train.shape[0]}')
print(f'No. of training targets: {Y_train.shape[0]}')
print(f'No. of testing data: {X_test.shape[0]}')
print(f'No. of testing targets: {Y_test.shape[0]}')

### 6.1 <a class="anchor" id="6_1"></a> Models

#### 6.1.1  <a class="anchor" id="6_1_1"></a> Logistic regression

In [None]:
log = LogisticRegression(random_state = 1, max_iter = 30000)
log.fit(X_train, Y_train.values.ravel())
log_y_pred=log.predict(X_test)

print('Precision score of LOG: ' + str(round(metrics.precision_score(Y_test, np.round(log_y_pred), average='weighted', zero_division=0), 3)*100)+'%')
print('F1 of LOG: ' + str(round(metrics.f1_score(Y_test, np.round(log_y_pred), average='weighted'), 3)*100)+'%')
print('Recall score of LOG: ' + str(round(metrics.recall_score(Y_test, np.round(log_y_pred), average='weighted', zero_division=0), 3)*100)+'%')
print('Accuracy score of LOG: ' + str(round(metrics.accuracy_score(Y_test, np.round(log_y_pred)), 3)*100)+'%')

#### 6.1.2 <a class="anchor" id="6_1_2"></a> K-Neighbours classifier

In [None]:
np.random.seed(1)
knn_7 = KNeighborsClassifier(n_neighbors=7)
knn_7.fit(X_train, Y_train.values.ravel())
knn_7_y_pred = knn_7.predict(X_test)

print('Precision score of KNN-7: ' + str(round(metrics.precision_score(Y_test, np.round(knn_7_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of KNN-7: ' + str(round(metrics.f1_score(Y_test, np.round(knn_7_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of KNN-7: ' + str(round(metrics.accuracy_score(Y_test, np.round(knn_7_y_pred)), 3)*100)+'%')

#### 6.1.3  <a class="anchor" id="6_1_3"></a> Decision tree classifier

In [None]:
dtc = DecisionTreeClassifier(random_state = 1)
dtc = dtc.fit(X_train, Y_train.values.ravel())
dtc_y_pred = dtc.predict(X_test)

print('Precision score of DTC: ' + str(round(metrics.precision_score(Y_test, np.round(dtc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of DTC: ' + str(round(metrics.f1_score(Y_test, np.round(dtc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of DTC: ' + str(round(metrics.accuracy_score(Y_test, np.round(dtc_y_pred)), 3)*100)+'%')

#### 6.1.4  <a class="anchor" id="6_1_4"></a> Random forest classifier

In [None]:
rfc = RandomForestClassifier(random_state = 1)
rfc.fit(X_train, Y_train.values.ravel())
rfc_y_pred=rfc.predict(X_test)

print('Precision score of RFC: ' + str(round(metrics.precision_score(Y_test, np.round(rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of RFC: ' + str(round(metrics.f1_score(Y_test, np.round(rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(rfc_y_pred)), 3)*100)+'%')

#### 6.1.5  <a class="anchor" id="6_1_5"></a> XGBoost classifier

In [None]:
xgbc = XGBClassifier(n_estimators=100, learning_rate=0.05, booster='gbtree', random_state = 1, eval_metric='mlogloss', use_label_encoder=False)
xgbc.fit(X_train, Y_train.values.ravel())
xgbc_y_pred=xgbc.predict(X_test)

print('Precision score of XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(xgbc_y_pred)), 3)*100)+'%')

#### 6.1.6  <a class="anchor" id="6_1_6"></a> Naive Bayes

In [None]:
gnb = GaussianNB()
gnb.fit(X_train, Y_train.values.ravel())
gnb_y_pred = gnb.predict(X_test)

print('Precision score of GNB: ' + str(round(metrics.precision_score(Y_test, np.round(gnb_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of GNB: ' + str(round(metrics.f1_score(Y_test, np.round(gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of GNB: ' + str(round(metrics.accuracy_score(Y_test, np.round(gnb_y_pred)), 3)*100)+'%')

#### 6.1.7  <a class="anchor" id="6_1_7"></a> Linear discriminant analysis

In [None]:
lda = LinearDiscriminantAnalysis(n_components = 2)
lda.fit(X_train, Y_train.values.ravel())
lda_y_pred = lda.predict(X_test)

print('Precision score of LDA: ' + str(round(metrics.precision_score(Y_test, np.round(lda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of LDA: ' + str(round(metrics.f1_score(Y_test, np.round(lda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of LDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(lda_y_pred)), 3)*100)+'%')

#### 6.1.8  <a class="anchor" id="6_1_8"></a> Quadratic discriminant analysis

In [None]:
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, Y_train.values.ravel())
qda_y_pred = qda.predict(X_test)

print('Precision score of QDA: ' + str(round(metrics.precision_score(Y_test, np.round(qda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of QDA: ' + str(round(metrics.f1_score(Y_test, np.round(qda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of QDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(qda_y_pred)), 3)*100)+'%')

#### 6.1.9  <a class="anchor" id="6_1_9"></a> Ridge regression classifier

In [None]:
rdg = RidgeClassifier(alpha=1.0, random_state = 1, max_iter = 30000)
rdg.fit(X_train, Y_train.values.ravel())
rdg_y_pred=rdg.predict(X_test)

print('Precision score of RDG: ' + str(round(metrics.precision_score(Y_test, np.round(rdg_y_pred), average='weighted', zero_division=0), 3)*100)+'%')
print('F1 of RDG: ' + str(round(metrics.f1_score(Y_test, np.round(rdg_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of RDG: ' + str(round(metrics.accuracy_score(Y_test, np.round(rdg_y_pred)), 3)*100)+'%')

#### 6.1.10  <a class="anchor" id="6_1_10"></a> Support vector machines

In [None]:
svm = SVC(kernel='linear', random_state = 1, probability=True)
svm.fit(X_train, Y_train.values.ravel())
svm_y_pred = svm.predict(X_test)

print('Precision score of SVM: ' + str(round(metrics.precision_score(Y_test, np.round(svm_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of SVM: ' + str(round(metrics.f1_score(Y_test, np.round(svm_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(svm_y_pred)), 3)*100)+'%')

## Part 7: <a class="anchor" id="part7"></a> Fine-tuning

### 7.1  <a class="anchor" id="7_1"></a> XGBoost grid search

In [None]:
# Look at parameters used by our current XGBoost model
print('Parameters currently in use:\n')
pprint(xgbc.get_params())

In [None]:
# Defining parameter range
xgbc_grid = {'learning_rate':[0.1, 1],
    'n_estimators':[1000, 1500],
    'max_depth':[4,5,6],
    'min_child_weight':[6,8,10,12],
    'gamma':[i/10.0 for i in range(0,5)],
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)],
    'objective':['binary:logistic'],
    'nthread':[4],
    'seed':[1],
    'eval_metric':['mlogloss']}

pprint(xgbc_grid)

In [None]:
# Fitting the model for grid search
xgbc_tuned = GridSearchCV(XGBClassifier(), xgbc_grid, refit = True)
xgbc_tuned.fit(X_train, Y_train.values.ravel())

In [None]:
# Print best parameter after tuning
print(xgbc_tuned.best_params_)
 
# Print how our model looks after hyper-parameter tuning
print(xgbc_tuned.best_estimator_)

In [None]:
# Create a XGBoost_tuned model
xgbc_tuned = XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.8,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric='mlogloss', gamma=0.4, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.1, max_bin=256,
              max_cat_to_onehot=4, max_delta_step=0, max_depth=6, max_leaves=0,
              min_child_weight=10, monotone_constraints='()',
              n_estimators=1000, n_jobs=4, nthread=4, num_parallel_tree=1,
              objective='multi:softprob', predictor='auto', random_state=1)

In [None]:
# Base model results
xgbc_base_y_pred = xgbc.predict(X_test)
xgbc_base_precision = round(metrics.precision_score(Y_test, np.round(xgbc_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base XGBC is ' + str(xgbc_base_precision)+'%')

# Tuned model results
xgbc_tuned.fit(X_train, Y_train.values.ravel())
xgbc_tuned_y_pred = xgbc_tuned.predict(X_test)
xgbc_tuned_precision = round(metrics.precision_score(Y_test, np.round(xgbc_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned XGBC is ' + str(xgbc_tuned_precision)+'%')

# Comparison
print('Improvement of {:0.2f}%'.format(100 * (xgbc_tuned_precision - xgbc_base_precision) / xgbc_base_precision))

In [None]:
# Rest of the measures
print('F1 of tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(xgbc_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Recall of tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(xgbc_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy of tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(xgbc_tuned_y_pred)), 3)*100)+'%')

### 7.2  <a class="anchor" id="7_2"></a> Random forest classifier grid search

In [None]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 2000, num = 10)]

# Number of features to consider at every split
max_features = ['sqrt']

# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)

# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]

# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
                     'max_features': max_features,
                     'max_depth': max_depth,
                     'min_samples_split': min_samples_split,
                      'min_samples_leaf': min_samples_leaf,
                      'bootstrap': bootstrap}
pprint(random_grid)

In [105]:
rfc_tuned = RandomizedSearchCV(estimator = rfc,
                               param_distributions = random_grid,
                               n_iter = 100,
                               cv = 5,
                               verbose = 2,
                               random_state = 1,
                               n_jobs = -1)
                               
# Fit the random search model
rfc_tuned.fit(X_train, Y_train.values.ravel())

In [106]:
# Print best parameter after tuning
print(rfc_tuned.best_params_)
 
# Print how our model looks after hyper-parameter tuning
print(rfc_tuned.best_estimator_)

{'n_estimators': 1155, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 10, 'bootstrap': True}
RandomForestClassifier(max_depth=10, min_samples_leaf=4, n_estimators=1155,
                       random_state=1)


In [111]:
# Create an RFC_tuned model
rfc_tuned = RandomForestClassifier(max_depth=10, min_samples_leaf=4, n_estimators=1155, random_state=1)

In [114]:
# Base model results
rfc_base_y_pred = rfc.predict(X_test)
rfc_base_precision = round(metrics.precision_score(Y_test, np.round(rfc_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base RFC is ' + str(rfc_base_precision)+'%')

# Tuned model results
rfc_tuned.fit(X_train, Y_train.values.ravel())
rfc_tuned_y_pred = rfc_tuned.predict(X_test)
rfc_tuned_precision = round(metrics.precision_score(Y_test, np.round(rfc_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned RFC is ' + str(rfc_tuned_precision)+'%')

# Comparison
print('Improvement of {:0.1f}%'.format(100 * (rfc_tuned_precision - rfc_base_precision) / rfc_base_precision))

Precision of base RFC is 57.8%
Precision of tuned RFC is 59.8%
Improvement of 3.5%


In [115]:
print('F1 of tuned RFC: ' + str(round(metrics.f1_score(Y_test, np.round(rfc_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of tuned RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(rfc_tuned_y_pred)), 3)*100)+'%')

F1 of tuned RFC: 51.0%
Accuracy score of tuned RFC: 57.9%


### 7.3  <a class="anchor" id="7_3"></a> SVM RBF grid search

In [107]:
# Look at parameters used by our current SVM model
print('Parameters currently in use:\n')
pprint(svm.get_params())

Parameters currently in use:

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': True,
 'random_state': 1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [108]:
# Defining parameter range
svm_grid = {'C': [0.1, 1, 2, 3, 4, 5, 10],
            'gamma': [1, 2, 3, 4, 5, 0.1, 0.01],
            'kernel': ['linear']}
 
# Fitting the model for grid search
svm_tuned = GridSearchCV(SVC(), svm_grid, refit = True) 
svm_tuned.fit(X_train, Y_train.values.ravel())

In [109]:
# Print best parameter after tuning
print(svm_tuned.best_params_)
 
# Print how our model looks after hyper-parameter tuning
print(svm_tuned.best_estimator_)

{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
SVC(C=0.1, gamma=1, kernel='linear')


In [116]:
# Create a tuned SVC model with linear kernel
svm_tuned = SVC(kernel='linear', C = 0.1, gamma = 1, random_state = 1, probability=True)
svm_tuned.fit(X_train, Y_train.values.ravel())

In [117]:
# Base model results
svm_base_y_pred = svm.predict(X_test)
svm_base_precision = round(metrics.precision_score(Y_test, np.round(svm_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base SVM is ' + str(svm_base_precision)+'%')

# Tuned model results with kernel
svm_tuned_y_pred = svm_tuned.predict(X_test)
svm_tuned_precision = round(metrics.precision_score(Y_test, np.round(svm_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned SVM with RBF kernel is ' + str(svm_tuned_precision)+'%')

print('Improvement of {:0.2f}%'.format(100 * (svm_tuned_precision - svm_base_precision) / svm_base_precision))

Precision of base SVM is 41.699999999999996%
Precision of tuned SVM with RBF kernel is 41.699999999999996%
Improvement of 0.00%


In [119]:
# Create a tuned SVC model with RBF kernel
svm_tuned = SVC(kernel='rbf', C = 0.1, gamma = 1, random_state = 1, probability=True)
svm_tuned.fit(X_train, Y_train.values.ravel())

In [120]:
# Base model results
svm_base_y_pred = svm.predict(X_test)
svm_base_precision = round(metrics.precision_score(Y_test, np.round(svm_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base SVM is ' + str(svm_base_precision)+'%')

# Tuned model results with kernel
svm_tuned_y_pred = svm_tuned.predict(X_test)
svm_tuned_precision = round(metrics.precision_score(Y_test, np.round(svm_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned SVM with RBF kernel is ' + str(svm_tuned_precision)+'%')

print('Improvement of {:0.2f}%'.format(100 * (svm_tuned_precision - svm_base_precision) / svm_base_precision))

Precision of base SVM is 41.699999999999996%
Precision of tuned SVM with RBF kernel is 75.0%
Improvement of 79.86%


In [121]:
# Rest of the measures
print('F1 of tuned SVM with RBF kernel: ' + str(round(metrics.f1_score(Y_test, np.round(svm_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy of tuned SVM with RBF kernel: ' + str(round(metrics.accuracy_score(Y_test, np.round(svm_tuned_y_pred)), 3)*100)+'%')

F1 of tuned SVM with RBF kernel: 33.300000000000004%
Accuracy of tuned SVM with RBF kernel: 50.0%


## Part 8: <a class="anchor" id="part8"></a> Ensemble learning

### 8.1  <a class="anchor" id="8_1"></a> Voting classifier

In [None]:
soft_voting = VotingClassifier(estimators=[('xgbc_t', xgbc_tuned), ('rfc_t', rfc_tuned)], voting='soft')
soft_voting.fit(X_train, Y_train.values.ravel())
sv_y_pred = soft_voting.predict(X_test)

In [None]:
print('Precision score of soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall score of soft voting classifier ' + str(round(metrics.recall_score(Y_test, np.round(sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(sv_y_pred)), 3)*100)+'%')

In [None]:
hard_voting = VotingClassifier(estimators=[('xgbc_t', xgbc_tuned), ('rfc_t', rfc_tuned), ('svm_tuned', svm_tuned)], voting='hard')
hard_voting.fit(X_train, Y_train.values.ravel())
hv_y_pred = hard_voting.predict(X_test)

In [None]:
print('Precision score of hard voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of hard voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall score of hard voting classifier ' + str(round(metrics.recall_score(Y_test, np.round(hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of hard voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(hv_y_pred)), 3)*100)+'%')

### 8.2  <a class="anchor" id="8_2"></a> Stacking

#### 8.2.1  <a class="anchor" id="8_2_1"></a> All models

In [None]:
def get_stacking():
    level11 = list()
    level11.append(('hard voting', hard_voting))
    level11.append(('rfc', rfc))
    level11.append(('xgbc tuned', xgbc_tuned))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('gnb', gnb))
    level11.append(('dtc', dtc))
    level11.append(('knn', knn_7))
    level11.append(('qda', qda))
    level11.append(('rdg', rdg))
    level11.append(('log', log))
    level11.append(('lda', lda))
    model = StackingClassifier(estimators=level11, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level11 = list()
level11.append(('hard voting', hard_voting))
level11.append(('rfc', rfc))
level11.append(('xgbc tuned', xgbc_tuned))
level11.append(('svm tuned', svm_tuned))
level11.append(('gnb', gnb))
level11.append(('dtc', dtc))
level11.append(('knn', knn_7))
level11.append(('qda', qda))
level11.append(('rdg', rdg))
level11.append(('log', log))
level11.append(('lda', lda))
level11.append(('stacking', get_stacking()))

In [None]:
stack11_hv = StackingClassifier(estimators=level11, final_estimator=hard_voting, cv=5)
stack11_hv = stack11_hv.fit(X, Y.values.ravel())
stack11_hv_y_pred = stack11_hv.predict(X_test)

In [None]:
print('Precision with 11 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level11 = list()
    level11.append(('hard voting', hard_voting))
    level11.append(('rfc', rfc))
    level11.append(('xgbc tuned', xgbc_tuned))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('gnb', gnb))
    level11.append(('dtc', dtc))
    level11.append(('knn', knn_7))
    level11.append(('qda', qda))
    level11.append(('rdg', rdg))
    level11.append(('log', log))
    level11.append(('lda', lda))
    model = StackingClassifier(estimators=level11, final_estimator=rfc, cv=5)
    return model

In [None]:
stack11_rfc = StackingClassifier(estimators=level11, final_estimator=rfc, cv=5)
stack11_rfc = stack11_rfc.fit(X, Y.values.ravel())
stack11_rfc_y_pred = stack11_rfc.predict(X_test)

In [None]:
print('Precision with 11 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level11 = list()
    level11.append(('hard voting', hard_voting))
    level11.append(('rfc', rfc))
    level11.append(('xgbc tuned', xgbc_tuned))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('gnb', gnb))
    level11.append(('dtc', dtc))
    level11.append(('knn', knn_7))
    level11.append(('qda', qda))
    level11.append(('rdg', rdg))
    level11.append(('log', log))
    level11.append(('lda', lda))
    model = StackingClassifier(estimators=level11, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack11_xgbc_t = StackingClassifier(estimators=level11, final_estimator=xgbc_tuned, cv=5)
stack11_xgbc_t = stack11_xgbc_t.fit(X, Y.values.ravel())
stack11_xgbc_t_y_pred = stack11_xgbc_t.predict(X_test)

In [None]:
print('Precision with 11 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.2  <a class="anchor" id="8_2_2"></a> Top 10 models

In [None]:
def get_stacking():
    level10 = list()
    level10.append(('hard voting', hard_voting))
    level10.append(('rfc', rfc))
    level10.append(('xgbc tuned', xgbc_tuned))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('gnb', gnb))
    level10.append(('dtc', dtc))
    level10.append(('knn', knn_7))
    level10.append(('qda', qda))
    level10.append(('rdg', rdg))
    level10.append(('log', log))
    model = StackingClassifier(estimators=level10, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level10 = list()
level10.append(('hard voting', hard_voting))
level10.append(('rfc', rfc))
level10.append(('xgbc tuned', xgbc_tuned))
level10.append(('svm tuned', svm_tuned))
level10.append(('gnb', gnb))
level10.append(('dtc', dtc))
level10.append(('knn', knn_7))
level10.append(('qda', qda))
level10.append(('rdg', rdg))
level10.append(('log', log))
level10.append(('stacking', get_stacking()))

In [None]:
stack10_hv = StackingClassifier(estimators=level10, final_estimator=hard_voting, cv=5)
stack10_hv = stack10_hv.fit(X, Y.values.ravel())
stack10_hv_y_pred = stack10_hv.predict(X_test)

In [None]:
print('Precision with 10 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level10 = list()
    level10.append(('hard voting', hard_voting))
    level10.append(('rfc', rfc))
    level10.append(('xgbc tuned', xgbc_tuned))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('gnb', gnb))
    level10.append(('dtc', dtc))
    level10.append(('knn', knn_7))
    level10.append(('qda', qda))
    level10.append(('rdg', rdg))
    level10.append(('log', log))
    model = StackingClassifier(estimators=level10, final_estimator=rfc, cv=5)
    return model

In [None]:
stack10_rfc = StackingClassifier(estimators=level10, final_estimator=rfc, cv=5)
stack10_rfc = stack10_rfc.fit(X, Y.values.ravel())
stack10_rfc_y_pred = stack10_rfc.predict(X_test)

In [None]:
print('Precision with 10 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level10 = list()
    level10.append(('hard voting', hard_voting))
    level10.append(('rfc', rfc))
    level10.append(('xgbc tuned', xgbc_tuned))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('gnb', gnb))
    level10.append(('dtc', dtc))
    level10.append(('knn', knn_7))
    level10.append(('qda', qda))
    level10.append(('rdg', rdg))
    level10.append(('log', log))
    model = StackingClassifier(estimators=level10, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack10_xgbc_t = StackingClassifier(estimators=level10, final_estimator=xgbc_tuned, cv=5)
stack10_xgbc_t = stack10_xgbc_t.fit(X, Y.values.ravel())
stack10_xgbc_t_y_pred = stack10_xgbc_t.predict(X_test)

In [None]:
print('Precision with 10 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.3  <a class="anchor" id="8_2_3"></a> Top 9 models

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level9 = list()
level9.append(('hard voting', hard_voting))
level9.append(('rfc', rfc))
level9.append(('xgbc tuned', xgbc_tuned))
level9.append(('svm tuned', svm_tuned))
level9.append(('gnb', gnb))
level9.append(('dtc', dtc))
level9.append(('knn', knn_7))
level9.append(('qda', qda))
level9.append(('rdg', rdg))
level9.append(('stacking', get_stacking()))

In [None]:
stack9_hv = StackingClassifier(estimators=level9, final_estimator=hard_voting, cv=5)
stack9_hv = stack9_hv.fit(X, Y.values.ravel())
stack9_hv_y_pred = stack9_hv.predict(X_test)

In [None]:
print('Precision with 9 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)
    return model

In [None]:
stack9_rfc = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)
stack9_rfc = stack9_rfc.fit(X, Y.values.ravel())
stack9_rfc_y_pred = stack9_rfc.predict(X_test)

In [None]:
print('Precision with 9 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack9_xgbc_t = StackingClassifier(estimators=level9, final_estimator=xgbc_tuned, cv=5)
stack9_xgbc_t = stack9_xgbc_t.fit(X, Y.values.ravel())
stack9_xgbc_t_y_pred = stack9_xgbc_t.predict(X_test)

In [None]:
print('Precision with 9 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.4  <a class="anchor" id="8_2_4"></a> Top 8 models

In [None]:
def get_stacking():
    level8 = list()
    level8.append(('hard voting', hard_voting))
    level8.append(('rfc', rfc))
    level8.append(('xgbc tuned', xgbc_tuned))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('gnb', gnb))
    level8.append(('dtc', dtc))
    level8.append(('knn', knn_7))
    level8.append(('qda', qda))
    model = StackingClassifier(estimators=level8, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level8 = list()
level8.append(('hard voting', hard_voting))
level8.append(('rfc', rfc))
level8.append(('xgbc tuned', xgbc_tuned))
level8.append(('svm tuned', svm_tuned))
level8.append(('gnb', gnb))
level8.append(('dtc', dtc))
level8.append(('knn', knn_7))
level8.append(('qda', qda))
level8.append(('stacking', get_stacking()))

In [None]:
stack8_hv = StackingClassifier(estimators=level8, final_estimator=hard_voting, cv=5)
stack8_hv = stack8_hv.fit(X, Y.values.ravel())
stack8_hv_y_pred = stack8_hv.predict(X_test)

In [None]:
print('Precision with 8 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level8 = list()
    level8.append(('hard voting', hard_voting))
    level8.append(('rfc', rfc))
    level8.append(('xgbc tuned', xgbc_tuned))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('gnb', gnb))
    level8.append(('dtc', dtc))
    level8.append(('knn', knn_7))
    level8.append(('qda', qda))
    model = StackingClassifier(estimators=level8, final_estimator=rfc, cv=5)
    return model

In [None]:
stack8_rfc = StackingClassifier(estimators=level8, final_estimator=rfc, cv=5)
stack8_rfc = stack8_rfc.fit(X, Y.values.ravel())
stack8_rfc_y_pred = stack8_rfc.predict(X_test)

In [None]:
print('Precision with 8 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level8 = list()
    level8.append(('hard voting', hard_voting))
    level8.append(('rfc', rfc))
    level8.append(('xgbc tuned', xgbc_tuned))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('gnb', gnb))
    level8.append(('dtc', dtc))
    level8.append(('knn', knn_7))
    level8.append(('qda', qda))
    model = StackingClassifier(estimators=level8, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack8_xgbc_t = StackingClassifier(estimators=level8, final_estimator=xgbc_tuned, cv=5)
stack8_xgbc_t = stack8_xgbc_t.fit(X, Y.values.ravel())
stack8_xgbc_t_y_pred = stack8_xgbc_t.predict(X_test)

In [None]:
print('Precision with 8 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.5  <a class="anchor" id="8_2_5"></a> Top 7 models

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level9 = list()
level9.append(('hard voting', hard_voting))
level9.append(('rfc', rfc))
level9.append(('xgbc tuned', xgbc_tuned))
level9.append(('svm tuned', svm_tuned))
level9.append(('gnb', gnb))
level9.append(('dtc', dtc))
level9.append(('knn', knn_7))
level9.append(('qda', qda))
level9.append(('rdg', rdg))
level9.append(('stacking', get_stacking()))

In [None]:
stack9_hv = StackingClassifier(estimators=level9, final_estimator=hard_voting, cv=5)
stack9_hv = stack9_hv.fit(X, Y.values.ravel())
stack9_hv_y_pred = stack9_hv.predict(X_test)

In [None]:
print('Precision with 9 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)
    return model

In [None]:
stack9_rfc = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)
stack9_rfc = stack9_rfc.fit(X, Y.values.ravel())
stack9_rfc_y_pred = stack9_rfc.predict(X_test)

In [None]:
print('Precision with 9 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level9 = list()
    level9.append(('hard voting', hard_voting))
    level9.append(('rfc', rfc))
    level9.append(('xgbc tuned', xgbc_tuned))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('gnb', gnb))
    level9.append(('dtc', dtc))
    level9.append(('knn', knn_7))
    level9.append(('qda', qda))
    level9.append(('rdg', rdg))
    model = StackingClassifier(estimators=level9, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack9_xgbc_t = StackingClassifier(estimators=level9, final_estimator=xgbc_tuned, cv=5)
stack9_xgbc_t = stack9_xgbc_t.fit(X, Y.values.ravel())
stack9_xgbc_t_y_pred = stack9_xgbc_t.predict(X_test)

In [None]:
print('Precision with 9 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.6  <a class="anchor" id="8_2_6"></a> Top 6 models

#### 8.2.7  <a class="anchor" id="8_2_7"></a> Top 5 models

#### 8.2.8  <a class="anchor" id="8_2_8"></a> Top 4 models

#### 8.2.9  <a class="anchor" id="8_2_9"></a> Top 3 models

In [None]:
def get_stacking():
    level3 = list()
    level3.append(('hard voting', hard_voting))
    level3.append(('rfc', rfc))
    level3.append(('xgbc tuned', xgbc_tuned))
    model = StackingClassifier(estimators=level3, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level3 = list()
level3.append(('hard voting', hard_voting))
level3.append(('rfc', rfc))
level3.append(('xgbc tuned', xgbc_tuned))
level3.append(('stacking', get_stacking()))

In [None]:
stack3_hv = StackingClassifier(estimators=level3, final_estimator=hard_voting, cv=5)
stack3_hv = stack3_hv.fit(X, Y.values.ravel())
stack3_hv_y_pred = stack3_hv.predict(X_test)

In [None]:
print('Precision with 3 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level3 = list()
    level3.append(('hard voting', hard_voting))
    level3.append(('rfc', rfc))
    level3.append(('xgbc tuned', xgbc_tuned))
    model = StackingClassifier(estimators=level3, final_estimator=rfc, cv=5)
    return model

In [None]:
stack3_rfc = StackingClassifier(estimators=level3, final_estimator=rfc, cv=5)
stack3_rfc = stack3_rfc.fit(X, Y.values.ravel())
stack3_rfc_y_pred = stack3_rfc.predict(X_test)

In [None]:
print('Precision with 3 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level3 = list()
    level3.append(('hard voting', hard_voting))
    level3.append(('rfc', rfc))
    level3.append(('xgbc tuned', xgbc_tuned))
    model = StackingClassifier(estimators=level3, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack3_xgbc_t = StackingClassifier(estimators=level3, final_estimator=xgbc_tuned, cv=5)
stack3_xgbc_t = stack3_xgbc_t.fit(X, Y.values.ravel())
stack3_xgbc_t_y_pred = stack3_xgbc_t.predict(X_test)

In [None]:
print('Precision with 3 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_xgbc_t_y_pred)), 3)*100)+'%')

#### 8.2.10  <a class="anchor" id="8_2_10"></a> Top 2 models

In [None]:
def get_stacking():
    level2 = list()
    level2.append(('hard voting', hard_voting))
    level2.append(('rfc', rfc))
    model = StackingClassifier(estimators=level2, final_estimator=hard_voting, cv=5)
    return model

In [None]:
level2 = list()
level2.append(('hard voting', hard_voting))
level2.append(('rfc', rfc))
level2.append(('stacking', get_stacking()))

In [None]:
stack2_hv = StackingClassifier(estimators=level2, final_estimator=hard_voting, cv=5)
stack2_hv = stack2_hv.fit(X, Y.values.ravel())
stack2_hv_y_pred = stack2_hv.predict(X_test)

In [None]:
print('Precision with 2 models learnt on HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_hv_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level2 = list()
    level2.append(('hard voting', hard_voting))
    level2.append(('rfc', rfc))
    model = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)
    return model

In [None]:
stack2_rfc = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)
stack2_rfc = stack2_rfc.fit(X, Y.values.ravel())
stack2_rfc_y_pred = stack2_rfc.predict(X_test)

In [None]:
print('Precision with 2 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_rfc_y_pred)), 3)*100)+'%')

In [None]:
def get_stacking():
    level2 = list()
    level2.append(('hard voting', hard_voting))
    level2.append(('rfc', rfc))
    model = StackingClassifier(estimators=level2, final_estimator=xgbc_tuned, cv=5)
    return model

In [None]:
stack2_xgbc_t = StackingClassifier(estimators=level2, final_estimator=xgbc_tuned, cv=5)
stack2_xgbc_t = stack2_xgbc_t.fit(X, Y.values.ravel())
stack2_xgbc_t_y_pred = stack2_xgbc_t.predict(X_test)

In [None]:
print('Precision with 2 models learnt on tuned XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_xgbc_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on tuned XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_xgbc_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_xgbc_t_y_pred)), 3)*100)+'%')