<h1 align="center">S2R Analytics</h1>
<h2 align="center">Profitability of Client X projects: run 1</h2>

# Table of Contents

* [Part 6](#part6): Classification
    * [6.0](#6_0): Data splitting
    * [6.1](#6_1): Models
<br />
<br />
* [Part 7](#part7): Fine-tuning
* [Part 8](#part8): Ensemble learning
* [Part 9](#part9): Evaluation of the final model

## Notebook Setup

In [1]:
# Essentials
import pandas as pd
from pandas import Series, DataFrame
from pandas.api.types import CategoricalDtype
pd.options.display.max_columns = None
import sqlite3
import pyodbc
import numpy as np; np.random.seed(1)

# Image creation and display
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.patches as mpatches
from matplotlib import pyplot
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.ticker import FuncFormatter
from yellowbrick.model_selection import FeatureImportances

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_transformer

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import RidgeClassifier
from sklearn.svm import SVC

# Metrics of accuracy
from numpy import mean
from numpy import std
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Fine-tuning and enseble learning
from pprint import pprint
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.base import clone
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import BaggingClassifier

# Other
import itertools as it
import io
import os
os.sys.path
import sys
import glob
import concurrent.futures
from __future__ import print_function
import binascii
import struct
from PIL import Image
import scipy
import scipy.misc
import scipy.cluster
import datetime, time
import functools, operator
from datetime import datetime
from numpy.random import seed
from numpy.random import randn
from numpy import percentile

In [2]:
df = pd.read_csv('../csv-files/complete_projects.csv')

## Part 6: <a class="anchor" id="part6"></a> Classification

### 6.0 <a class="anchor" id="6_0"></a> Data splitting

In [3]:
# Choose dependent variables
Y = df[['Profit Class']]

# Drop the dependent variables from the feature data set
X = df.drop(columns = ['Profit Class', 'Rate Group'])

# Scale the explanatory variables
X1 = pd.DataFrame(StandardScaler().fit_transform(X))
X1.columns = X.columns
X = X1

# Split data set into train and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.18, random_state=1, stratify = Y)

print(f'No. of training data: {X_train.shape[0]}')
print(f'No. of training targets: {Y_train.shape[0]}')
print(f'No. of testing data: {X_test.shape[0]}')
print(f'No. of testing targets: {Y_test.shape[0]}')

No. of training data: 4797
No. of training targets: 4797
No. of testing data: 1054
No. of testing targets: 1054


### 6.1 <a class="anchor" id="6_1"></a> Models

#### 6.1.1  <a class="anchor" id="6_1_1"></a> Logistic regression

In [9]:
log = LogisticRegression(random_state = 1, max_iter = 30000)
log.fit(X_train, Y_train.values.ravel())
log_y_pred=log.predict(X_test)

print('Precision score of LOG: ' + str(round(metrics.precision_score(Y_test, np.round(log_y_pred), average='weighted', zero_division=0), 3)*100)+'%')
print('F1 of LOG: ' + str(round(metrics.f1_score(Y_test, np.round(log_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of LOG: ' + str(round(metrics.accuracy_score(Y_test, np.round(log_y_pred)), 3)*100)+'%')

Precision score of LOG: 52.400000000000006%
F1 of LOG: 51.300000000000004%
Accuracy score of LOG: 60.099999999999994%


#### 6.1.2 <a class="anchor" id="6_1_2"></a> K-Neighbours classifier

In [10]:
np.random.seed(1)
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, Y_train.values.ravel())
knn_y_pred = knn.predict(X_test)

print('Precision score of KNN-7: ' + str(round(metrics.precision_score(Y_test, np.round(knn_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of KNN-7: ' + str(round(metrics.f1_score(Y_test, np.round(knn_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of KNN-7: ' + str(round(metrics.accuracy_score(Y_test, np.round(knn_y_pred)), 3)*100)+'%')

Precision score of KNN-7: 54.300000000000004%
F1 of KNN-7: 55.1%
Accuracy score of KNN-7: 57.4%


#### 6.1.3  <a class="anchor" id="6_1_3"></a> Decision tree classifier

In [12]:
dtc = DecisionTreeClassifier(random_state = 1)
dtc = dtc.fit(X_train, Y_train.values.ravel())
dtc_y_pred = dtc.predict(X_test)

print('Precision score of DTC: ' + str(round(metrics.precision_score(Y_test, np.round(dtc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of DTC: ' + str(round(metrics.f1_score(Y_test, np.round(dtc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of DTC: ' + str(round(metrics.accuracy_score(Y_test, np.round(dtc_y_pred)), 3)*100)+'%')

Precision score of DTC: 50.1%
F1 of DTC: 50.3%
Accuracy score of DTC: 50.6%


#### 6.1.4  <a class="anchor" id="6_1_4"></a> Random forest classifier

In [13]:
rfc = RandomForestClassifier(random_state = 1)
rfc.fit(X_train, Y_train.values.ravel())
rfc_y_pred=rfc.predict(X_test)

print('Precision score of RFC: ' + str(round(metrics.precision_score(Y_test, np.round(rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of RFC: ' + str(round(metrics.f1_score(Y_test, np.round(rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(rfc_y_pred)), 3)*100)+'%')

Precision score of RFC: 57.3%
F1 of RFC: 58.3%
Accuracy score of RFC: 61.9%


#### 6.1.5  <a class="anchor" id="6_1_5"></a> XGBoost classifier

In [14]:
xgbc = XGBClassifier(n_estimators=100, learning_rate=0.05, booster='gbtree', random_state = 1, eval_metric='mlogloss', use_label_encoder=False)
xgbc.fit(X_train, Y_train.values.ravel())
xgbc_y_pred=xgbc.predict(X_test)

print('Precision score of XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(xgbc_y_pred)), 3)*100)+'%')

Precision score of XGBC: 59.699999999999996%
F1 of XGBC: 58.099999999999994%
Accuracy score of XGBC: 63.2%


#### 6.1.6  <a class="anchor" id="6_1_6"></a> Naive Bayes

In [15]:
gnb = GaussianNB()
gnb.fit(X_train, Y_train.values.ravel())
gnb_y_pred = gnb.predict(X_test)

print('Precision score of GNB: ' + str(round(metrics.precision_score(Y_test, np.round(gnb_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of GNB: ' + str(round(metrics.f1_score(Y_test, np.round(gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of GNB: ' + str(round(metrics.accuracy_score(Y_test, np.round(gnb_y_pred)), 3)*100)+'%')

Precision score of GNB: 72.7%
F1 of GNB: 1.4000000000000001%
Accuracy score of GNB: 7.6%


#### 6.1.7  <a class="anchor" id="6_1_7"></a> Linear discriminant analysis

In [16]:
lda = LinearDiscriminantAnalysis(n_components = 2)
lda.fit(X_train, Y_train.values.ravel())
lda_y_pred = lda.predict(X_test)

print('Precision score of LDA: ' + str(round(metrics.precision_score(Y_test, np.round(lda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of LDA: ' + str(round(metrics.f1_score(Y_test, np.round(lda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of LDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(lda_y_pred)), 3)*100)+'%')

Precision score of LDA: 54.0%
F1 of LDA: 50.7%
Accuracy score of LDA: 59.9%


#### 6.1.8  <a class="anchor" id="6_1_8"></a> Quadratic discriminant analysis

In [17]:
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, Y_train.values.ravel())
qda_y_pred = qda.predict(X_test)

print('Precision score of QDA: ' + str(round(metrics.precision_score(Y_test, np.round(qda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of QDA: ' + str(round(metrics.f1_score(Y_test, np.round(qda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of QDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(qda_y_pred)), 3)*100)+'%')

Precision score of QDA: 67.5%
F1 of QDA: 1.2%
Accuracy score of QDA: 7.5%




#### 6.1.9  <a class="anchor" id="6_1_9"></a> Ridge regression classifier

In [18]:
rdg = RidgeClassifier(alpha=1.0, random_state = 1, max_iter = 30000)
rdg.fit(X_train, Y_train.values.ravel())
rdg_y_pred=rdg.predict(X_test)

print('Precision score of RDG: ' + str(round(metrics.precision_score(Y_test, np.round(rdg_y_pred), average='weighted', zero_division=0), 3)*100)+'%')
print('F1 of RDG: ' + str(round(metrics.f1_score(Y_test, np.round(rdg_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of RDG: ' + str(round(metrics.accuracy_score(Y_test, np.round(rdg_y_pred)), 3)*100)+'%')

Precision score of RDG: 52.300000000000004%
F1 of RDG: 50.6%
Accuracy score of RDG: 60.0%


#### 6.1.10  <a class="anchor" id="6_1_10"></a> Support vector machines

In [19]:
svm = SVC(kernel='linear', random_state = 1, probability=True)
svm.fit(X_train, Y_train.values.ravel())
svm_y_pred = svm.predict(X_test)

print('Precision score of SVM: ' + str(round(metrics.precision_score(Y_test, np.round(svm_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of SVM: ' + str(round(metrics.f1_score(Y_test, np.round(svm_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(svm_y_pred)), 3)*100)+'%')

Precision score of SVM: 60.5%
F1 of SVM: 47.8%
Accuracy score of SVM: 59.599999999999994%


## Part 7: <a class="anchor" id="part7"></a> Fine-tuning

### 7.1  <a class="anchor" id="7_1"></a> XGBoost grid search

In [20]:
print('Parameters currently in use:\n')
pprint(xgbc.get_params())

Parameters currently in use:

{'base_score': 0.5,
 'booster': 'gbtree',
 'callbacks': None,
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'colsample_bytree': 1,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': 'mlogloss',
 'gamma': 0,
 'gpu_id': -1,
 'grow_policy': 'depthwise',
 'importance_type': None,
 'interaction_constraints': '',
 'learning_rate': 0.05,
 'max_bin': 256,
 'max_cat_to_onehot': 4,
 'max_delta_step': 0,
 'max_depth': 6,
 'max_leaves': 0,
 'min_child_weight': 1,
 'missing': nan,
 'monotone_constraints': '()',
 'n_estimators': 100,
 'n_jobs': 0,
 'num_parallel_tree': 1,
 'objective': 'multi:softprob',
 'predictor': 'auto',
 'random_state': 1,
 'reg_alpha': 0,
 'reg_lambda': 1,
 'sampling_method': 'uniform',
 'scale_pos_weight': None,
 'subsample': 1,
 'tree_method': 'exact',
 'use_label_encoder': False,
 'validate_parameters': 1,
 'verbosity': None}


In [147]:
xgbc_grid = {'learning_rate':[0.1, 1],
    'n_estimators':[1000, 1577],
    'max_depth':[4,5,6],
    'min_child_weight':[6,8,10,12],
    'gamma':[i/10.0 for i in range(0,5)],
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)],
    'objective':['binary:logistic'],
    'nthread':[4],
    'seed':[1],
    'eval_metric':['mlogloss']}

pprint(xgbc_grid)

{'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
 'eval_metric': ['mlogloss'],
 'gamma': [0.0, 0.1, 0.2, 0.3, 0.4],
 'learning_rate': [0.1, 1],
 'max_depth': [4, 5, 6],
 'min_child_weight': [6, 8, 10, 12],
 'n_estimators': [1000, 1577],
 'nthread': [4],
 'objective': ['binary:logistic'],
 'seed': [1],
 'subsample': [0.6, 0.7, 0.8, 0.9]}


In [None]:
# Fitting the model for grid search
xgbc_tuned = GridSearchCV(XGBClassifier(), xgbc_grid, refit = True)
xgbc_tuned.fit(X_train, Y_train.values.ravel())

In [47]:
print(xgbc_tuned.best_params_)
print(xgbc_tuned.best_estimator_)

{'colsample_bytree': 0.6, 'eval_metric': 'mlogloss', 'gamma': 0.3, 'learning_rate': 0.1, 'max_depth': 4, 'min_child_weight': 12, 'n_estimators': 1000, 'nthread': 4, 'objective': 'binary:logistic', 'seed': 1, 'subsample': 0.9}
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.6,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric='mlogloss', gamma=0.3, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.1, max_bin=256,
              max_cat_to_onehot=4, max_delta_step=0, max_depth=4, max_leaves=0,
              min_child_weight=12, missing=nan, monotone_constraints='()',
              n_estimators=1000, n_jobs=4, nthread=4, num_parallel_tree=1,
              objective='multi:softprob', predictor='auto', random_state=1, ...)


In [21]:
# Create a XGBoost_tuned model
xgbc_tuned = XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.6,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric='mlogloss', gamma=0.01, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.1, max_bin=256,
              max_cat_to_onehot=4, max_delta_step=0, max_depth=4, max_leaves=0,
              min_child_weight=12, monotone_constraints='()',
              n_estimators=1577, n_jobs=4, nthread=4, num_parallel_tree=1,
              objective='multi:softprob', predictor='auto', random_state=1)

In [22]:
# Base model results
xgbc_base_y_pred = xgbc.predict(X_test)
xgbc_base_precision = round(metrics.precision_score(Y_test, np.round(xgbc_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base XGBC is ' + str(xgbc_base_precision)+'%')

# Tuned model results
xgbc_tuned.fit(X_train, Y_train.values.ravel())
xgbc_tuned_y_pred = xgbc_tuned.predict(X_test)
xgbc_tuned_precision = round(metrics.precision_score(Y_test, np.round(xgbc_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned XGBC is ' + str(xgbc_tuned_precision)+'%')

# Comparison
print('Improvement of {:0.2f}%'.format(100 * (xgbc_tuned_precision - xgbc_base_precision) / xgbc_base_precision))

Precision of base XGBC is 59.699999999999996%
Precision of tuned XGBC is 56.39999999999999%
Improvement of -5.53%


In [23]:
# Rest of the measures
print('F1 of tuned XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(xgbc_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy of tuned XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(xgbc_tuned_y_pred)), 3)*100)+'%')

F1 of tuned XGBC: 57.3%
Accuracy of tuned XGBC: 59.9%


### 7.2  <a class="anchor" id="7_2"></a> Random forest classifier grid search

In [94]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 2000, num = 10)]

# Number of features to consider at every split
max_features = ['sqrt']

# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)

# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]

# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
                     'max_features': max_features,
                     'max_depth': max_depth,
                     'min_samples_split': min_samples_split,
                      'min_samples_leaf': min_samples_leaf,
                      'bootstrap': bootstrap}
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'max_features': ['sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [100, 311, 522, 733, 944, 1155, 1366, 1577, 1788, 2000]}


In [95]:
rfc_tuned = RandomizedSearchCV(estimator = rfc,
                               param_distributions = random_grid,
                               n_iter = 100,
                               cv = 5,
                               verbose = 2,
                               random_state = 1,
                               n_jobs = -1)
                               
# Fit the random search model
rfc_tuned.fit(X_train, Y_train.values.ravel())

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [101]:
# Print best parameter after tuning
print(rfc_tuned.best_params_)

{'n_estimators': 1577, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 10, 'bootstrap': False}


In [102]:
# Print how our model looks after hyper-parameter tuning
print(rfc_tuned.best_estimator_)

RandomForestClassifier(bootstrap=False, max_depth=10, min_samples_leaf=4,
                       n_estimators=1577, random_state=1)


In [24]:
# Create an RFC_tuned model
rfc_tuned = RandomForestClassifier(n_estimators=1577, min_samples_split=2, min_samples_leaf=4, max_features='sqrt', max_depth=10, bootstrap=False)

In [25]:
# Base model results
rfc_base_y_pred = rfc.predict(X_test)
rfc_base_precision = round(metrics.precision_score(Y_test, np.round(rfc_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base RFC is ' + str(rfc_base_precision)+'%')

# Tuned model results
rfc_tuned.fit(X_train, Y_train.values.ravel())
rfc_tuned_y_pred = rfc_tuned.predict(X_test)
rfc_tuned_precision = round(metrics.precision_score(Y_test, np.round(rfc_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned RFC is ' + str(rfc_tuned_precision)+'%')

# Comparison
print('Improvement of {:0.1f}%'.format(100 * (rfc_tuned_precision - rfc_base_precision) / rfc_base_precision))

Precision of base RFC is 57.3%
Precision of tuned RFC is 59.599999999999994%
Improvement of 4.0%


In [27]:
print('F1 of tuned RFC: ' + str(round(metrics.f1_score(Y_test, np.round(rfc_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of tuned RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(rfc_tuned_y_pred)), 3)*100)+'%')

F1 of tuned RFC: 55.60000000000001%
Accuracy score of tuned RFC: 62.4%


### 7.3  <a class="anchor" id="7_3"></a> SVM RBF grid search

In [120]:
# Look at parameters used by our current SVM model
print('Parameters currently in use:\n')
pprint(svm.get_params())

Parameters currently in use:

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'probability': True,
 'random_state': 1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}


In [129]:
# Defining parameter range
svm_grid = {'C': [0.1, 1, 2, 3, 4, 5, 10],
            'gamma': [1, 0.1, 0.01, 2, 3, 4, 5, 6, 10],
            'kernel': ['linear', 'rbf']}
 
# Fitting the model for grid search
svm_tuned = GridSearchCV(SVC(), svm_grid, refit = True) 
svm_tuned.fit(X_train, Y_train.values.ravel())

In [131]:
# Print best parameter after tuning
print(svm_tuned.best_params_)

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


In [132]:
# Print how our model looks after hyper-parameter tuning
print(svm_tuned.best_estimator_)

SVC(C=1, gamma=0.1)


In [28]:
# Create a tuned SVC model with linear kernel
svm_tuned = SVC(kernel='rbf', C = 3, gamma = 0.01, random_state = 1, probability=True)
svm_tuned.fit(X_train, Y_train.values.ravel())

In [29]:
# Base model results
svm_base_y_pred = svm.predict(X_test)
svm_base_precision = round(metrics.precision_score(Y_test, np.round(svm_base_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of base SVM is ' + str(svm_base_precision)+'%')

# Tuned model results with kernel
svm_tuned_y_pred = svm_tuned.predict(X_test)
svm_tuned_precision = round(metrics.precision_score(Y_test, np.round(svm_tuned_y_pred), average='weighted', zero_division=1), 3)*100
print('Precision of tuned SVM with linear kernel is ' + str(svm_tuned_precision)+'%')

print('Improvement of {:0.2f}%'.format(100 * (svm_tuned_precision - svm_base_precision) / svm_base_precision))

Precision of base SVM is 60.5%
Precision of tuned SVM with linear kernel is 62.7%
Improvement of 3.64%


In [30]:
# Rest of the measures
print('F1 of tuned SVM with linear kernel: ' + str(round(metrics.f1_score(Y_test, np.round(svm_tuned_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy of tuned SVM with linear kernel: ' + str(round(metrics.accuracy_score(Y_test, np.round(svm_tuned_y_pred)), 3)*100)+'%')

F1 of tuned SVM with linear kernel: 52.300000000000004%
Accuracy of tuned SVM with linear kernel: 61.199999999999996%


## Part 8: <a class="anchor" id="part8"></a> Ensemble learning

### 8.1  <a class="anchor" id="8_1"></a> Voting classifier

In [31]:
soft_voting = VotingClassifier(estimators=[('svm_t', svm_tuned), ('xgbc', xgbc), ('rfc_t', rfc_tuned)], voting='soft')
soft_voting.fit(X_train, Y_train.values.ravel())
sv_y_pred = soft_voting.predict(X_test)

In [33]:
print('Precision score of soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(sv_y_pred)), 3)*100)+'%')

Precision score of soft voting classifier: 63.800000000000004%
F1 of soft voting classifier: 55.00000000000001%
Accuracy score of soft voting classifier: 62.5%


In [34]:
hard_voting = VotingClassifier(estimators=[('svm_t', svm_tuned), ('xgbc', xgbc), ('rfc_t', rfc_tuned)],voting='hard')
hard_voting.fit(X_train, Y_train.values.ravel())
hv_y_pred = hard_voting.predict(X_test)

In [35]:
print('Precision score of hard voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 of hard voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(hv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score of hard voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(hv_y_pred)), 3)*100)+'%')

Precision score of hard voting classifier: 59.8%
F1 of hard voting classifier: 55.7%
Accuracy score of hard voting classifier: 62.5%


### 8.2  <a class="anchor" id="8_2"></a> Stacking

#### 8.2.1  <a class="anchor" id="8_2_1"></a> All models

In [268]:
def get_stacking():
    level11 = list()
    level11.append(('soft voting', soft_voting))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('xgbc', xgbc))
    level11.append(('rfc tuned', rfc_tuned))
    level11.append(('knn', knn))
    level11.append(('lda', lda))
    level11.append(('log', log))
    level11.append(('rdg', rdg))
    level11.append(('dtc', dtc))
    level11.append(('gnb', gnb))
    level11.append(('qda', qda))
    model = StackingClassifier(estimators=level11, final_estimator=soft_voting, cv=5)
    return model

In [269]:
level11 = list()
level11.append(('soft voting', soft_voting))
level11.append(('svm tuned', svm_tuned))
level11.append(('xgbc', xgbc))
level11.append(('rfc tuned', rfc_tuned))
level11.append(('knn', knn))
level11.append(('lda', lda))
level11.append(('log', log))
level11.append(('rdg', rdg))
level11.append(('dtc', dtc))
level11.append(('gnb', gnb))
level11.append(('qda', qda))
level11.append(('stacking', get_stacking()))

In [270]:
stack11_sv = StackingClassifier(estimators=level11, final_estimator=soft_voting, cv=5)
stack11_sv = stack11_sv.fit(X, Y.values.ravel())
stack11_sv_y_pred = stack11_sv.predict(X_test)



In [271]:
print('Precision with 11 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_sv_y_pred)), 3)*100)+'%')

Precision with 11 models learnt on soft voting classifier: 61.6%
F1 with 11 models learnt on soft voting classifier: 54.300000000000004%
Recall with 11 models learnt on soft voting classifier: 60.9%
Accuracy with 11 models learnt on soft voting classifier: 60.9%


In [272]:
def get_stacking():
    level11 = list()
    level11.append(('soft voting', soft_voting))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('xgbc', xgbc))
    level11.append(('rfc tuned', rfc_tuned))
    level11.append(('knn', knn))
    level11.append(('lda', lda))
    level11.append(('log', log))
    level11.append(('rdg', rdg))
    level11.append(('dtc', dtc))
    level11.append(('gnb', gnb))
    level11.append(('qda', qda))
    model = StackingClassifier(estimators=level11, final_estimator=svm_tuned, cv=5)
    return model

In [273]:
stack11_svm_t = StackingClassifier(estimators=level11, final_estimator=svm_tuned, cv=5)
stack11_svm_t = stack11_svm_t.fit(X, Y.values.ravel())
stack11_svm_t_y_pred = stack11_svm_t.predict(X_test)



In [274]:
print('Precision with 11 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_svm_t_y_pred)), 3)*100)+'%')

Precision with 11 models learnt on tuned SVM: 64.1%
F1 with 11 models learnt on tuned SVM: 50.2%
Recall with 11 models learnt on tuned SVM: 60.9%
Accuracy with 11 models learnt on tuned SVM: 60.9%


In [275]:
def get_stacking():
    level11 = list()
    level11.append(('soft voting', soft_voting))
    level11.append(('svm tuned', svm_tuned))
    level11.append(('xgbc', xgbc))
    level11.append(('rfc tuned', rfc_tuned))
    level11.append(('knn', knn))
    level11.append(('lda', lda))
    level11.append(('log', log))
    level11.append(('rdg', rdg))
    level11.append(('dtc', dtc))
    level11.append(('gnb', gnb))
    level11.append(('qda', qda))
    model = StackingClassifier(estimators=level11, final_estimator=xgbc, cv=5)
    return model

In [276]:
stack11_xgbc = StackingClassifier(estimators=level11, final_estimator=xgbc, cv=5)
stack11_xgbc = stack11_xgbc.fit(X, Y.values.ravel())
stack11_xgbc_y_pred = stack11_xgbc.predict(X_test)



In [277]:
print('Precision with 11 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack11_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 11 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack11_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 11 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack11_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 11 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack11_xgbc_y_pred)), 3)*100)+'%')

Precision with 11 models learnt on base XGBC: 51.9%
F1 with 11 models learnt on base XGBC: 53.1%
Recall with 11 models learnt on base XGBC: 56.99999999999999%
Accuracy with 11 models learnt on base XGBC: 56.99999999999999%


#### 8.2.2  <a class="anchor" id="8_2_2"></a> Top 10 models

In [258]:
def get_stacking():
    level10 = list()
    level10.append(('soft voting', soft_voting))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('xgbc', xgbc))
    level10.append(('rfc tuned', rfc_tuned))
    level10.append(('knn', knn))
    level10.append(('lda', lda))
    level10.append(('log', log))
    level10.append(('rdg', rdg))
    level10.append(('dtc', dtc))
    level10.append(('gnb', gnb))
    model = StackingClassifier(estimators=level10, final_estimator=soft_voting, cv=5)
    return model

In [259]:
level10 = list()
level10.append(('soft voting', soft_voting))
level10.append(('svm tuned', svm_tuned))
level10.append(('xgbc', xgbc))
level10.append(('rfc tuned', rfc_tuned))
level10.append(('knn', knn))
level10.append(('lda', lda))
level10.append(('log', log))
level10.append(('rdg', rdg))
level10.append(('dtc', dtc))
level10.append(('gnb', gnb))
level10.append(('stacking', get_stacking()))

In [260]:
stack10_sv = StackingClassifier(estimators=level10, final_estimator=soft_voting, cv=5)
stack10_sv = stack10_sv.fit(X, Y.values.ravel())
stack10_sv_y_pred = stack10_sv.predict(X_test)

In [261]:
print('Precision with 10 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_sv_y_pred)), 3)*100)+'%')

Precision with 10 models learnt on soft voting classifier: 62.1%
F1 with 10 models learnt on soft voting classifier: 55.50000000000001%
Recall with 10 models learnt on soft voting classifier: 61.3%
Accuracy with 10 models learnt on soft voting classifier: 61.3%


In [262]:
def get_stacking():
    level10 = list()
    level10.append(('soft voting', soft_voting))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('xgbc', xgbc))
    level10.append(('rfc tuned', rfc_tuned))
    level10.append(('knn', knn))
    level10.append(('lda', lda))
    level10.append(('log', log))
    level10.append(('rdg', rdg))
    level10.append(('dtc', dtc))
    level10.append(('gnb', gnb))
    model = StackingClassifier(estimators=level10, final_estimator=svm_tuned, cv=5)
    return model

In [263]:
stack10_svm_t = StackingClassifier(estimators=level10, final_estimator=svm_tuned, cv=5)
stack10_svm_t = stack10_svm_t.fit(X, Y.values.ravel())
stack10_svm_t_y_pred = stack10_svm_t.predict(X_test)

In [264]:
print('Precision with 10 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_svm_t_y_pred)), 3)*100)+'%')

Precision with 10 models learnt on tuned SVM: 66.10000000000001%
F1 with 10 models learnt on tuned SVM: 51.5%
Recall with 10 models learnt on tuned SVM: 61.8%
Accuracy with 10 models learnt on tuned SVM: 61.8%


In [265]:
def get_stacking():
    level10 = list()
    level10.append(('soft voting', soft_voting))
    level10.append(('svm tuned', svm_tuned))
    level10.append(('xgbc', xgbc))
    level10.append(('rfc tuned', rfc_tuned))
    level10.append(('knn', knn))
    level10.append(('lda', lda))
    level10.append(('log', log))
    level10.append(('rdg', rdg))
    level10.append(('dtc', dtc))
    level10.append(('gnb', gnb))
    model = StackingClassifier(estimators=level10, final_estimator=gnb, cv=5)
    return model

In [266]:
stack10_gnb = StackingClassifier(estimators=level10, final_estimator=gnb, cv=5)
stack10_gnb = stack10_gnb.fit(X, Y.values.ravel())
stack10_gnb_y_pred = stack10_gnb.predict(X_test)

In [267]:
print('Precision with 10 models learnt on base GNB: ' + str(round(metrics.precision_score(Y_test, np.round(stack10_gnb_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 10 models learnt on base GNB: ' + str(round(metrics.f1_score(Y_test, np.round(stack10_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 10 models learnt on base GNB: ' + str(round(metrics.recall_score(Y_test, np.round(stack10_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 10 models learnt on base GNB: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack10_gnb_y_pred)), 3)*100)+'%')

Precision with 10 models learnt on base GNB: 54.1%
F1 with 10 models learnt on base GNB: 51.300000000000004%
Recall with 10 models learnt on base GNB: 59.3%
Accuracy with 10 models learnt on base GNB: 59.3%


#### 8.2.3  <a class="anchor" id="8_2_3"></a> Top 9 models

In [246]:
def get_stacking():
    level9 = list()
    level9.append(('soft voting', soft_voting))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('xgbc', xgbc))
    level9.append(('rfc tuned', rfc_tuned))
    level9.append(('knn', knn))
    level9.append(('lda', lda))
    level9.append(('log', log))
    level9.append(('rdg', rdg))
    level9.append(('dtc', dtc))
    model = StackingClassifier(estimators=level9, final_estimator=soft_voting, cv=5)
    return model

In [247]:
level9 = list()
level9.append(('soft voting', soft_voting))
level9.append(('svm tuned', svm_tuned))
level9.append(('xgbc', xgbc))
level9.append(('rfc tuned', rfc_tuned))
level9.append(('knn', knn))
level9.append(('lda', lda))
level9.append(('log', log))
level9.append(('rdg', rdg))
level9.append(('dtc', dtc))
level9.append(('stacking', get_stacking()))

In [248]:
stack9_sv = StackingClassifier(estimators=level9, final_estimator=soft_voting, cv=5)
stack9_sv = stack9_sv.fit(X, Y.values.ravel())
stack9_sv_y_pred = stack9_sv.predict(X_test)

In [249]:
print('Precision with 9 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_sv_y_pred)), 3)*100)+'%')

Precision with 9 models learnt on soft voting classifier: 54.6%
F1 with 9 models learnt on soft voting classifier: 55.1%
Recall with 9 models learnt on soft voting classifier: 61.4%
Accuracy with 9 models learnt on soft voting classifier: 61.4%


In [250]:
def get_stacking():
    level9 = list()
    level9.append(('soft voting', soft_voting))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('xgbc', xgbc))
    level9.append(('rfc tuned', rfc_tuned))
    level9.append(('knn', knn))
    level9.append(('lda', lda))
    level9.append(('log', log))
    level9.append(('rdg', rdg))
    level9.append(('dtc', dtc))
    model = StackingClassifier(estimators=level9, final_estimator=svm_tuned, cv=5)
    return model

In [251]:
stack9_svm_t = StackingClassifier(estimators=level9, final_estimator=svm_tuned, cv=5)
stack9_svm_t = stack9_svm_t.fit(X, Y.values.ravel())
stack9_svm_t_y_pred = stack9_svm_t.predict(X_test)

In [252]:
print('Precision with 9 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_svm_t_y_pred)), 3)*100)+'%')

Precision with 9 models learnt on tuned SVM: 67.0%
F1 with 9 models learnt on tuned SVM: 51.5%
Recall with 9 models learnt on tuned SVM: 62.0%
Accuracy with 9 models learnt on tuned SVM: 62.0%


In [253]:
def get_stacking():
    level9 = list()
    level9.append(('soft voting', soft_voting))
    level9.append(('svm tuned', svm_tuned))
    level9.append(('xgbc', xgbc))
    level9.append(('rfc tuned', rfc_tuned))
    level9.append(('knn', knn))
    level9.append(('lda', lda))
    level9.append(('log', log))
    level9.append(('rdg', rdg))
    level9.append(('dtc', dtc))
    model = StackingClassifier(estimators=level9, final_estimator=xgbc, cv=5)
    return model

In [254]:
stack9_xgbc = StackingClassifier(estimators=level9, final_estimator=xgbc, cv=5)
stack9_xgbc = stack9_xgbc.fit(X, Y.values.ravel())
stack9_xgbc_y_pred = stack9_xgbc.predict(X_test)

In [255]:
print('Precision with 9 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 9 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 9 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 9 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_xgbc_y_pred)), 3)*100)+'%')

Precision with 9 models learnt on base XGBC: 50.0%
F1 with 9 models learnt on base XGBC: 50.8%
Recall with 9 models learnt on base XGBC: 52.0%
Accuracy with 9 models learnt on base XGBC: 52.0%


#### 8.2.4  <a class="anchor" id="8_2_4"></a> Top 8 models

In [234]:
def get_stacking():
    level8 = list()
    level8.append(('soft voting', soft_voting))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('xgbc', xgbc))
    level8.append(('rfc tuned', rfc_tuned))
    level8.append(('knn', knn))
    level8.append(('lda', lda))
    level8.append(('log', log))
    level8.append(('rdg', rdg))
    model = StackingClassifier(estimators=level8, final_estimator=soft_voting, cv=5)
    return model

In [42]:
level8 = list()
level8.append(('soft voting', soft_voting))
level8.append(('svm tuned', svm_tuned))
level8.append(('xgbc', xgbc))
level8.append(('rfc tuned', rfc_tuned))
level8.append(('knn', knn))
level8.append(('lda', lda))
level8.append(('log', log))
level8.append(('rdg', rdg))
level8.append(('stacking', get_stacking()))

In [236]:
stack8_sv = StackingClassifier(estimators=level8, final_estimator=soft_voting, cv=5)
stack8_sv = stack8_sv.fit(X, Y.values.ravel())
stack8_sv_y_pred = stack8_sv.predict(X_test)

In [237]:
print('Precision with 8 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_sv_y_pred)), 3)*100)+'%')

Precision with 8 models learnt on soft voting classifier: 66.0%
F1 with 8 models learnt on soft voting classifier: 58.3%
Recall with 8 models learnt on soft voting classifier: 64.3%
Accuracy with 8 models learnt on soft voting classifier: 64.3%


In [43]:
def get_stacking():
    level8 = list()
    level8.append(('soft voting', soft_voting))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('xgbc', xgbc))
    level8.append(('rfc tuned', rfc_tuned))
    level8.append(('knn', knn))
    level8.append(('lda', lda))
    level8.append(('log', log))
    level8.append(('rdg', rdg))
    model = StackingClassifier(estimators=level8, final_estimator=svm_tuned, cv=5)
    return model

In [239]:
stack8_svm_t = StackingClassifier(estimators=level8, final_estimator=svm_tuned, cv=5)
stack8_svm_t = stack8_svm_t.fit(X, Y.values.ravel())
stack8_svm_t_y_pred = stack8_svm_t.predict(X_test)

In [240]:
print('Precision with 8 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_svm_t_y_pred)), 3)*100)+'%')

Precision with 8 models learnt on tuned SVM: 68.89999999999999%
F1 with 8 models learnt on tuned SVM: 51.7%
Recall with 8 models learnt on tuned SVM: 62.2%
Accuracy with 8 models learnt on tuned SVM: 62.2%


In [241]:
def get_stacking():
    level8 = list()
    level8.append(('soft voting', soft_voting))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('xgbc', xgbc))
    level8.append(('rfc tuned', rfc_tuned))
    level8.append(('knn', knn))
    level8.append(('lda', lda))
    level8.append(('log', log))
    level8.append(('rdg', rdg))
    model = StackingClassifier(estimators=level8, final_estimator=xgbc, cv=5)
    return model

In [242]:
stack8_xgbc = StackingClassifier(estimators=level8, final_estimator=xgbc, cv=5)
stack8_xgbc = stack8_xgbc.fit(X, Y.values.ravel())
stack8_xgbc_y_pred = stack8_xgbc.predict(X_test)

In [243]:
print('Precision with 8 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_xgbc_y_pred)), 3)*100)+'%')

Precision with 8 models learnt on base XGBC: 61.1%
F1 with 8 models learnt on base XGBC: 61.1%
Recall with 8 models learnt on base XGBC: 62.2%
Accuracy with 8 models learnt on base XGBC: 62.2%


In [44]:
def get_stacking():
    level8 = list()
    level8.append(('soft voting', soft_voting))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('xgbc', xgbc))
    level8.append(('rfc tuned', rfc_tuned))
    level8.append(('knn', knn))
    level8.append(('lda', lda))
    level8.append(('gnb', gnb))
    level8.append(('qda', qda))
    model = StackingClassifier(estimators=level8, final_estimator=svm_tuned, cv=5)
    return model

In [45]:
level8 = list()
level8.append(('soft voting', soft_voting))
level8.append(('svm tuned', svm_tuned))
level8.append(('xgbc', xgbc))
level8.append(('rfc tuned', rfc_tuned))
level8.append(('knn', knn))
level8.append(('lda', lda))
level8.append(('gnb', gnb))
level8.append(('qda', qda))
level8.append(('stacking', get_stacking()))

In [46]:
stack8_svm_gnb_qda = StackingClassifier(estimators=level8, final_estimator=svm_tuned, cv=5)
stack8_svm_gnb_qda = stack8_svm_gnb_qda.fit(X, Y.values.ravel())
stack8_svm_gnb_qda_y_pred = stack8_svm_gnb_qda.predict(X_test)



In [47]:
print('Precision with 8 models learnt on SVM with GNB and QDA: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_svm_gnb_qda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on SVM with GNB and QDA: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_svm_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on SVM with GNB and QDA: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_svm_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on SVM with GNB and QDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_svm_gnb_qda_y_pred)), 3)*100)+'%')

Precision with 8 models learnt on SVM with GNB and QDA: 66.7%
F1 with 8 models learnt on SVM with GNB and QDA: 51.0%
Recall with 8 models learnt on SVM with GNB and QDA: 61.7%
Accuracy with 8 models learnt on SVM with GNB and QDA: 61.7%


In [48]:
def get_stacking():
    level8 = list()
    level8.append(('soft voting', soft_voting))
    level8.append(('svm tuned', svm_tuned))
    level8.append(('xgbc', xgbc))
    level8.append(('rfc tuned', rfc_tuned))
    level8.append(('knn', knn))
    level8.append(('lda', lda))
    level8.append(('gnb', gnb))
    level8.append(('qda', qda))
    model = StackingClassifier(estimators=level8, final_estimator=xgbc, cv=5)
    return model

In [49]:
stack8_xgbc_gnb_qda = StackingClassifier(estimators=level8, final_estimator=xgbc, cv=5)
stack8_xgbc_gnb_qda = stack8_svm_gnb_qda.fit(X, Y.values.ravel())
stack8_xgbc_gnb_qda_y_pred = stack8_svm_gnb_qda.predict(X_test)



In [50]:
print('Precision with 8 models learnt on XGBC with GNB and QDA: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_xgbc_gnb_qda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 8 models learnt on XGBC with GNB and QDA: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_xgbc_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 8 models learnt on XGBC with GNB and QDA: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_xgbc_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 8 models learnt on XGBC with GNB and QDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_xgbc_gnb_qda_y_pred)), 3)*100)+'%')

Precision with 8 models learnt on XGBC with GNB and QDA: 66.7%
F1 with 8 models learnt on XGBC with GNB and QDA: 51.0%
Recall with 8 models learnt on XGBC with GNB and QDA: 61.7%
Accuracy with 8 models learnt on XGBC with GNB and QDA: 61.7%


#### 8.2.5  <a class="anchor" id="8_2_5"></a> Top 7 models

In [222]:
def get_stacking():
    level7 = list()
    level7.append(('soft voting', soft_voting))
    level7.append(('svm tuned', svm_tuned))
    level7.append(('xgbc', xgbc))
    level7.append(('rfc tuned', rfc_tuned))
    level7.append(('knn', knn))
    level7.append(('lda', lda))
    level7.append(('log', log))
    model = StackingClassifier(estimators=level7, final_estimator=soft_voting, cv=5)
    return model

In [223]:
level7 = list()
level7.append(('soft voting', soft_voting))
level7.append(('svm tuned', svm_tuned))
level7.append(('xgbc', xgbc))
level7.append(('rfc tuned', rfc_tuned))
level7.append(('knn', knn))
level7.append(('lda', lda))
level7.append(('log', log))
level7.append(('stacking', get_stacking()))

In [224]:
stack7_sv = StackingClassifier(estimators=level7, final_estimator=soft_voting, cv=5)
stack7_sv = stack7_sv.fit(X, Y.values.ravel())
stack7_sv_y_pred = stack7_sv.predict(X_test)

In [225]:
print('Precision with 7 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack7_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 7 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack7_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 7 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack7_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 7 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack7_sv_y_pred)), 3)*100)+'%')

Precision with 7 models learnt on soft voting classifier: 64.4%
F1 with 7 models learnt on soft voting classifier: 57.599999999999994%
Recall with 7 models learnt on soft voting classifier: 63.3%
Accuracy with 7 models learnt on soft voting classifier: 63.3%


In [226]:
def get_stacking():
    level7 = list()
    level7.append(('soft voting', soft_voting))
    level7.append(('svm tuned', svm_tuned))
    level7.append(('xgbc', xgbc))
    level7.append(('rfc tuned', rfc_tuned))
    level7.append(('knn', knn))
    level7.append(('lda', lda))
    level7.append(('log', log))
    model = StackingClassifier(estimators=level7, final_estimator=svm_tuned, cv=5)
    return model

In [227]:
stack7_svm_t = StackingClassifier(estimators=level7, final_estimator=svm_tuned, cv=5)
stack7_svm_t = stack7_svm_t.fit(X, Y.values.ravel())
stack7_svm_t_y_pred = stack7_svm_t.predict(X_test)

In [228]:
print('Precision with 7 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack7_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 7 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack7_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 7 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack7_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 7 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack7_svm_t_y_pred)), 3)*100)+'%')

Precision with 7 models learnt on tuned SVM: 69.19999999999999%
F1 with 7 models learnt on tuned SVM: 51.5%
Recall with 7 models learnt on tuned SVM: 62.1%
Accuracy with 7 models learnt on tuned SVM: 62.1%


In [229]:
def get_stacking():
    level7 = list()
    level7.append(('soft voting', soft_voting))
    level7.append(('svm tuned', svm_tuned))
    level7.append(('xgbc', xgbc))
    level7.append(('rfc tuned', rfc_tuned))
    level7.append(('knn', knn))
    level7.append(('lda', lda))
    level7.append(('log', log))
    model = StackingClassifier(estimators=level7, final_estimator=xgbc, cv=5)
    return model

In [230]:
stack7_xgbc = StackingClassifier(estimators=level7, final_estimator=xgbc, cv=5)
stack7_xgbc = stack7_xgbc.fit(X, Y.values.ravel())
stack7_xgbc_y_pred = stack7_xgbc.predict(X_test)

In [231]:
print('Precision with 7 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack7_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 7 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack7_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 7 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack7_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 7 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack7_xgbc_y_pred)), 3)*100)+'%')

Precision with 7 models learnt on base XGBC: 56.00000000000001%
F1 with 7 models learnt on base XGBC: 57.4%
Recall with 7 models learnt on base XGBC: 59.8%
Accuracy with 7 models learnt on base XGBC: 59.8%


#### 8.2.6  <a class="anchor" id="8_2_6"></a> Top 6 models

In [212]:
def get_stacking():
    level6 = list()
    level6.append(('soft voting', soft_voting))
    level6.append(('svm tuned', svm_tuned))
    level6.append(('xgbc', xgbc))
    level6.append(('rfc tuned', rfc_tuned))
    level6.append(('knn', knn))
    level6.append(('lda', lda))
    model = StackingClassifier(estimators=level6, final_estimator=soft_voting, cv=5)
    return model

In [213]:
level6 = list()
level6.append(('soft voting', soft_voting))
level6.append(('svm tuned', svm_tuned))
level6.append(('xgbc', xgbc))
level6.append(('rfc tuned', rfc_tuned))
level6.append(('knn', knn))
level6.append(('lda', lda))
level6.append(('stacking', get_stacking()))

In [214]:
stack6_sv = StackingClassifier(estimators=level6, final_estimator=soft_voting, cv=5)
stack6_sv = stack6_sv.fit(X, Y.values.ravel())
stack6_sv_y_pred = stack6_sv.predict(X_test)

In [215]:
print('Precision with 6 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack6_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 6 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack6_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 6 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack6_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 6 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack6_sv_y_pred)), 3)*100)+'%')

Precision with 6 models learnt on soft voting classifier: 65.3%
F1 with 6 models learnt on soft voting classifier: 57.599999999999994%
Recall with 6 models learnt on soft voting classifier: 63.7%
Accuracy with 6 models learnt on soft voting classifier: 63.7%


In [216]:
def get_stacking():
    level6 = list()
    level6.append(('soft voting', soft_voting))
    level6.append(('svm tuned', svm_tuned))
    level6.append(('xgbc', xgbc))
    level6.append(('rfc tuned', rfc_tuned))
    level6.append(('knn', knn))
    level6.append(('lda', lda))
    model = StackingClassifier(estimators=level6, final_estimator=svm_tuned, cv=5)
    return model

In [217]:
stack6_svm_t = StackingClassifier(estimators=level6, final_estimator=svm_tuned, cv=5)
stack6_svm_t = stack6_svm_t.fit(X, Y.values.ravel())
stack6_svm_t_y_pred = stack6_svm_t.predict(X_test)

In [218]:
print('Precision with 6 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack6_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 6 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack6_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 6 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack6_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 6 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack6_svm_t_y_pred)), 3)*100)+'%')

Precision with 6 models learnt on tuned SVM: 69.19999999999999%
F1 with 6 models learnt on tuned SVM: 51.5%
Recall with 6 models learnt on tuned SVM: 62.1%
Accuracy with 6 models learnt on tuned SVM: 62.1%


In [219]:
def get_stacking():
    level6 = list()
    level6.append(('soft voting', soft_voting))
    level6.append(('svm tuned', svm_tuned))
    level6.append(('xgbc', xgbc))
    level6.append(('rfc tuned', rfc_tuned))
    level6.append(('knn', knn))
    level6.append(('lda', lda))
    model = StackingClassifier(estimators=level6, final_estimator=xgbc, cv=5)
    return model

In [220]:
stack6_xgbc = StackingClassifier(estimators=level6, final_estimator=xgbc, cv=5)
stack6_xgbc = stack6_xgbc.fit(X, Y.values.ravel())
stack6_xgbc_y_pred = stack6_xgbc.predict(X_test)

In [221]:
print('Precision with 6 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack6_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 6 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack6_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 6 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack6_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 6 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack6_xgbc_y_pred)), 3)*100)+'%')

Precision with 6 models learnt on base XGBC: 58.699999999999996%
F1 with 6 models learnt on base XGBC: 58.199999999999996%
Recall with 6 models learnt on base XGBC: 59.199999999999996%
Accuracy with 6 models learnt on base XGBC: 59.199999999999996%


In [284]:
def get_stacking():
    level6 = list()
    level6.append(('soft voting', soft_voting))
    level6.append(('svm tuned', svm_tuned))
    level6.append(('xgbc', xgbc))
    level6.append(('rfc tuned', rfc_tuned))
    level6.append(('knn', knn))
    level6.append(('gnb', gnb))
    model = StackingClassifier(estimators=level6, final_estimator=soft_voting, cv=5)
    return model

In [285]:
level6 = list()
level6.append(('soft voting', soft_voting))
level6.append(('svm tuned', svm_tuned))
level6.append(('xgbc', xgbc))
level6.append(('rfc tuned', rfc_tuned))
level6.append(('knn', knn))
level6.append(('gnb', gnb))
level6.append(('stacking', get_stacking()))

In [None]:
stack6_sv_gnb = StackingClassifier(estimators=level6, final_estimator=soft_voting, cv=5)
stack6_sv_gnb = stack6_sv.fit(X, Y.values.ravel())
stack6_sv_gnb_y_pred = stack6_sv.predict(X_test)

In [None]:
print('Precision with 6 models learnt on soft voting classifier with GNB: ' + str(round(metrics.precision_score(Y_test, np.round(stack6_sv_gnb_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 6 models learnt on soft voting classifier with GNB: ' + str(round(metrics.f1_score(Y_test, np.round(stack6_sv_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 6 models learnt on soft voting classifier with GNB: ' + str(round(metrics.recall_score(Y_test, np.round(stack6_sv_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 6 models learnt on soft voting classifier with GNB: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack6_sv_gnb_y_pred)), 3)*100)+'%')

In [290]:
def get_stacking():
    level6 = list()
    level6.append(('soft voting', soft_voting))
    level6.append(('svm tuned', svm_tuned))
    level6.append(('xgbc', xgbc))
    level6.append(('rfc tuned', rfc_tuned))
    level6.append(('qda', qda))
    level6.append(('gnb', gnb))
    model = StackingClassifier(estimators=level6, final_estimator=svm_tuned, cv=5)
    return model

In [291]:
level6 = list()
level6.append(('soft voting', soft_voting))
level6.append(('svm tuned', svm_tuned))
level6.append(('xgbc', xgbc))
level6.append(('rfc tuned', rfc_tuned))
level6.append(('qda', qda))
level6.append(('gnb', gnb))
level6.append(('stacking', get_stacking()))

In [292]:
stack6_svm_t_gnb_qda = StackingClassifier(estimators=level6, final_estimator=svm_tuned, cv=5)
stack6_svm_t_gnb_qda = stack6_svm_t_gnb_qda.fit(X, Y.values.ravel())
stack6_svm_t_gnb_qda_y_pred = stack6_svm_t_gnb_qda.predict(X_test)



In [293]:
print('Precision with 6 models learnt on tuned SVM with GNB and QDA: ' + str(round(metrics.precision_score(Y_test, np.round(stack6_svm_t_gnb_qda_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 6 models learnt on tuned SVM with GNB and QDA: ' + str(round(metrics.f1_score(Y_test, np.round(stack6_svm_t_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 6 models learnt on tuned SVM with GNB and QDA: ' + str(round(metrics.recall_score(Y_test, np.round(stack6_svm_t_gnb_qda_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 6 models learnt on tuned SVM with GNB and QDA: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack6_svm_t_gnb_qda_y_pred)), 3)*100)+'%')

Precision with 6 models learnt on tuned SVM with GNB and QDA: 66.7%
F1 with 6 models learnt on tuned SVM with GNB and QDA: 51.0%
Recall with 6 models learnt on tuned SVM with GNB and QDA: 61.7%
Accuracy with 6 models learnt on tuned SVM with GNB and QDA: 61.7%


#### 8.2.7  <a class="anchor" id="8_2_7"></a> Top 5 models

In [201]:
def get_stacking():
	level5 = list()
	level5.append(('soft voting', soft_voting))
	level5.append(('svm tuned', svm_tuned))
	level5.append(('xgbc', xgbc))
	level5.append(('rfc tuned', rfc_tuned))
	level5.append(('knn', knn))
	model = StackingClassifier(estimators=level5, final_estimator=svm_tuned, cv=5)
	return model

In [202]:
level5 = list()
level5.append(('soft voting', soft_voting))
level5.append(('svm tuned', svm_tuned))
level5.append(('xgbc', xgbc))
level5.append(('rfc tuned', rfc_tuned))
level5.append(('knn', knn))
level5.append(('stacking', get_stacking()))

In [203]:
stack5_svm_t = StackingClassifier(estimators=level5, final_estimator=svm_tuned, cv=5)
stack5_svm_t = stack5_svm_t.fit(X, Y.values.ravel())
stack5_svm_t_y_pred = stack5_svm_t.predict(X_test)

In [204]:
print('Precision with 5 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack5_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 5 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack5_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 5 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack5_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 5 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack5_svm_t_y_pred)), 3)*100)+'%')

Precision with 5 models learnt on tuned SVM: 68.0%
F1 with 5 models learnt on tuned SVM: 50.8%
Recall with 5 models learnt on tuned SVM: 61.8%
Accuracy with 5 models learnt on tuned SVM: 61.8%


In [205]:
def get_stacking():
	level5 = list()
	level5.append(('soft voting', soft_voting))
	level5.append(('svm tuned', svm_tuned))
	level5.append(('xgbc', xgbc))
	level5.append(('rfc tuned', rfc_tuned))
	level5.append(('knn', knn))
	model = StackingClassifier(estimators=level5, final_estimator=soft_voting, cv=5)
	return model

In [206]:
stack5_sv = StackingClassifier(estimators=level5, final_estimator=soft_voting, cv=5)
stack5_sv = stack5_sv.fit(X, Y.values.ravel())
stack5_sv_y_pred = stack5_sv.predict(X_test)

In [207]:
print('Precision score with 5 models learnt on soft voting classifier: ' + str(round(metrics.precision_score(Y_test, np.round(stack5_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 5 models learnt on soft voting classifier: ' + str(round(metrics.f1_score(Y_test, np.round(stack5_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall score with 5 models learnt on soft voting classifier: ' + str(round(metrics.recall_score(Y_test, np.round(stack5_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy score with 5 models learnt on soft voting classifier: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack5_sv_y_pred)), 3)*100)+'%')

Precision score with 5 models learnt on soft voting classifier: 57.599999999999994%
F1 score with 5 models learnt on soft voting classifier: 57.3%
Recall score with 5 models learnt on soft voting classifier: 63.6%
Accuracy score with 5 models learnt on soft voting classifier: 63.6%


In [208]:
stack5_xgbc = StackingClassifier(estimators=level5, final_estimator=xgbc, cv=5)
stack5_xgbc = stack5_xgbc.fit(X, Y.values.ravel())
stack5_xgbc_y_pred = stack5_xgbc.predict(X_test)

In [209]:
print('Precision with 5 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack5_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 5 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack5_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 5 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack5_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 5 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack5_xgbc_y_pred)), 3)*100)+'%')

Precision with 5 models learnt on base XGBC: 58.699999999999996%
F1 with 5 models learnt on base XGBC: 56.49999999999999%
Recall with 5 models learnt on base XGBC: 59.5%
Accuracy with 5 models learnt on base XGBC: 59.5%


#### 8.2.8  <a class="anchor" id="8_2_8"></a> Top 4 models

In [189]:
def get_stacking():
	level4 = list()
	level4.append(('soft voting', soft_voting))
	level4.append(('svm tuned', svm_tuned))
	level4.append(('xgbc', xgbc))
	level4.append(('rfc tuned', rfc_tuned))
	model = StackingClassifier(estimators=level4, final_estimator=soft_voting, cv=5)
	return model

In [38]:
level4 = list()
level4.append(('soft voting', soft_voting))
level4.append(('svm tuned', svm_tuned))
level4.append(('xgbc', xgbc))
level4.append(('rfc tuned', rfc_tuned))
level4.append(('stacking', get_stacking()))

In [191]:
stack4_sv = StackingClassifier(estimators=level4, final_estimator=soft_voting, cv=5)
stack4_sv = stack4_sv.fit(X, Y.values.ravel())
stack4_sv_y_pred = stack4_sv.predict(X_test)

In [192]:
print('Precision with 4 models learnt on SV: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 4 models learnt on SV: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 4 models learnt on SV: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 4 models learnt on SV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_sv_y_pred)), 3)*100)+'%')

Precision with 4 models learnt on SV: 65.2%
F1 with 4 models learnt on SV: 56.8%
Recall with 4 models learnt on SV: 63.4%
Accuracy with 4 models learnt on SV: 63.4%


In [39]:
def get_stacking():
	level4 = list()
	level4.append(('soft voting', soft_voting))
	level4.append(('svm tuned', svm_tuned))
	level4.append(('xgbc', xgbc))
	level4.append(('rfc tuned', rfc_tuned))
	model = StackingClassifier(estimators=level4, final_estimator=svm_tuned, cv=5)
	return model

In [40]:
stack4_svm_t = StackingClassifier(estimators=level4, final_estimator=svm_tuned, cv=5)
stack4_svm_t = stack4_svm_t.fit(X, Y.values.ravel())
stack4_svm_t_y_pred = stack4_svm_t.predict(X_test)

In [41]:
print('Precision with 4 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 4 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 4 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 4 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_svm_t_y_pred)), 3)*100)+'%')

Precision with 4 models learnt on tuned SVM: 66.9%
F1 with 4 models learnt on tuned SVM: 50.2%
Recall with 4 models learnt on tuned SVM: 61.4%
Accuracy with 4 models learnt on tuned SVM: 61.4%


In [196]:
def get_stacking():
	level4 = list()
	level4.append(('soft voting', soft_voting))
	level4.append(('svm tuned', svm_tuned))
	level4.append(('xgbc', xgbc))
	level4.append(('rfc tuned', rfc_tuned))
	model = StackingClassifier(estimators=level4, final_estimator=xgbc, cv=5)
	return model

In [197]:
stack4_xgbc = StackingClassifier(estimators=level4, final_estimator=xgbc, cv=5)
stack4_xgbc = stack4_xgbc.fit(X, Y.values.ravel())
stack4_xgbc_y_pred = stack4_xgbc.predict(X_test)

In [198]:
print('Precision with 4 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 4 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 4 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 4 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_xgbc_y_pred)), 3)*100)+'%')

Precision with 4 models learnt on base XGBC: 59.5%
F1 with 4 models learnt on base XGBC: 57.4%
Recall with 4 models learnt on base XGBC: 60.199999999999996%
Accuracy with 4 models learnt on base XGBC: 60.199999999999996%


In [280]:
def get_stacking():
	level4 = list()
	level4.append(('soft voting', soft_voting))
	level4.append(('svm tuned', svm_tuned))
	level4.append(('xgbc', xgbc))
	level4.append(('gnb', gnb))
	model = StackingClassifier(estimators=level4, final_estimator=svm_tuned, cv=5)
	return model

In [281]:
level4 = list()
level4.append(('soft voting', soft_voting))
level4.append(('svm tuned', svm_tuned))
level4.append(('xgbc', xgbc))
level4.append(('gnb', gnb))
level4.append(('stacking', get_stacking()))

In [282]:
stack4_svm_t_gnb = StackingClassifier(estimators=level4, final_estimator=svm_tuned, cv=5)
stack4_svm_t_gnb = stack4_svm_t_gnb.fit(X, Y.values.ravel())
stack4_svm_t_gnb_y_pred = stack4_svm_t_gnb.predict(X_test)

In [283]:
print('Precision with 4 models learnt on tuned SVM with GNB: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_svm_t_gnb_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 4 models learnt on tuned SVM with GNB: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_svm_t_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 4 models learnt on tuned SVM with GNB: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_svm_t_gnb_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 4 models learnt on tuned SVM with GNB: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_svm_t_gnb_y_pred)), 3)*100)+'%')

Precision with 4 models learnt on tuned SVM with GNB: 65.8%
F1 with 4 models learnt on tuned SVM with GNB: 52.2%
Recall with 4 models learnt on tuned SVM with GNB: 62.0%
Accuracy with 4 models learnt on tuned SVM with GNB: 62.0%


#### 8.2.9  <a class="anchor" id="8_2_9"></a> Top 3 models

In [175]:
def get_stacking():
	level3 = list()
	level3.append(('soft voting', soft_voting))
	level3.append(('svm tuned', svm_tuned))
	level3.append(('xgbc', xgbc))

	model = StackingClassifier(estimators=level3, final_estimator=soft_voting, cv=5)
	return model

In [180]:
level3 = list()
level3.append(('soft voting', soft_voting))
level3.append(('svm tuned', svm_tuned))
level3.append(('xgbc', xgbc))
level3.append(('stacking', get_stacking()))

In [177]:
stack3_sv = StackingClassifier(estimators=level3, final_estimator=soft_voting, cv=5)
stack3_sv = stack3_sv.fit(X, Y.values.ravel())
stack3_sv_y_pred = stack3_sv.predict(X_test)

In [178]:
print('Precision with 3 models learnt on SV: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on SV: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on SV: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on SV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_sv_y_pred)), 3)*100)+'%')

Precision score with 3 models learnt on SV: 55.50000000000001%
F1 score with 3 models learnt on SV: 54.1%
Recall score with 3 models learnt on SV: 61.4%
Accuracy score with 3 models learnt on SV: 61.4%


In [181]:
def get_stacking():
	level3 = list()
	level3.append(('soft voting', soft_voting))
	level3.append(('svm tuned', svm_tuned))
	level3.append(('xgbc', xgbc))

	model = StackingClassifier(estimators=level3, final_estimator=svm_tuned, cv=5)
	return model

In [182]:
stack3_svm_t = StackingClassifier(estimators=level3, final_estimator=svm_tuned, cv=5)
stack3_svm_t = stack3_svm_t.fit(X, Y.values.ravel())
stack3_svm_t_y_pred = stack3_svm_t.predict(X_test)

In [183]:
print('Precision with 3 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_svm_t_y_pred)), 3)*100)+'%')

Precision with 3 models learnt on tuned SVM: 66.60000000000001%
F1 with 3 models learnt on tuned SVM: 49.4%
Recall with 3 models learnt on tuned SVM: 61.1%
Accuracy with 3 models learnt on tuned SVM: 61.1%


In [184]:
def get_stacking():
	level3 = list()
	level3.append(('soft voting', soft_voting))
	level3.append(('svm tuned', svm_tuned))
	level3.append(('xgbc', xgbc))

	model = StackingClassifier(estimators=level3, final_estimator=xgbc, cv=5)
	return model

In [185]:
stack3_xgbc = StackingClassifier(estimators=level3, final_estimator=xgbc, cv=5)
stack3_xgbc = stack3_xgbc.fit(X, Y.values.ravel())
stack3_xgbc_y_pred = stack3_xgbc.predict(X_test)

In [186]:
print('Precision with 3 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack3_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 3 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack3_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 3 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack3_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 3 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack3_xgbc_y_pred)), 3)*100)+'%')

Precision score with 3 models learnt on base XGBC: 51.7%
F1 score with 3 models learnt on base XGBC: 52.300000000000004%
Recall score with 3 models learnt on base XGBC: 56.99999999999999%
Accuracy score with 3 models learnt on base XGBC: 56.99999999999999%


#### 8.2.10  <a class="anchor" id="8_2_10"></a> Top 2 models

In [163]:
def get_stacking():
	level2 = list()
	level2.append(('soft voting', soft_voting))
	level2.append(('svm tuned', svm_tuned))

	model = StackingClassifier(estimators=level2, final_estimator=xgbc, cv=5)
	return model

In [164]:
level2 = list()
level2.append(('soft voting', soft_voting))
level2.append(('svm tuned', svm_tuned))
level2.append(('stacking', get_stacking()))

In [165]:
stack2_xgbc = StackingClassifier(estimators=level2, final_estimator=xgbc, cv=5)
stack2_xgbc = stack2_xgbc.fit(X, Y.values.ravel())
stack2_xgbc_y_pred = stack2_xgbc.predict(X_test)

In [166]:
print('Precision with 2 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_xgbc_y_pred)), 3)*100)+'%')

Precision score with 2 models learnt on base XGBC: 53.300000000000004%
F1 score with 2 models learnt on base XGBC: 53.1%
Recall score with 2 models learnt on base XGBC: 58.3%
Accuracy score with 2 models learnt on base XGBC: 58.3%


In [167]:
def get_stacking():
	level2 = list()
	level2.append(('soft voting', soft_voting))
	level2.append(('svm tuned', svm_tuned))

	model = StackingClassifier(estimators=level2, final_estimator=soft_voting, cv=5)
	return model

In [168]:
stack2_sv = StackingClassifier(estimators=level2, final_estimator=soft_voting, cv=5)
stack2_sv = stack2_sv.fit(X, Y.values.ravel())
stack2_sv_y_pred = stack2_sv.predict(X_test)

In [169]:
print('Precision with 2 models learnt on SV: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on SV: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on SV: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on SV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_sv_y_pred)), 3)*100)+'%')

Precision with 2 models learnt on SV: 61.5%
F1 with 2 models learnt on SV: 53.800000000000004%
Recall with 2 models learnt on SV: 60.9%
Accuracy with 2 models learnt on SV: 60.9%


In [170]:
def get_stacking():
	level2 = list()
	level2.append(('soft voting', soft_voting))
	level2.append(('svm tuned', svm_tuned))

	model = StackingClassifier(estimators=level2, final_estimator=svm_tuned, cv=5)
	return model

In [171]:
stack2_svm_t = StackingClassifier(estimators=level2, final_estimator=svm_tuned, cv=5)
stack2_svm_t = stack2_svm_t.fit(X, Y.values.ravel())
stack2_svm_t_y_pred = stack2_svm_t.predict(X_test)

In [172]:
print('Precision with 2 models learnt on tuned SVM: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_svm_t_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 with 2 models learnt on tuned SVM: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Recall with 2 models learnt on tuned SVM: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_svm_t_y_pred), average='weighted'), 3)*100)+'%')
print('Accuracy with 2 models learnt on tuned SVM: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_svm_t_y_pred)), 3)*100)+'%')

Precision with 2 models learnt on tuned SVM: 68.60000000000001%
F1 with 2 models learnt on tuned SVM: 46.7%
Recall with 2 models learnt on tuned SVM: 60.0%
Accuracy with 2 models learnt on tuned SVM: 60.0%
