<h1 align="center">MSIN0114: Business Analytics Consulting Project</h1>
<h2 align="center">Recoverability of Client X projects: run 3</h2>

## Notebook Setup

In [2]:
# Essentials
import pandas as pd
from pandas import Series, DataFrame
from pandas.api.types import CategoricalDtype
pd.options.display.max_columns = None
import sqlite3
import pyodbc
import numpy as np; np.random.seed(1)

# Image creation and display
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.patches as mpatches
from matplotlib import pyplot
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.ticker import FuncFormatter
from yellowbrick.model_selection import FeatureImportances

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

# Models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.naive_bayes import GaussianNB

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis 

from sklearn.decomposition import PCA

# Metrics of accuracy
from numpy import mean
from numpy import std
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from pycm import *
import imbalanced_ensemble as imbens
from imbalanced_ensemble.ensemble.base import sort_dict_by_key
from collections import Counter

# Fine-tuning and enseble learning
from pprint import pprint
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.base import clone
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RandomizedSearchCV

# Other
import itertools as it
import io
import os
os.sys.path
import sys
import glob
import concurrent.futures
from __future__ import print_function
import binascii
import struct
from PIL import Image
import scipy
import scipy.misc
import scipy.cluster
import datetime, time
import functools, operator
from datetime import datetime
from numpy.random import seed
from numpy.random import randn
from numpy import percentile

In [3]:
df = pd.read_csv('csv-files/resampled_compact_data.csv')

## Data splitting and training

In [4]:
Y = df[['Rec_Class']]
X = df.drop(columns = ['Rec_Class', 'Profit_Class'])
X1 = pd.DataFrame(preprocessing.normalize(X))
X1.columns = X.columns
X = X1
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=1, stratify = Y)

In [33]:
# Logistic regression
log = LogisticRegression(random_state = 1, max_iter = 30000)
log.fit(X_train, Y_train.values.ravel())
log_y_pred=log.predict(X_test)

# Ridge regression
rdg = RidgeClassifier(alpha=1.0, random_state = 1, max_iter = 30000)
rdg.fit(X_train, Y_train.values.ravel())
rdg_y_pred=rdg.predict(X_test)

# k-Neighbours
np.random.seed(1)
knn_100 = KNeighborsClassifier(n_neighbors=100)
knn_100.fit(X_train, Y_train.values.ravel())
knn_100_y_pred = knn_100.predict(X_test)

# Decision tree classifier
dtc = DecisionTreeClassifier(random_state = 1)
dtc = dtc.fit(X_train, Y_train.values.ravel())
dtc_y_pred = dtc.predict(X_test)

# Random forest classifier
rfc = RandomForestClassifier(random_state=1)
rfc.fit(X_train, Y_train.values.ravel())
# Logistic regression
log = LogisticRegression(random_state = 1, max_iter = 30000)
log.fit(X_train, Y_train.values.ravel())
log_y_pred=log.predict(X_test)

# Ridge regression
rdg = RidgeClassifier(alpha=1.0, random_state = 1, max_iter = 30000)
rdg.fit(X_train, Y_train.values.ravel())
rdg_y_pred=rdg.predict(X_test)

# k-Neighbours
np.random.seed(1)
knn_100 = KNeighborsClassifier(n_neighbors=100)
knn_100.fit(X_train, Y_train.values.ravel())
knn_100_y_pred = knn_100.predict(X_test)

# Decision tree classifier
dtc = DecisionTreeClassifier(random_state = 1)
dtc = dtc.fit(X_train, Y_train.values.ravel())
dtc_y_pred = dtc.predict(X_test)

# Random forest classifier
rfc = RandomForestClassifier(random_state=1)
rfc.fit(X_train, Y_train.values.ravel())

# XGBoost classifier
xgbc = XGBClassifier(n_estimators=100, learning_rate=0.05, booster='gbtree', random_state = 1, eval_metric='mlogloss', objective='binary:logistic', use_label_encoder=False)
xgbc.fit(X_train, Y_train.values.ravel())
xgbc_y_pred=xgbc.predict(X_test)

# Naive Bayes
gnb = GaussianNB()
gnb.fit(X_train, Y_train.values.ravel())
gnb_y_pred = gnb.predict(X_test)

# Linear discriminant analysis
lda = LinearDiscriminantAnalysis(n_components = 1)
lda.fit(X_train, Y_train.values.ravel())
lda_y_pred = lda.predict(X_test)

# Quadratic discriminant analysis
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, Y_train.values.ravel())
qda_y_pred = qda.predict(X_test)

# Support vector machine
svm_tuned = SVC(kernel='rbf', C = 2, gamma = 3, random_state = 1, probability=True)
svm_tuned.fit(X_train, Y_train.values.ravel())
svm_y_pred = svm_tuned.predict(X_test)

# Soft voting classifier
soft_voting = VotingClassifier(estimators=[('xgbc', xgbc), ('rfc', rfc), ('svm_t', svm_tuned)],voting='soft')
soft_voting.fit(X_train, Y_train.values.ravel())
sv_y_pred = soft_voting.predict(X_test)

# Hard voting classifier
hard_voting = VotingClassifier(estimators=[('xgbc', xgbc), ('rfc', rfc), ('svm_t', svm_tuned)], voting='hard')
hard_voting.fit(X_train, Y_train.values.ravel())
hv_y_pred = hard_voting.predict(X_test)

## Stacking

#### 8.2.1  <a class="anchor" id="8_2_1"></a> Top 9 models

See results for **HV** in pr_run_1, **base XGBC** in pr_run_2.

In [27]:
def get_stacking():
	# Define the base models
	level9 = list()
	level9.append(('dtc', dtc))
	level9.append(('knn', knn_100))
	level9.append(('rdg', rdg))
	level9.append(('lda', lda))
	level9.append(('log', log))
	level9.append(('svm_t', svm_tuned))
	level9.append(('rfc', rfc))
	level9.append(('hv', hard_voting))
	level9.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on base random forest classifier
	model = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level9 = list()
level9.append(('dtc', dtc))
level9.append(('knn', knn_100))
level9.append(('rdg', rdg))
level9.append(('lda', lda))
level9.append(('log', log))
level9.append(('svm_t', svm_tuned))
level9.append(('rfc', rfc))
level9.append(('hv', hard_voting))
level9.append(('xgbc', xgbc))
level9.append(('stacking', get_stacking()))

In [28]:
# Define the model
stack9_rfc = StackingClassifier(estimators=level9, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack9_rfc = stack9_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack9_rfc_y_pred = stack9_rfc.predict(X_test)

In [29]:
# Accuracy measures
print('Accuracy score with 9 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack9_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 9 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 9 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 9 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack9_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 9 models learnt on base RFC: 79.2%
Recall score  with 9 models learnt on base RFC: 79.2%
Precision score  with 9 models learnt on base RFC: 79.2%
F1 score with 9 models learnt on base RFC: 79.2%


#### 8.2.2  <a class="anchor" id="8_2_2"></a> Top 8 models

See results for **HV** in pr_run_1, **base XGBC** in pr_run_2.

In [22]:
def get_stacking():
	# Define the base models
	level8 = list()
	level8.append(('knn', knn_100))
	level8.append(('rdg', rdg))
	level8.append(('lda', lda))
	level8.append(('log', log))
	level8.append(('svm_t', svm_tuned))
	level8.append(('rfc', rfc))
	level8.append(('hv', hard_voting))
	level8.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on base random forest classifier
	model = StackingClassifier(estimators=level8, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level8 = list()
level8.append(('knn', knn_100))
level8.append(('rdg', rdg))
level8.append(('lda', lda))
level8.append(('log', log))
level8.append(('svm_t', svm_tuned))
level8.append(('rfc', rfc))
level8.append(('hv', hard_voting))
level8.append(('xgbc', xgbc))
level8.append(('stacking', get_stacking()))

In [23]:
# Define the model
stack8_rfc = StackingClassifier(estimators=level8, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack8_rfc = stack8_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack8_rfc_y_pred = stack8_rfc.predict(X_test)

In [24]:
# Accuracy measures
print('Accuracy score with 8 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack8_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 8 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 8 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 8 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack8_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 8 models learnt on base RFC: 76.5%
Recall score  with 8 models learnt on base RFC: 76.5%
Precision score  with 8 models learnt on base RFC: 76.5%
F1 score with 8 models learnt on base RFC: 76.5%


#### 8.2.3  <a class="anchor" id="8_2_3"></a> Top 7 models

See results for **HV** in pr_run_1, **base XGBC** in pr_run_2.

In [19]:
def get_stacking():
	# Define the base models
	level7 = list()
	level7.append(('rdg', rdg))
	level7.append(('lda', lda))
	level7.append(('log', log))
	level7.append(('svm_t', svm_tuned))
	level7.append(('rfc', rfc))
	level7.append(('hv', hard_voting))
	level7.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on base random forest classifier
	model = StackingClassifier(estimators=level7, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level7 = list()
level7.append(('rdg', rdg))
level7.append(('lda', lda))
level7.append(('log', log))
level7.append(('svm_t', svm_tuned))
level7.append(('rfc', rfc))
level7.append(('hv', hard_voting))
level7.append(('xgbc', xgbc))
level7.append(('stacking', get_stacking()))

In [20]:
# Define the model
stack7_rfc = StackingClassifier(estimators=level7, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack7_rfc = stack7_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack7_rfc_y_pred = stack7_rfc.predict(X_test)

In [21]:
# Accuracy measures
print('Accuracy score with 7 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack7_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 7 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack7_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 7 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack7_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 7 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack7_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 7 models learnt on base RFC: 77.2%
Recall score  with 7 models learnt on base RFC: 77.2%
Precision score  with 7 models learnt on base RFC: 77.2%
F1 score with 7 models learnt on base RFC: 77.2%


#### 8.2.4  <a class="anchor" id="8_2_4"></a> Top 6 models

See results for **base XGBC and base RFC** in pr_run_2.

#### 8.2.5  <a class="anchor" id="8_2_5"></a> Top 5 models

See results for **base XGBC and HV** in pr_run_1.

In [16]:
def get_stacking():
	# Define the base models
	level5 = list()
	level5.append(('log', log))
	level5.append(('svm_t', svm_tuned))
	level5.append(('rfc', rfc))
	level5.append(('hv', hard_voting))
	level5.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on base random forest classifier classifier
	model = StackingClassifier(estimators=level5, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level5 = list()
level5.append(('log', log))
level5.append(('svm_t', svm_tuned))
level5.append(('rfc', rfc))
level5.append(('hv', hard_voting))
level5.append(('xgbc', xgbc))
level5.append(('stacking', get_stacking()))

In [17]:
# Define the model
stack5_rfc = StackingClassifier(estimators=level5, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack5_rfc = stack5_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack5_rfc_y_pred = stack5_rfc.predict(X_test)

In [18]:
# Accuracy measures
print('Accuracy score with 5 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack5_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 5 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack5_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 5 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack5_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 5 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack5_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 5 models learnt on base RFC: 76.2%
Recall score  with 5 models learnt on base RFC: 76.2%
Precision score  with 5 models learnt on base RFC: 76.2%
F1 score with 5 models learnt on base RFC: 76.2%


#### 8.2.6  <a class="anchor" id="8_2_6"></a> Top 4 models

See results for **SV** in pr_run_1, **HV** in pr_run_2.

In [10]:
def get_stacking():
	# Define the base models
	level4 = list()
	level4.append(('svm_t', svm_tuned))
	level4.append(('rfc', rfc))
	level4.append(('hv', hard_voting))
	level4.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on hard voting classifier
	model = StackingClassifier(estimators=level4, final_estimator=xgbc, cv=5)
	return model

# Define the base models separately
level4 = list()
level4.append(('svm_t', svm_tuned))
level4.append(('rfc', rfc))
level4.append(('xgbc', xgbc))
level4.append(('hv', hard_voting))
level4.append(('stacking', get_stacking()))

In [11]:
# Define the model
stack4_xgbc = StackingClassifier(estimators=level4, final_estimator=xgbc, cv=5)

# Fit the model on all available data
stack4_xgbc = stack4_xgbc.fit(X, Y.values.ravel())

# Predict the response for test set
stack4_xgbc_y_pred = stack4_xgbc.predict(X_test)

In [12]:
# Accuracy measures
print('Accuracy score with 4 models learnt on base XGBC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_xgbc_y_pred)), 3)*100)+'%')
print('Recall score  with 4 models learnt on base XGBC: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 4 models learnt on base XGBC: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 4 models learnt on base XGBC: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_xgbc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 4 models learnt on base XGBC: 77.2%
Recall score  with 4 models learnt on base XGBC: 77.2%
Precision score  with 4 models learnt on base XGBC: 77.5%
F1 score with 4 models learnt on base XGBC: 77.10000000000001%


In [13]:
def get_stacking():
	# Define the base models
	level4 = list()
	level4.append(('svm_t', svm_tuned))
	level4.append(('rfc', rfc))
	level4.append(('hv', hard_voting))
	level4.append(('xgbc', xgbc))

	# Define the stacking ensemble learnt on random forest classifer
	model = StackingClassifier(estimators=level4, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level4 = list()
level4.append(('svm_t', svm_tuned))
level4.append(('rfc', rfc))
level4.append(('xgbc', xgbc))
level4.append(('hv', hard_voting))
level4.append(('stacking', get_stacking()))

In [14]:
# Define the model
stack4_rfc = StackingClassifier(estimators=level4, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack4_rfc = stack4_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack4_rfc_y_pred = stack4_rfc.predict(X_test)

In [15]:
# Accuracy measures
print('Accuracy score with 4 models learnt on base RFC: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack4_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 4 models learnt on base RFC: ' + str(round(metrics.recall_score(Y_test, np.round(stack4_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 4 models learnt on base RFC: ' + str(round(metrics.precision_score(Y_test, np.round(stack4_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 4 models learnt on base RFC: ' + str(round(metrics.f1_score(Y_test, np.round(stack4_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 4 models learnt on base RFC: 76.2%
Recall score  with 4 models learnt on base RFC: 76.2%
Precision score  with 4 models learnt on base RFC: 76.3%
F1 score with 4 models learnt on base RFC: 76.2%


#### 8.2.7  <a class="anchor" id="8_2_7"></a> Top 3 models

See results for See results for **HV, SV, base RFC on SV** in pr_run_1, **XGBC, base RFC on HV** in pr_run_2.

#### 8.2.8  <a class="anchor" id="8_2_8"></a> Top 2 models

See results for **HV, SV** in pr_run_1, **base XGBC** in pr_run_2.

In [30]:
def get_stacking():
	# Define the base models
	level2 = list()
	level2.append(('xgbc', xgbc))
	level2.append(('hv', hard_voting))

	# Define the stacking ensemble
	model = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level2 = list()
level2.append(('xgbc', xgbc))
level2.append(('hv', hard_voting))
level2.append(('stacking', get_stacking()))

In [31]:
# Define the model
stack2_rfc = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack2_rfc = stack2_rfc.fit(X, Y.values.ravel())

# Predict the response for test set
stack2_rfc_y_pred = stack2_rfc.predict(X_test)

In [32]:
# Accuracy measures
print('Accuracy score with 2 models learnt on base RFC with HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_rfc_y_pred)), 3)*100)+'%')
print('Recall score  with 2 models learnt on base RFC with HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 2 models learnt on base RFC with HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 2 models learnt on base RFC with HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_rfc_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 2 models learnt on base RFC with HV: 69.69999999999999%
Recall score  with 2 models learnt on base RFC with HV: 69.69999999999999%
Precision score  with 2 models learnt on base RFC with HV: 69.69999999999999%
F1 score with 2 models learnt on base RFC with HV: 69.69999999999999%


In [35]:
# Get a stacking ensemble of models
def get_stacking():
	# Define the base models
	level2 = list()
	level2.append(('xgbc', xgbc))
	level2.append(('sv', soft_voting))

	# Define the stacking ensemble
	model = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)
	return model

# Define the base models separately
level2 = list()
level2.append(('xgbc', xgbc))
level2.append(('sv', soft_voting))
level2.append(('stacking', get_stacking()))

In [36]:
# Define the model
stack2_rfc_sv = StackingClassifier(estimators=level2, final_estimator=rfc, cv=5)

# Fit the model on all available data
stack2_rfc_sv = stack2_rfc_sv.fit(X, Y.values.ravel())

# Predict the response for test set
stack2_rfc_sv_y_pred = stack2_rfc_sv.predict(X_test)

In [37]:
# Accuracy measures
print('Accuracy score with 2 models learnt on base RFC with SV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_rfc_sv_y_pred)), 3)*100)+'%')
print('Recall score  with 2 models learnt on base RFC with SV: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_rfc_sv_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 2 models learnt on base RFC with SV: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_rfc_sv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 2 models learnt on base RFC with SV: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_rfc_sv_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 2 models learnt on base RFC with SV: 82.19999999999999%
Recall score  with 2 models learnt on base RFC with SV: 82.19999999999999%
Precision score  with 2 models learnt on base RFC with SV: 82.19999999999999%
F1 score with 2 models learnt on base RFC with SV: 82.1%


In [39]:
# Get a stacking ensemble of models
def get_stacking():
	# Define the base models
	level2 = list()
	level2.append(('svm_t', svm_tuned))
	level2.append(('hv', hard_voting))

	# Define the stacking ensemble
	model = StackingClassifier(estimators=level2, final_estimator=svm_tuned, cv=5)
	return model

# Define the base models separately
level2 = list()
level2.append(('svm_t', svm_tuned))
level2.append(('hv', hard_voting))
level2.append(('stacking', get_stacking()))

In [40]:
# Define the model
stack2_svm_t_hv = StackingClassifier(estimators=level2, final_estimator=svm_tuned, cv=5)

# Fit the model on all available data
stack2_svm_t_hv = stack2_svm_t_hv.fit(X, Y.values.ravel())

# Predict the response for test set
stack2_svm_t_hv_y_pred = stack2_svm_t_hv.predict(X_test)

In [41]:
# Accuracy measures
print('Accuracy score with 2 models learnt on tuned SVM with HV: ' + str(round(metrics.accuracy_score(Y_test, np.round(stack2_svm_t_hv_y_pred)), 3)*100)+'%')
print('Recall score  with 2 models learnt on tuned SVM with HV: ' + str(round(metrics.recall_score(Y_test, np.round(stack2_svm_t_hv_y_pred), average='weighted'), 3)*100)+'%')
print('Precision score  with 2 models learnt on tuned SVM with HV: ' + str(round(metrics.precision_score(Y_test, np.round(stack2_svm_t_hv_y_pred), average='weighted', zero_division=1), 3)*100)+'%')
print('F1 score with 2 models learnt on tuned SVM with HV: ' + str(round(metrics.f1_score(Y_test, np.round(stack2_svm_t_hv_y_pred), average='weighted'), 3)*100)+'%')

Accuracy score with 2 models learnt on tuned SVM with HV: 80.9%
Recall score  with 2 models learnt on tuned SVM with HV: 80.9%
Precision score  with 2 models learnt on tuned SVM with HV: 81.3%
F1 score with 2 models learnt on tuned SVM with HV: 80.9%
