<a href="https://colab.research.google.com/github/kartikgandhi/Soft-Voting-Ensemble-Results-Edited-Dataset/blob/main/Ensemble_model_Phoenix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
from numpy import mean
from numpy import std

In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [45]:
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

In [46]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [47]:
DATA = pd.read_csv("/content/gdrive/MyDrive/Edited Datasets/Edited-phoenix data 4.2-4.3 - phoenix data 4.2-4.3.csv")


In [48]:
DATA['Change'] = DATA['Change'].map({'yes': 1, 'no': 0})

In [49]:
DATA.shape

(1100, 8)

In [50]:
DATA.head()

Unnamed: 0,CBO,NOC,RFC,LOC,DIT,LCOM,WMC,Change
0,5,0,16,44,0,0,3,1
1,9,0,23,309,1,70,10,1
2,31,1,17,154,1,58,4,1
3,2,0,16,15,1,33,3,0
4,27,0,36,544,0,78,23,1


In [51]:
X = DATA.drop('Change', axis=1)
y = DATA['Change']

In [52]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [53]:
# get a voting ensemble of models
def get_voting():
	# define the base models
	models = list()
	models.append(('lin', SVC(probability=True, kernel='linear')))
	models.append(('rbf', SVC(probability=True, kernel='rbf')))
	models.append(('poly', SVC(probability=True, kernel='poly')))

	# define the voting ensemble
	ensemble = VotingClassifier(estimators=models, voting='soft')
	return ensemble

In [54]:
# get a list of models to evaluate
def get_models():
	models = dict()
	models['hard_voting'] = get_voting()
	return models

In [55]:
# evaluate a give model
def evaluate_model(model, X, y):
  model=model.fit(X_train,y_train)
  y_pred = model.predict(X_test)
  return y_pred

In [56]:
# get the models to evaluate
models = get_models()

In [57]:
results, names = list(), list()
for name, model in models.items():
    y_pred = evaluate_model(model, X, y)
    results.append(y_pred)
    names.append(name)
    print('>%s %.3f (%.3f)' % (name, mean(y_pred), std(y_pred)))

>hard_voting 0.164 (0.370)


In [58]:
print(y_pred)

[1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 0 0 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 0 1 0 1 1 0 0 0
 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1]


In [59]:
from sklearn.metrics import classification_report, confusion_matrix
cm=confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

[[117   6]
 [ 67  30]]
              precision    recall  f1-score   support

           0       0.64      0.95      0.76       123
           1       0.83      0.31      0.45        97

    accuracy                           0.67       220
   macro avg       0.73      0.63      0.61       220
weighted avg       0.72      0.67      0.63       220



In [60]:
# predict probabilities
from sklearn.metrics import roc_auc_score
pred_prob = model.predict_proba(X_test)
auc_score = roc_auc_score(y_test, pred_prob[:,1])
print("Area Under Curve=")
print(auc_score)

Area Under Curve=
0.7705137876121029


In [61]:
from sklearn.metrics import matthews_corrcoef
mcc=matthews_corrcoef(y_test,y_pred)
print("Matthews correlation coefficient=")
print(mcc)

Matthews correlation coefficient=
0.3496088560807286


In [62]:
from sklearn.metrics import balanced_accuracy_score
bac=balanced_accuracy_score(y_test, y_pred)
print("Balanced Accuracy Score=")
print(bac)

Balanced Accuracy Score=
0.6302489313552929


In [63]:
from imblearn.metrics import geometric_mean_score
gmean=geometric_mean_score(y_test, y_pred)
print("Geometric Mean Score=")
print(gmean)

Geometric Mean Score=
0.5423943230803873
