# Final classification with Bagging classifier
### This file contains the training and testing of Bagging classifier with and without fusion

## Without fusion

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

In [33]:
df = pd.read_excel("cnn.xlsx")

In [34]:
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50, 'target'],
      dtype='object')

In [35]:
X = df.drop(['target'], axis=1)

y = df['target']

In [36]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [37]:
X_train.shape, X_test.shape

((501, 50), (126, 50))

In [38]:
cols = X_train.columns

In [39]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [40]:
X_train = pd.DataFrame(X_train, columns=[cols])

In [41]:
X_test = pd.DataFrame(X_test, columns=[cols])

In [42]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

def evaluate(model, X_train, X_test, y_train, y_test):
    
    y_test_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    print("Training: \n")
    clf_report = pd.DataFrame(classification_report(y_train, y_train_pred, output_dict=True))
    print(f"Accuracy:\n{accuracy_score(y_train, y_train_pred):.4f}")

    print("Testing: \n")
    clf_report = pd.DataFrame(classification_report(y_test, y_test_pred, output_dict=True))
    print(f"Accuracy:\n{accuracy_score(y_test, y_test_pred):.4f}")

In [43]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier()
bagging_clf = BaggingClassifier(base_estimator=tree, n_estimators=1500, random_state=42)
bagging_clf.fit(X_train, y_train)

evaluate(bagging_clf, X_train, X_test, y_train, y_test)

Training: 

Accuracy:
1.0000
Testing: 

Accuracy:
0.9206


In [44]:
y_pred = bagging_clf.predict(X_test)

In [45]:
print('Training set score: {:.4f}'.format(bagging_clf.score(X_train, y_train)))

print('Test set score: {:.4f}'.format(bagging_clf.score(X_test, y_test)))

Training set score: 1.0000
Test set score: 0.9206


In [46]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[63  7]
 [ 3 53]]

True Positives(TP) =  63

True Negatives(TN) =  53

False Positives(FP) =  7

False Negatives(FN) =  3


In [47]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.90      0.93        70
           1       0.88      0.95      0.91        56

    accuracy                           0.92       126
   macro avg       0.92      0.92      0.92       126
weighted avg       0.92      0.92      0.92       126



## With Fusion

In [17]:
df = pd.read_excel("cnn_abcd.xlsx",sheet_name=2)

In [18]:
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50,       51,       52,       53,       54, 'target'],
      dtype='object')

In [19]:
X = df.drop(['target'], axis=1)

y = df['target']

In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [21]:
X_train.shape, X_test.shape

((432, 54), (186, 54))

In [22]:
cols = X_train.columns

In [23]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [24]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier()
bagging_clf = BaggingClassifier(base_estimator=tree, n_estimators=1500, random_state=42)
bagging_clf.fit(X_train, y_train)

evaluate(bagging_clf, X_train, X_test, y_train, y_test)

Training: 

Accuracy:
1.0000
Testing: 

Accuracy:
0.9247


In [25]:
y_pred = bagging_clf.predict(X_test)

In [28]:
#y_pred = grad_boost_clf.predict(X_test)

In [29]:
print('Training set score: {:.4f}'.format(bagging_clf.score(X_train, y_train)))

print('Test set score: {:.4f}'.format(bagging_clf.score(X_test, y_test)))

Training set score: 1.0000
Test set score: 0.9247


In [30]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[90  7]
 [ 7 82]]

True Positives(TP) =  90

True Negatives(TN) =  82

False Positives(FP) =  7

False Negatives(FN) =  7


In [31]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        97
           1       0.92      0.92      0.92        89

    accuracy                           0.92       186
   macro avg       0.92      0.92      0.92       186
weighted avg       0.92      0.92      0.92       186

