# Final classification with XGBoost classifier
### This file contains the training and testing of XGBoost classifier with and without fusion

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

In [2]:
df = pd.read_excel("cnn.xlsx")

In [3]:
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50, 'target'],
      dtype='object')

In [4]:
X = df.drop(['target'], axis=1)

y = df['target']

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [6]:
X_train.shape, X_test.shape

((438, 50), (189, 50))

In [7]:
cols = X_train.columns

In [8]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [9]:
X_train = pd.DataFrame(X_train, columns=[cols])

In [10]:
X_test = pd.DataFrame(X_test, columns=[cols])

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

def evaluate(model, X_train, X_test, y_train, y_test):
    
    y_test_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    print("Training:\n")
    clf_report = pd.DataFrame(classification_report(y_train, y_train_pred, output_dict=True))
    print(f"Accuracy:\n{accuracy_score(y_train, y_train_pred):.4f}")

    print("Testing: \n")
    clf_report = pd.DataFrame(classification_report(y_test, y_test_pred, output_dict=True))
    print(f"Accuracy:\n{accuracy_score(y_test, y_test_pred):.4f}")

In [14]:
from sklearn.metrics import accuracy_score
import xgboost as xgb

xgb_cl = xgb.XGBClassifier()

xgb_cl.fit(X_train, y_train)

preds = xgb_cl.predict(X_test)

  from pandas import MultiIndex, Int64Index






In [15]:
y_pred = xgb_cl.predict(X_test)

In [16]:
print('Training set score: {:.4f}'.format(xgb_cl.score(X_train, y_train)))

print('Test set score: {:.4f}'.format(xgb_cl.score(X_test, y_test)))

Training set score: 1.0000
Test set score: 0.9153


In [17]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[85  8]
 [ 8 88]]

True Positives(TP) =  85

True Negatives(TN) =  88

False Positives(FP) =  8

False Negatives(FN) =  8


In [18]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91        93
           1       0.92      0.92      0.92        96

    accuracy                           0.92       189
   macro avg       0.92      0.92      0.92       189
weighted avg       0.92      0.92      0.92       189



## With Fusion

In [19]:
df = pd.read_excel("cnn_abcd.xlsx",sheet_name=2)

In [20]:
col_names = df.columns

col_names

Index([       1,        2,        3,        4,        5,        6,        7,
              8,        9,       10,       11,       12,       13,       14,
             15,       16,       17,       18,       19,       20,       21,
             22,       23,       24,       25,       26,       27,       28,
             29,       30,       31,       32,       33,       34,       35,
             36,       37,       38,       39,       40,       41,       42,
             43,       44,       45,       46,       47,       48,       49,
             50,       51,       52,       53,       54, 'target'],
      dtype='object')

In [21]:
X = df.drop(['target'], axis=1)

y = df['target']

In [22]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [23]:
X_train.shape, X_test.shape

((432, 54), (186, 54))

In [24]:
cols = X_train.columns

In [25]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [26]:
from sklearn.metrics import accuracy_score

xgb_cl = xgb.XGBClassifier()

xgb_cl.fit(X_train, y_train)

preds = xgb_cl.predict(X_test)





In [27]:
y_pred = xgb_cl.predict(X_test)

In [28]:
print('Training set score: {:.4f}'.format(xgb_cl.score(X_train, y_train)))

print('Test set score: {:.4f}'.format(xgb_cl.score(X_test, y_test)))

Training set score: 1.0000
Test set score: 0.9301


In [29]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[0,0])

print('\nTrue Negatives(TN) = ', cm[1,1])

print('\nFalse Positives(FP) = ', cm[0,1])

print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[90  7]
 [ 6 83]]

True Positives(TP) =  90

True Negatives(TN) =  83

False Positives(FP) =  7

False Negatives(FN) =  6


In [30]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.93      0.93        97
           1       0.92      0.93      0.93        89

    accuracy                           0.93       186
   macro avg       0.93      0.93      0.93       186
weighted avg       0.93      0.93      0.93       186

