In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold

In [2]:
def show_ME(Y_test, predicted):
    print ("Accuracy Score :",accuracy_score(Y_test, predicted))
    print ("Confusion Matrix :") 
    print(confusion_matrix(Y_test, predicted))  
    print ("Report : ") 
    print (classification_report(Y_test, predicted))

def KNN(X_train, X_test, Y_train):
    classifier =  KNeighborsClassifier()
    classifier = classifier.fit(X_train, Y_train)
    predicted = classifier.predict(X_test)
    return predicted

def Naive_Bayes(X_train, X_test, Y_train):
    classifier =  GaussianNB()
    classifier = classifier.fit(X_train, Y_train)
    predicted = classifier.predict(X_test)
    return predicted

def Decision_Tree(X_train, X_test, Y_train):
    classifier =  DecisionTreeClassifier()
    classifier = classifier.fit(X_train, Y_train)
    predicted = classifier.predict(X_test)
    return predicted

def Random_Forest(X_train, X_test, Y_train):
    classifier =  RandomForestClassifier()
    classifier = classifier.fit(X_train, Y_train)
    predicted = classifier.predict(X_test)
    return predicted

def SVM(X_train, X_test, Y_train):
    classifier =  SVC()
    classifier = classifier.fit(X_train, Y_train)
    predicted = classifier.predict(X_test)
    return predicted

def SVM_cross_validation():
    acc_scores = []
    kfold = KFold(20, shuffle=True, random_state=1)
    for i in range(20):
        arr1,arr2 = next(kfold.split(data))
        X_train = data.iloc[arr1, 1:19]
        X_test = data.iloc[arr2, 1:19]
        Y_train = data.iloc[arr1, 0]
        Y_test = data.iloc[arr2, 0]
        pred = SVM(X_train, X_test, Y_train)
        acc_scores.append(accuracy_score(Y_test, pred))   
    return np.mean(acc_scores)

In [3]:
data = pd.read_csv("BankChurnersNormalized.csv")

X = data.iloc[:, 1:20].values 
Y = data.iloc[:, 0].values 

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.5)

In [4]:
# KNN
k = 15
pred1 = KNN(X_train, X_test, Y_train)
print("KNN\n")
show_ME(Y_test, pred1)

KNN

Accuracy Score : 0.8817648337317975
Confusion Matrix :
[[3688  165]
 [ 379  369]]
Report : 
              precision    recall  f1-score   support

           0       0.91      0.96      0.93      3853
           1       0.69      0.49      0.58       748

    accuracy                           0.88      4601
   macro avg       0.80      0.73      0.75      4601
weighted avg       0.87      0.88      0.87      4601



In [5]:
# Naive Bayes
pred2 = Naive_Bayes(X_train, X_test, Y_train)
print("NAIVE BAYES\n")
show_ME(Y_test, pred2)

NAIVE BAYES

Accuracy Score : 0.8841556183438383
Confusion Matrix :
[[3680  173]
 [ 360  388]]
Report : 
              precision    recall  f1-score   support

           0       0.91      0.96      0.93      3853
           1       0.69      0.52      0.59       748

    accuracy                           0.88      4601
   macro avg       0.80      0.74      0.76      4601
weighted avg       0.88      0.88      0.88      4601



In [6]:
# Decision Tree
pred3 = Decision_Tree(X_train, X_test, Y_train)
print("DECISION TREE\n")
show_ME(Y_test, pred3)

DECISION TREE

Accuracy Score : 0.9334927189741361
Confusion Matrix :
[[3702  151]
 [ 155  593]]
Report : 
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      3853
           1       0.80      0.79      0.79       748

    accuracy                           0.93      4601
   macro avg       0.88      0.88      0.88      4601
weighted avg       0.93      0.93      0.93      4601



In [7]:
# Random Forest
pred4 = Random_Forest(X_train, X_test, Y_train)
print("RANDOM FOREST\n")
show_ME(Y_test, pred4)

RANDOM FOREST

Accuracy Score : 0.9552271245381438
Confusion Matrix :
[[3806   47]
 [ 159  589]]
Report : 
              precision    recall  f1-score   support

           0       0.96      0.99      0.97      3853
           1       0.93      0.79      0.85       748

    accuracy                           0.96      4601
   macro avg       0.94      0.89      0.91      4601
weighted avg       0.95      0.96      0.95      4601



In [8]:
X = data.iloc[:, 15:16].values 
Y = data.iloc[:, 0].values 

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.5)

In [9]:
# SVM
pred5 = SVM(X_train, X_test, Y_train)
print("SVM\n")
show_ME(Y_test, pred5)
print("Cross-validation:")
print("\nAverage accuracy score :", SVM_cross_validation())

SVM

Accuracy Score : 0.8393827428819822
Confusion Matrix :
[[3835   18]
 [ 721   27]]
Report : 
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      3853
           1       0.60      0.04      0.07       748

    accuracy                           0.84      4601
   macro avg       0.72      0.52      0.49      4601
weighted avg       0.80      0.84      0.77      4601

Cross-validation:

Average accuracy score : 0.8481561822125812
