In [None]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn import metrics
import seaborn as sns
# Plot theme: white background with grid lines, and seaborn's single-letter
# colour codes ("b", "r", ...) remapped to the active palette.
# A single call is enough: each sns.set(...) call re-applies the whole theme,
# so a preceding sns.set(style="white") would simply be overridden.
sns.set(style="whitegrid", color_codes=True)

In [None]:
# Reading in Dataset
df = pd.read_csv('BankChurners.csv')

# Subsetting to needed columns (column positions 1 through 20).
# .copy() gives an independent frame so the column assignment below never
# writes through to (or warns about) the frame returned by read_csv.
df = df[df.columns[1:21]].copy()

# Changing Attrition_Flag column to 0s and 1s.
# Any label other than the two listed here becomes NaN.
df['Attrition_Flag'] = df['Attrition_Flag'].map({'Existing Customer': 0, 'Attrited Customer': 1})

# Dummy variables for all Categorical Variables, emitted directly as integers.
# dtype=int replaces a follow-up cast of the form `df.iloc[14:].astype(int)`:
# that expression slices ROWS 14 onward (not columns), so it cast every column
# of those rows to int and truncated any fractional feature values there.
df = pd.get_dummies(
    df,
    columns=["Gender", "Education_Level", "Marital_Status", "Income_Category", "Card_Category"],
    dtype=int,
)

In [None]:
# Creating Train/Test Split
# Features: every column except the target.
X = df.drop(columns='Attrition_Flag')

# Target: kept as a one-column DataFrame (not a Series) because later cells
# flatten it with `.values.ravel()`.
y = df[['Attrition_Flag']].astype('int')

# 70/30 split. A fixed random_state makes the split, and therefore every metric
# computed further down, reproducible across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Training the Model
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# 5-nearest-neighbours classifier; the one-column target frame is flattened to
# a 1-D label array before fitting.
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train.to_numpy().ravel())

# Hard class predictions for the held-out split.
y_pred = classifier.predict(X_test)

In [None]:
# Test-set misclassification rate for each K from 1 through 39
# (range(1, 40) stops before 40).
train_labels = y_train.values.ravel()
test_labels = y_test.values.ravel()

error = []
for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, train_labels)
    # Fraction of test rows whose predicted label differs from the true label.
    error.append(np.mean(knn.predict(X_test) != test_labels))

In [None]:
# Plotting Errors by K values
# The figure call needs the `plt.` qualifier; a bare `.figure(...)` is a
# SyntaxError and stops the whole cell from running.
plt.figure(figsize=(12, 6))

# One x position per recorded error, starting at K=1, so the axis always
# matches however many K values were evaluated.
k_values = range(1, len(error) + 1)
plt.plot(k_values, error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')
plt.show()

In [None]:
# Confusion matrix of the test-split predictions:
# rows are the true classes, columns are the predicted classes.
cnf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion Matrix Visualization (explicit figure/axes interface)
class_names = [0, 1]  # class labels shown on the ticks
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
ax.set_xticks(tick_marks)
ax.set_xticklabels(class_names)
ax.set_yticks(tick_marks)
ax.set_yticklabels(class_names)

# Annotated heatmap drawn onto the axes created above; fmt='g' prints the
# cell counts as plain numbers.
matrix_frame = pd.DataFrame(cnf_matrix)
sns.heatmap(matrix_frame, annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
fig.tight_layout()
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')

In [None]:
# Per-class precision, recall, F1-score and support for the test-split predictions.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Headline scores for the test-split predictions, printed one per line.
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(y_test, y_pred))

In [None]:
# ROC Curve
# ROC/AUC need a continuous score per sample so the decision threshold can be
# swept. Hard 0/1 predictions collapse the curve to a single operating point,
# so the predicted probability of the positive class (column 1) is used instead.
# X_test here is the already-scaled test matrix the classifier predicts on.
y_score = classifier.predict_proba(X_test)[:, 1]

fpr, tpr, _ = metrics.roc_curve(y_test, y_score)
auc = metrics.roc_auc_score(y_test, y_score)

plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc=4)  # loc=4 is the lower-right corner
plt.show()