# **Abstract**
* The objective of this notebook is to predict customer churn (account cancellation) among credit card customers.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the raw churn dataset from the Kaggle input directory.
data = pd.read_csv('../input/credit-card-customers/BankChurners.csv')

# Positional trim: drop the first column and the last two columns in one slice.
# NOTE(review): presumably an identifier column and two non-feature columns — confirm against the CSV.
data = data.iloc[:, 1:-2]

In [None]:
# Display the column labels of the loaded frame.
data.columns

In [None]:
# Bar chart of the number of rows per Attrition_Flag value (class balance of the target).
sns.countplot(x='Attrition_Flag', data=data)

In [None]:
# Count missing (NaN) values per column; the result is displayed as the cell output.
data.isna().sum()

# Exploratory Data Analysis

* Doing some visualization with our data;

* Correlation between features.

In [None]:
# Credit limit distribution per income bracket, brackets shown in ascending order
# with 'Unknown' first.
income_order = [
    'Unknown',
    'Less than $40K',
    '$40K - $60K',
    '$60K - $80K',
    '$80K - $120K',
    '$120K +',
]
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='Income_Category', y='Credit_Limit', order=income_order, data=data, ax=ax)
ax.set_title('Income x Credit_Limit Boxplot')

In [None]:
# Credit limit distribution per education level.
# Levels are listed in ascending order of attainment ('Unknown' first), so
# 'Post-Graduate' comes before 'Doctorate'; the title names both variables,
# mirroring the income boxplot.
plt.figure(figsize=(8,6))
sns.boxplot(x='Education_Level', 
            y='Credit_Limit',
            order=['Unknown', 'Uneducated', 'High School', 'College', 'Graduate', 'Post-Graduate', 'Doctorate'],
            data=data).set_title('Education x Credit_Limit Boxplot')

In [None]:
# Heatmap of pairwise Pearson correlations.
# Correlation is only defined for numeric columns; selecting them explicitly
# avoids the error recent pandas versions raise when corr() meets string
# columns, and gives the same result on older versions that dropped them silently.
colormap = plt.cm.RdBu
numeric_corr = data.select_dtypes(include='number').corr()

plt.figure(figsize=(14,12))
plt.title('Pearson Correlation of Features', y=1.05, size=15)
sns.heatmap(numeric_corr, linewidths=0.1, vmax=1.0,
            square=True, cmap=colormap, linecolor='white', annot=True)

In [None]:
# Encode the target 'Attrition_Flag' as binary: 0 = existing customer, 1 = attrited (churned).
# Values not present in the mapping become NaN, so this cell should run once per load.
churn_codes = {'Existing Customer': 0, 'Attrited Customer': 1}
data['Attrition_Flag'] = data['Attrition_Flag'].map(churn_codes)

**Pearson Correlation between features and target variable Attrition_Flag**

In [None]:
# Correlation of every numeric feature with the target, strongest positive first.
# Non-numeric columns are excluded explicitly because corr() on string columns
# raises in recent pandas versions.
corr = data.select_dtypes(include='number').corr()
corr = corr.sort_values('Attrition_Flag', ascending=False)
print(corr.Attrition_Flag)

In [None]:
# One-hot encode the categorical features: each category becomes its own
# indicator column named '<column>_<category>' (e.g. 'Card_Category_Blue').
# The frame is replaced in place, so the original string columns are gone afterwards.
data = pd.get_dummies(data)

In [None]:
# Churns vs Credit Card Category (Blue, Silver, Gold, Platinum)
#
# First column:  share of all customers holding the card.
# Second column: churn rate among the holders of that card, i.e. the mean of
#                Attrition_Flag restricted to rows where the card indicator is set.
# (Comparing the indicator column with Attrition_Flag for equality only measures
# how often two 0/1 columns agree, which is not a churn percentage.)
print('Card        % of Customers     % of Churns')
print('------------------------------------------')
for card in ('Blue', 'Silver', 'Gold', 'Platinum'):
    # astype(bool) makes the mask work whether the dummies are 0/1 integers or booleans.
    holders = data['Card_Category_' + card].astype(bool)
    customer_share = holders.mean() * 100
    churn_rate = data.loc[holders, 'Attrition_Flag'].mean() * 100
    # Label plus first number always span 21 characters, matching the original row layout.
    print('%s: %*.2f %17.2f' % (card, 19 - len(card), customer_share, churn_rate))

In [None]:
# Separating features into x and target y Attrition_Flag (churns).
# The target is selected by name instead of by column position, and the feature
# matrix is cast to float so boolean indicator columns do not turn the array
# into dtype=object.
target_name = 'Attrition_Flag'
x = data.drop(columns=target_name).to_numpy(dtype=float)
y = data[target_name].to_numpy()

# Hold out 30% of the rows for the final evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=5)

**Visualizing our data with some Dimensionality Reduction Algorithms**

Using PCA decomposition and t-SNE to see if we can find some clusters

In [None]:
# Standardize features. The scaler's statistics are learned from the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(x_train)

scaled_x_train, scaled_x_test = scaler.transform(x_train), scaler.transform(x_test)

In [None]:
from sklearn.decomposition import PCA

# Project the scaled training data onto its first three principal components.
pca = PCA(n_components=3)
pc = pca.fit_transform(scaled_x_train)

# Report the fraction of total variance captured by each component.
print('Components Variance:')
for line_format, ratio in zip(('1st: %.3f', '2nd: %.3f', '3rd: %.3f'),
                              pca.explained_variance_ratio_):
    print(line_format % ratio)

In [None]:
# First two principal components, coloured by the churn label.
ax = plt.gca()
ax.scatter(pc[:, 0], pc[:, 1], c=y_train)
ax.set(title="PCA", xlabel="1st Component", ylabel="2nd Component")
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of the three principal components, coloured by the churn label.
fig = plt.figure(figsize=(8,8))
# Create the 3D axes through the figure so they are actually attached to it;
# constructing Axes3D(fig) directly leaves the figure empty on recent Matplotlib.
ax = fig.add_subplot(projection='3d')
ax.scatter(pc[:, 0], pc[:, 1], pc[:, 2], c=y_train)
ax.set_xlabel('1st Component')
ax.set_ylabel('2nd Component')
ax.set_zlabel('3rd Component')
ax.set_title('PCA')

In [None]:
from sklearn.manifold import TSNE

# Embed the scaled training data into three dimensions with t-SNE.
# t-SNE is stochastic; a fixed random_state makes the embedding (and the plots
# built from it) reproducible across runs.
tsne = TSNE(n_components=3, random_state=5)
tsne_comp = tsne.fit_transform(scaled_x_train)

In [None]:
# First two t-SNE dimensions, coloured by the churn label.
ax = plt.gca()
ax.scatter(tsne_comp[:, 0], tsne_comp[:, 1], c=y_train)
ax.set(title="t-SNE", xlabel="1st Component", ylabel="2nd Component")
plt.show()

In [None]:
# 3D scatter of the three t-SNE dimensions, coloured by the churn label.
fig = plt.figure(figsize=(8,8))
# Create the 3D axes through the figure so they are actually attached to it;
# constructing Axes3D(fig) directly leaves the figure empty on recent Matplotlib.
ax = fig.add_subplot(projection='3d')
ax.scatter(tsne_comp[:, 0], tsne_comp[:, 1], tsne_comp[:, 2], c=y_train)
ax.set_xlabel('1st Component')
ax.set_ylabel('2nd Component')
ax.set_zlabel('3rd Component')
ax.set_title('t-SNE')

# Testing Classifiers

We'll compare several classifiers using cross-validation on the training set, then pick the best one to fine-tune and evaluate on the held-out test set.

* Logistic Regression
* Decision Tree
* LinearSVC
* Random Forest
* XGBoost
* Gaussian Naive Bayes

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import KFold, cross_val_score

# Evaluate Models
# Each candidate is scored with 10-fold cross-validated accuracy on the scaled
# training data; the printed columns are name, mean accuracy and standard deviation.

n_folds = 10

# NOTE(review): this classifier is fitted here but is not used by the
# cross-validation loop below; kept so the name stays available.
svc_clf = LinearSVC(random_state=0, tol=1e-5, dual=False)
svc_clf.fit(scaled_x_train, y_train)

# Tree-based models are seeded so the reported scores are reproducible.
models = [
    ('LR', LogisticRegression()),
    ('Tree', DecisionTreeClassifier(random_state=0)),
    ('SVC', LinearSVC(dual=False)),
    ('Forest', RandomForestClassifier(random_state=0)),
    ('XGB', XGBClassifier(use_label_encoder=False, eval_metric='logloss')),
    ('NB', GaussianNB()),
]

# The splitter does not depend on the model, so build it once; every model is
# evaluated on the same folds.
kfold = KFold(n_splits=n_folds)

for name, model in models:
    cv_results = cross_val_score(model, scaled_x_train, y_train, cv=kfold, scoring='accuracy')
    print("%6s %.3f %.3f " % (name, cv_results.mean(), cv_results.std()))

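**Randomized Search Hyperparameter Tuning for XGBoost**

Now we will use sklearn's randomized search (`RandomizedSearchCV`), which samples a fixed number of combinations from the parameter grid, to find good hyperparameters for our XGBoost model.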

In [None]:
import xgboost as xgb

# Candidate values for each XGBoost hyperparameter explored by the search.
params = dict(
    min_child_weight=[1, 5, 10],
    gamma=[0.5, 1, 1.5, 2, 5],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.6, 0.8, 1.0],
    max_depth=[5, 6, 7, 8],
)

# Base estimator; the hyperparameters not listed above stay fixed at these values.
xgb_clf = xgb.XGBClassifier(
    objective='binary:logistic',
    learning_rate=0.3,
    n_estimators=600,
    nthread=1,
    eval_metric='logloss',
    use_label_encoder=False,
)

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

# Randomized hyperparameter search: sample `param_comb` combinations from
# `params` and score each by ROC AUC under stratified `folds`-fold cross-validation.
folds = 3
param_comb = 5

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=25)

# Pass the splitter itself rather than skf.split(...): a generator is exhausted
# after one use, which prevents re-fitting or cloning the search object. With a
# fixed random_state the folds produced are the same.
search = RandomizedSearchCV(
    xgb_clf,
    param_distributions=params,
    n_iter=param_comb,
    scoring='roc_auc',
    n_jobs=4,
    cv=skf,
    random_state=25,
)

search.fit(x_train, y_train)

print('Best hyperparameters:')
print(search.best_params_)

In [None]:
# Final model: hyperparameters chosen for the final fit, written out explicitly.
# NOTE(review): presumably copied from a previous search run — confirm they
# still match search.best_params_.
params = {
        'min_child_weight': 1,
        'gamma': 2,
        'subsample': 1,
        'colsample_bytree': 0.8,
        'max_depth': 8,
        }

# The tuned values must be passed to the estimator; without **params the model
# would be trained with XGBoost's defaults and the tuning would have no effect.
xgb_clf = xgb.XGBClassifier(learning_rate=0.3, n_estimators=600, objective='binary:logistic',
                            nthread=1, use_label_encoder=False, eval_metric='logloss',
                            **params)

xgb_clf.fit(x_train, y_train)

**Model accuracy on our test set**

In [None]:
# Predict on the held-out test split and report accuracy as a percentage.
xgb_predictions = np.round(xgb_clf.predict(x_test))
accuracy_pct = np.mean(xgb_predictions == y_test) * 100

print('XGBoost Test Set')
print('Accuracy: %.2f' % accuracy_pct)

In [None]:
# Attach readable feature names to the trained booster. The model was fitted on
# a plain array, so names are taken from the frame's columns after the first
# (the first column is the target).
booster = xgb_clf.get_booster()
booster.feature_names = list(data.columns[1:])

# Gain-based importance per feature, highest first. The scores are captured and
# printed; a bare expression in the middle of a cell would be discarded.
gain_scores = pd.Series(booster.get_score(importance_type="gain")).sort_values(ascending=False)
print(gain_scores)

# Bar chart of the top features using plot_importance's default importance type.
fig, ax = plt.subplots(1,1,figsize=(10,10))
xgb.plot_importance(xgb_clf, max_num_features=35, ax=ax)
plt.show()