# Credit Card Churning Analysis and Prediction

In [1]:
# Import necessary modules and libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import svm

np.random.seed(1980)
%matplotlib inline

In [2]:
# Load in the data and drop unnecesary columns
data = pd.read_csv("BankChurners.csv")
data.drop(columns=["CLIENTNUM", "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1", "Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2"], inplace=True)
data.head(7)

Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
0,Existing Customer,45,M,3,High School,Married,$60K - $80K,Blue,39,5,1,3,12691.0,777,11914.0,1.335,1144,42,1.625,0.061
1,Existing Customer,49,F,5,Graduate,Single,Less than $40K,Blue,44,6,1,2,8256.0,864,7392.0,1.541,1291,33,3.714,0.105
2,Existing Customer,51,M,3,Graduate,Married,$80K - $120K,Blue,36,4,1,0,3418.0,0,3418.0,2.594,1887,20,2.333,0.0
3,Existing Customer,40,F,4,High School,Unknown,Less than $40K,Blue,34,3,4,1,3313.0,2517,796.0,1.405,1171,20,2.333,0.76
4,Existing Customer,40,M,3,Uneducated,Married,$60K - $80K,Blue,21,5,1,0,4716.0,0,4716.0,2.175,816,28,2.5,0.0
5,Existing Customer,44,M,2,Graduate,Married,$40K - $60K,Blue,36,3,1,2,4010.0,1247,2763.0,1.376,1088,24,0.846,0.311
6,Existing Customer,51,M,4,Unknown,Married,$120K +,Gold,46,6,1,3,34516.0,2264,32252.0,1.975,1330,31,0.722,0.066


Here are the unique values for each column

In [3]:
print("The income category unique values are: " + str(data.loc[:, "Income_Category"].unique()))
print("The gender unique values are: " + str(data.loc[:, "Gender"].unique()))
print("The education level unique values are: " + str(data.loc[:, "Education_Level"].unique()))
print("The marital status unique values are: " + str(data.loc[:, "Marital_Status"].unique()))
print("The card category unique values are: " + str(data.loc[:, "Card_Category"].unique()))
print("The attrition flag unique values are: " + str(data.loc[:, "Attrition_Flag"].unique()))

The income category unique values are: ['$60K - $80K' 'Less than $40K' '$80K - $120K' '$40K - $60K' '$120K +'
 'Unknown']
The gender unique values are: ['M' 'F']
The education level unique values are: ['High School' 'Graduate' 'Uneducated' 'Unknown' 'College' 'Post-Graduate'
 'Doctorate']
The marital status unique values are: ['Married' 'Single' 'Unknown' 'Divorced']
The card category unique values are: ['Blue' 'Gold' 'Silver' 'Platinum']
The attrition flag unique values are: ['Existing Customer' 'Attrited Customer']


In [4]:
income_category_dict = {
    "Unknown": 0,
    "Less than $40K": 1,
    "$40K - $60K": 2,
    '$60K - $80K': 3,
    '$80K - $120K': 4,
    '$120K +': 5
}

In [5]:
gender_dict = {"M": 0, "F": 1}

In [6]:
education_level_dict = {
    "Unknown": 0,
    'Uneducated': 1,
    'High School': 2,
    'College': 3,
    'Graduate': 4, 
    'Post-Graduate': 5,
    'Doctorate': 6
}

In [7]:
marital_status_dict = {
    "Unknown": 0,
    "Single": 1,
    "Married": 2,
    "Divorced": 3
}

In [8]:
card_category_dict = {
    "Blue": 0,
    "Silver": 1,
    "Gold": 2,
    "Platinum": 3
}

In [9]:
attrition_flag_dict = {"Existing Customer": 0, "Attrited Customer": 1}

Replace all the categories with numerical values

In [10]:
data['Income_Category'] = data['Income_Category'].map(income_category_dict)
data["Gender"] = data["Gender"].map(gender_dict)
data["Education_Level"] = data["Education_Level"].map(education_level_dict)
data["Marital_Status"] = data["Marital_Status"].map(marital_status_dict)
data["Card_Category"] = data["Card_Category"].map(card_category_dict)
data["Attrition_Flag"] = data["Attrition_Flag"].map(attrition_flag_dict)

In [11]:
# Shuffle the data to have a more accurate model
data = sklearn.utils.shuffle(data)
data

Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
5235,0,60,1,0,4,2,1,0,51,6,1,2,2230.0,1796,434.0,0.637,4929,61,0.488,0.805
7177,0,45,1,4,2,1,2,0,29,3,2,1,4595.0,1781,2814.0,0.708,3746,68,0.545,0.388
3224,0,36,0,2,3,1,3,0,22,5,3,3,4511.0,754,3757.0,0.905,2597,71,0.919,0.167
7435,0,35,1,4,2,1,1,0,25,5,2,3,2684.0,1658,1026.0,0.669,4657,76,1.000,0.618
6096,0,56,0,4,4,1,4,0,36,5,3,2,2078.0,1696,382.0,0.562,4382,82,0.783,0.816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9526,1,49,0,4,4,1,4,0,36,2,3,2,3999.0,0,3999.0,0.938,7567,76,0.652,0.000
3115,0,39,0,2,4,3,3,0,34,5,1,4,2267.0,0,2267.0,0.875,4544,84,0.750,0.000
9976,0,57,0,2,4,2,5,1,39,4,2,3,34516.0,1444,33072.0,0.568,16392,116,0.657,0.042
4992,1,37,1,3,0,2,1,0,17,3,1,3,1468.0,1074,394.0,0.697,2050,32,0.524,0.732


In [12]:
# Set the variables and the labels
X = data.drop(columns="Attrition_Flag").values
X = sklearn.preprocessing.scale(X)
y = data.loc[:, "Attrition_Flag"].values

In [13]:
# Use an Linear SVC model to classify the churning customers
X_train, X_test, y_train, y_test = train_test_split(X, y)
model = svm.SVC(kernel="linear")

In [14]:
# Train the model
model.fit(X_train, y_train)

SVC(kernel='linear')

In [15]:
# Print out the accuracy
accuracy = (model.predict(X_test) == y_test).mean()
accuracy

0.9087677725118484