# Bank data analysis

The goal of this project is to build a robust classifier that predicts whether a specific client will leave (unsubscribe from) the bank's services.
Perform feature engineering and try different models in order to achieve the highest possible accuracy.




    

In [1]:
# Standard libraries for data analysis (Pandas,Numpy and sklearn package):

import pandas as pd
from numpy import nan
import numpy as np
import sklearn
import pickle

import warnings
warnings.filterwarnings('ignore')

## 1. Sample for prediction

In [2]:
# Load the prediction sample from a comma-separated file whose first row is the header.
dataset_pred = pd.read_csv('test.csv', sep=',', header=0)

# Work on an independent copy: a plain `X_test = dataset_pred` only aliases the
# same object, so any in-place edit of X_test would also destroy the raw frame.
X_test = dataset_pred.copy()

In [3]:
# Separate the target from the features without mutating the loaded frame.
# Deriving both pieces from dataset_pred (instead of dropping columns in place)
# keeps this cell re-runnable: a second run no longer raises KeyError on
# columns that were already removed.
y_test = dataset_pred[['Attrition_Flag']]  # one-column DataFrame holding the target
X_test = dataset_pred.drop(columns=['Attrition_Flag', 'CLIENTNUM'])

## 2. Sample Preprocessing

### Preprocessing of numerical data

In [4]:
# Preview only the first rows rather than rendering the whole frame.
X_test.head()

Unnamed: 0,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
0,59,M,2,Doctorate,Married,$60K - $80K,Platinum,46,1,3,6,0,0,100,20,100,5,0.5,0


In [5]:
# Treat the literal 'Unknown' category as a missing value in these columns.
columns = ['Marital_Status', 'Education_Level', 'Income_Category']
X_test[columns] = X_test[columns].replace({'Unknown': np.nan})

In [6]:
# Add a log-transformed copy of Total_Ct_Chng_Q4_Q1; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Total_Ct_Chng_Q4_Q1_norm=lambda frame: np.log(frame['Total_Ct_Chng_Q4_Q1'] + 0.1)
)

In [7]:
# Add a log-transformed copy of Total_Trans_Amt; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Total_Trans_Amt_norm=lambda frame: np.log(frame['Total_Trans_Amt'] + 0.1)
)

In [8]:
# Add a log-transformed copy of Total_Amt_Chng_Q4_Q1; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Total_Amt_Chng_Q4_Q1_norm=lambda frame: np.log(frame['Total_Amt_Chng_Q4_Q1'] + 0.1)
)

In [9]:
# Add a log-transformed copy of Avg_Open_To_Buy; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Avg_Open_To_Buy_norm=lambda frame: np.log(frame['Avg_Open_To_Buy'] + 0.1)
)

In [10]:
# Add a log-transformed copy of Credit_Limit; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Credit_Limit_norm=lambda frame: np.log(frame['Credit_Limit'] + 0.1)
)

In [11]:
# Add a log-transformed copy of Avg_Utilization_Ratio; the +0.1 offset keeps log() finite when the value is 0.
X_test = X_test.assign(
    Avg_Utilization_Ratio_norm=lambda frame: np.log(frame['Avg_Utilization_Ratio'] + 0.1)
)

In [12]:
# Preview only the first rows (now including the *_norm columns) rather than rendering the whole frame.
X_test.head()

Unnamed: 0,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,...,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Total_Ct_Chng_Q4_Q1_norm,Total_Trans_Amt_norm,Total_Amt_Chng_Q4_Q1_norm,Avg_Open_To_Buy_norm,Credit_Limit_norm,Avg_Utilization_Ratio_norm
0,59,M,2,Doctorate,Married,$60K - $80K,Platinum,46,1,3,...,100,5,0.5,0,-0.510826,4.60617,3.00072,4.60617,-2.302585,-2.302585


In [13]:
# RobustScaler applied to the log-transformed ("_norm") columns.
from sklearn.preprocessing import RobustScaler

# Total_Trans_Amt_norm is not in this list, so it is passed on unscaled.
# NOTE(review): presumably this mirrors the training notebook - confirm.
columns = [
    'Total_Ct_Chng_Q4_Q1_norm',
    'Credit_Limit_norm',
    'Avg_Open_To_Buy_norm',
    'Total_Amt_Chng_Q4_Q1_norm',
    'Avg_Utilization_Ratio_norm',
]

# NOTE(review): the scaler is fitted on the prediction sample itself, not on
# the training data. For the one-row sample shown in this notebook the scaled
# columns all come out as 0.0. The scaler fitted during training should
# probably be persisted and reused here (transform only) - confirm against
# the training notebook.
transformer = RobustScaler()
transformer.fit(X_test[columns])
X_test[columns] = transformer.transform(X_test[columns])

In [14]:
# Min-max scaling of Customer_Age into the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler

column = ['Customer_Age']

# NOTE(review): the scaler is fitted on the prediction sample itself, so the
# min/max come from this sample rather than from the training data. For the
# one-row sample shown in this notebook Customer_Age becomes 0.0. The scaler
# fitted during training should probably be reused here - confirm.
transformer = MinMaxScaler(feature_range=(0, 1))
transformer.fit(X_test[column])
X_test[column] = transformer.transform(X_test[column])

In [15]:
# Standardisation (zero mean, unit variance) of two raw amount columns.
from sklearn.preprocessing import StandardScaler

column_ = ['Total_Revolving_Bal', 'Total_Trans_Amt']

# NOTE(review): the scaler is fitted on the prediction sample itself rather
# than reused from training; with a one-row sample the result is 0.0.
# NOTE(review): Total_Trans_Amt is dropped again before prediction, so scaling
# it here does not reach the model - confirm whether Total_Trans_Amt_norm was
# meant to be scaled instead.
transformer = StandardScaler()
transformer.fit(X_test[column_])
X_test[column_] = transformer.transform(X_test[column_])

In [16]:
# from sklearn.preprocessing import Normalizer

# columns = ['Total_Ct_Chng_Q4_Q1','Credit_Limit','Avg_Open_To_Buy','Total_Amt_Chng_Q4_Q1','Avg_Utilization_Ratio']
# normalizer = Normalizer(norm='l2') 

# X_test[columns] = normalizer.fit_transform(X_test[columns])


In [17]:
# Preview only the first rows of the scaled features rather than rendering the whole frame.
X_test.head()

Unnamed: 0,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,...,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,Total_Ct_Chng_Q4_Q1_norm,Total_Trans_Amt_norm,Total_Amt_Chng_Q4_Q1_norm,Avg_Open_To_Buy_norm,Credit_Limit_norm,Avg_Utilization_Ratio_norm
0,0.0,M,2,Doctorate,Married,$60K - $80K,Platinum,46,1,3,...,0.0,5,0.5,0,0.0,4.60617,0.0,0.0,0.0,0.0


In [18]:
# Preview only the first rows of the target rather than rendering the whole frame.
y_test.head()

Unnamed: 0,Attrition_Flag
0,Attrited Customer


### Preprocessing of categorical data

In [19]:
# Encode the Attrition_Flag target as integers: 1 = attrited, 0 = existing customer.
mapper = {'Existing Customer': 0, 'Attrited Customer': 1}
y_test = y_test.assign(
    Attrition_Flag=lambda frame: frame['Attrition_Flag'].replace(mapper)
)


In [20]:
# Preview only the first rows of the encoded target rather than rendering the whole frame.
y_test.head()

Unnamed: 0,Attrition_Flag
0,1


In [21]:
import category_encoders as ce

# Categorical columns that are replaced by their numeric encoding.
cat_features = [
    'Gender',
    'Education_Level',
    'Marital_Status',
    'Income_Category',
    'Card_Category',
]

# Despite its name, `count_encoder` is a CatBoostEncoder (a target-based encoder).
# NOTE(review): the encoder is fitted here on the prediction sample together
# with that sample's own labels (y_test). This looks like target leakage into
# the features; the encoder fitted in the training notebook should probably be
# reused instead - confirm.
count_encoder = ce.CatBoostEncoder(cols=cat_features)
encoded_categoricals = count_encoder.fit_transform(X_test[cat_features], y_test)
X_test[cat_features] = encoded_categoricals

In [22]:
# (rows, columns) of the feature frame at this point, i.e. still including the
# raw columns that the next cell drops.
X_test.shape

(1, 25)

In [23]:
# Remove the raw columns for which a log-transformed "_norm" column was added earlier.
raw_columns = [
    'Total_Ct_Chng_Q4_Q1',
    'Credit_Limit',
    'Avg_Open_To_Buy',
    'Total_Amt_Chng_Q4_Q1',
    'Avg_Utilization_Ratio',
    'Total_Trans_Amt',
]
X_test = X_test.drop(columns=raw_columns)

### Prediction 

In [24]:
# Load the trained classifier. The context manager closes the file handle,
# which the previous bare open() call left open.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file - only load model files from a trusted source.
with open('Notebook_training_final', 'rb') as model_file:
    Loaded_model = pickle.load(model_file)

# Predict the encoded class for every row of the prepared sample
# (the target was encoded as 1 = 'Attrited Customer', 0 = 'Existing Customer').
prediction = Loaded_model.predict(X_test)

print("The customer leave/unsubscribe the bank services:",prediction)

The customer leave/unsubscribe the bank services: [1]
