# Project - Predicting Potential Credit Card Payment Defaulters                                                     

### The purpose of this notebook is to prepare new data for prediction and to score it with the trained model retrieved from disk

### The data columns required to make new predictions are shown in the `sample.dtypes` output below

In [51]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, roc_auc_score

In [52]:
# Read the new records that are to be scored from the CSV file
sample = pd.read_csv(filepath_or_buffer="New_Dataset.csv")

In [53]:
# Dimensions of the loaded data as (rows, columns)
sample.shape

(249, 22)

In [54]:
# Data type of every column in the loaded data
sample.dtypes

Limit             int64
Education         int64
Age               int64
SEP_PYMT_STS      int64
AUG_PYMT_STS      int64
JUL_PYMT_STS      int64
JUN_PYMT_STS      int64
MAY_PYMT_STS      int64
APR_PYMT_STS      int64
SEP_STMT          int64
AUG_STMT          int64
JUL_STMT          int64
JUN_STMT          int64
MAY_STMT          int64
APR_STMT          int64
SEP_PAID          int64
AUG_PAID          int64
JUL_PAID          int64
JUN_PAID        float64
MAY_PAID          int64
APR_PAID          int64
Default           int64
dtype: object

# Data readiness to make new predictions

### Create a new column capturing the number of missed payments (if any) in the last 6 months for each customer

In [55]:
# payment_miss_in_6_months = 0, customer paid something in every one of the last six months
# payment_miss_in_6_months > 0, customer paid nothing in that many of the last six months

# Count, per customer (row), how many of the six monthly paid amounts are exactly 0.
# A vectorized comparison replaces the row-by-row value_counts() lookup; a missing (NaN)
# amount never compares equal to 0, so it is not counted, exactly as before.
sample['payment_miss_in_6_months'] = (
    sample[['SEP_PAID','AUG_PAID','JUL_PAID','JUN_PAID','MAY_PAID','APR_PAID']] == 0
).sum(axis=1)

### Create a new column to capture customers who made no payment in any of the last four months

In [56]:
# This cell stores a count from 0 to 4 in no_payment_last_4_months: the number of months
# among SEP, AUG, JUL and JUN in which the paid amount is exactly 0.
# A later cell recodes that count into the final flag:
# no_payment_last_4_months = 0, customer did not default continuously for four months
# no_payment_last_4_months = 1, customer defaulted continuously for four months

# Vectorized count of zero amounts per row; a missing (NaN) amount never compares
# equal to 0, so it is not counted.
sample['no_payment_last_4_months'] = (
    sample[['SEP_PAID','AUG_PAID','JUL_PAID','JUN_PAID']] == 0
).sum(axis=1)

In [57]:
# Re-check the dimensions now that the two derived columns have been added
sample.shape

(249, 24)

In [58]:
# Recode no_payment_last_4_months into a 0/1 flag:
#   1 = the paid amount is 0 in all four of SEP, AUG, JUL and JUN
#   0 = otherwise
# The flag is derived from the paid amounts themselves rather than by rewriting the
# column's current values, so re-running this cell gives the same result. Recoding
# the stored count in place would reset every flag to 0 on a second run, because all
# values are then already below 4.
sample['no_payment_last_4_months'] = (
    (sample[['SEP_PAID','AUG_PAID','JUL_PAID','JUN_PAID']] == 0)
    .all(axis=1)
    .astype('int64')
)

In [59]:
# Dimensions after recoding no_payment_last_4_months (values change, the shape should not)
sample.shape

(249, 24)

In [60]:
# Re-sequence the columns: the two derived features join the other predictors
# and the 'Default' label becomes the last column
column_order = [
    'Limit', 'Education', 'Age',
    'SEP_PYMT_STS', 'AUG_PYMT_STS', 'JUL_PYMT_STS',
    'JUN_PYMT_STS', 'MAY_PYMT_STS', 'APR_PYMT_STS',
    'SEP_STMT', 'AUG_STMT', 'JUL_STMT',
    'JUN_STMT', 'MAY_STMT', 'APR_STMT',
    'SEP_PAID', 'AUG_PAID', 'JUL_PAID',
    'JUN_PAID', 'MAY_PAID', 'APR_PAID',
    'payment_miss_in_6_months', 'no_payment_last_4_months',
    'Default',
]
sample = sample[column_order]

In [61]:
# Column order and data types after re-sequencing
sample.dtypes

Limit                         int64
Education                     int64
Age                           int64
SEP_PYMT_STS                  int64
AUG_PYMT_STS                  int64
JUL_PYMT_STS                  int64
JUN_PYMT_STS                  int64
MAY_PYMT_STS                  int64
APR_PYMT_STS                  int64
SEP_STMT                      int64
AUG_STMT                      int64
JUL_STMT                      int64
JUN_STMT                      int64
MAY_STMT                      int64
APR_STMT                      int64
SEP_PAID                      int64
AUG_PAID                      int64
JUL_PAID                      int64
JUN_PAID                    float64
MAY_PAID                      int64
APR_PAID                      int64
payment_miss_in_6_months      int64
no_payment_last_4_months      int64
Default                       int64
dtype: object

### Split the sample data into predictor data and label

In [62]:
# Predictors: every column except the last one
sample_data = sample.iloc[:, :-1]
# Label: the last column (the column re-sequencing step places 'Default' there)
sample_label = sample.iloc[:, -1]

# End of Data readiness to make new predictions

# Load the model from disk

In [63]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23.
# Prefer the standalone joblib package; fall back to the bundled copy only on
# old installations where the standalone package is unavailable.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# File holding the persisted model
filename = 'final_model_payment_defaulter.sav'

In [64]:
# Restore the persisted model object from `filename`.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code,
# so only load model files that come from a trusted source.
loaded_model = joblib.load(filename)

In [65]:
# Score the loaded model on the prepared sample and express the result as a percentage
Accuracy = 100 * loaded_model.score(sample_data, sample_label)

# The second column of predict_proba feeds the ROC AUC
# (presumably the probability of the positive 'Default' class; verify against loaded_model.classes_)
class_probabilities = loaded_model.predict_proba(sample_data)
Pred_proba = class_probabilities[:, 1]
roc_score = roc_auc_score(sample_label, Pred_proba)

In [66]:
# Report both metrics, each rounded to two decimals
for template, value in (("Accuracy : %.2f", Accuracy), ("AUC      : %.2f", roc_score)):
    print(template % value)

Accuracy : 81.12
AUC      : 0.80


# End of Program