## Build a Decision Tree Classifier Model for the App

### 1. Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import joblib
import os
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/scikit-learn deprecation warnings.
warnings.filterwarnings('ignore')
# Remove the column limit so wide DataFrames are displayed in full.
pd.set_option('display.max_columns', None)

### 2. Load the dataset

In [2]:
# Directory and file name of the input CSV (path is relative to the working directory).
DATA_DIR = '../data'
FILE_NAME = 'credit_card_default.csv'
data_path = os.path.join(DATA_DIR, FILE_NAME)
# Read the CSV and use its "ID" column as the row index.
df = pd.read_csv(data_path, index_col="ID")

### 3. Preparing the dataset

In [3]:
# Give the target column a short name
df = df.rename(columns={'default payment next month': 'default'})

# Lowercase every column name
df = df.rename(columns=str.lower)

# Names of the feature groups used below
bill_amt_features = [f'bill_amt{i}' for i in range(1, 7)]
pay_amt_features = [f'pay_amt{i}' for i in range(1, 7)]
numerical_features = ['limit_bal', 'age', *bill_amt_features, *pay_amt_features]
target = ['default']

# Binary indicator columns derived from the coded categorical columns
df['male'] = df['sex'].eq(1).astype('int')
df['grad_school'] = df['education'].eq(1).astype('int')
df['university'] = df['education'].eq(2).astype('int')
df['married'] = df['marriage'].eq(1).astype('int')

# Simplify the pay features: every value <= 0 (e.g. -1 and -2) becomes 0
pay_features = [f'pay_{i}' for i in range(1, 7)]
for pay_col in pay_features:
    df[pay_col] = df[pay_col].clip(lower=0)

# One 0/1 flag per pay column: 1 when the pay value is positive
delayed_features = [f'delayed_{i}' for i in range(1, 7)]
for pay_col, delayed_col in zip(pay_features, delayed_features):
    df[delayed_col] = df[pay_col].gt(0).astype(int)

# New feature: number of delayed flags set in each row
df['months_delayed'] = df[delayed_features].sum(axis=1)

### 4. Produce the objects to train the model

In [4]:
## We will use the whole dataset for training since we have decided on the model to use

# Create X and y
# Guard the append so that re-running this cell does not add 'months_delayed'
# a second time (which would produce duplicate columns in X).
if 'months_delayed' not in numerical_features:
    numerical_features = numerical_features + ['months_delayed']
binary_features = ['male','married','grad_school','university']

# Build X as an independent frame (astype returns a new object, not a view of
# df) and cast the columns to be scaled to float64 up front: the scaler
# produces floats, and writing floats into integer columns relies on implicit
# upcasting that recent pandas versions deprecate.
X = df[numerical_features + binary_features].astype(
    dict.fromkeys(numerical_features, 'float64')
)
y = df['default'].astype(int)

## Standardization: centering and scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit on the numerical columns only; the binary indicator columns stay as 0/1.
X[numerical_features] = scaler.fit_transform(X[numerical_features])

### 5. Build Decision Tree Classifier

In [5]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited tree (max_depth=3) with a fixed random_state so repeated runs
# build the same tree.
DT = DecisionTreeClassifier(max_depth=3, random_state = 42)
# Fit on the full feature matrix X and target y built in the previous cell.
DT.fit(X, y)

DecisionTreeClassifier(max_depth=3, random_state=42)

### 6. Serialize the objects created

In [6]:
## Serializing the fitted objects with joblib:
# PCA (disabled: no `pca` object is created in this notebook)
#joblib.dump(pca, '../App/pca1.joblib')

# Fitted scaler, written to the current working directory
joblib.dump(scaler, 'scaler.joblib')

# Trained model, written to the current working directory
# NOTE(review): the recorded cell output shows '../App/credit-card-default.joblib',
# which does not match this path - confirm where the app expects the files.
joblib.dump(DT, 'credit-card-default.joblib')

['../App/credit-card-default.joblib']

In [7]:
# Display the DataFrame, including the columns added during preparation.
df

Unnamed: 0_level_0,limit_bal,sex,education,marriage,age,pay_1,pay_2,pay_3,pay_4,pay_5,pay_6,bill_amt1,bill_amt2,bill_amt3,bill_amt4,bill_amt5,bill_amt6,pay_amt1,pay_amt2,pay_amt3,pay_amt4,pay_amt5,pay_amt6,default,male,grad_school,university,married,delayed_1,delayed_2,delayed_3,delayed_4,delayed_5,delayed_6,months_delayed
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
1,20000,2,2,1,24,2,2,0,0,0,0,3913,3102,689,0,0,0,0,689,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,2
2,120000,2,2,2,26,0,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0,1000,1000,1000,0,2000,1,0,0,1,0,0,1,0,0,0,1,2
3,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000,0,0,0,1,0,0,0,0,0,0,0,0
4,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000,0,0,0,1,1,0,0,0,0,0,0,0
5,50000,1,2,1,57,0,0,0,0,0,0,8617,5670,35835,20940,19146,19131,2000,36681,10000,9000,689,679,0,1,0,1,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29996,220000,1,3,1,39,0,0,0,0,0,0,188948,192815,208365,88004,31237,15980,8500,20000,5003,3047,5000,1000,0,1,0,0,1,0,0,0,0,0,0,0
29997,150000,1,3,2,43,0,0,0,0,0,0,1683,1828,3502,8979,5190,0,1837,3526,8998,129,0,0,0,1,0,0,0,0,0,0,0,0,0,0
29998,30000,1,2,2,37,4,3,2,0,0,0,3565,3356,2758,20878,20582,19357,0,0,22000,4200,2000,3100,1,1,0,1,0,1,1,1,0,0,0,3
29999,80000,1,3,1,41,1,0,0,0,0,0,-1645,78379,76304,52774,11855,48944,85900,3409,1178,1926,52964,1804,1,1,0,0,1,1,0,0,0,0,0,1


### Outlook / To do
* Create a web app with Dash