# Predict credit default

- Dataset: https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
- Inspiration for model: https://www.kaggle.com/code/mahyar511/payment-default-prediction-neural-network

# 1. Setup

In [None]:
# ! conda install keras -y
# ! conda install tensorflow -y

In [3]:
# imports
import pandas as pd
import numpy as np

from sklearn.utils import class_weight
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical


In [2]:
# get the dataset from UCI ML Repository
# ! curl -o default.xls https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls

In [4]:
# load the dataset
df = pd.read_excel('default.xls', header=1)
df.shape

ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 1.0.0 for Excel support Use pip or conda to install xlrd.

In [None]:
# examine
df.head(2)

In [5]:
# distribution of defaullt
df['default payment next month'].value_counts()

0    23364
1     6636
Name: default payment next month, dtype: int64

In [6]:
# Calculating default Ratio
non_default = len(df[df['default payment next month']==0])
default = len(df[df['default payment next month']==1])
ratio = float(default/(non_default+default))
print('Default Ratio :',ratio)

Default Ratio : 0.2212


## Feature engineering

In [7]:
# split into input (X) and output (y) variables
predictors = df.drop(['ID', 'default payment next month'], axis=1)

In [8]:
# Standardize the predictors
X = StandardScaler().fit_transform(predictors)

In [9]:
y = to_categorical(df['default payment next month'])

In [10]:
print(X.shape, y.shape)

(30000, 23) (30000, 2)


## Train test split

In [11]:
# split into 67% for train and 33% for test

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [12]:
# how many features are there? (this will be the input dimensions)
X_train.shape

(20100, 23)

In [13]:
# how many in each category now?
print('y_train')
unique, counts = np.unique(y_train, return_counts=True)
for x in [0,1]:
    print(unique[x],counts[x])
print('y_test')
unique, counts = np.unique(y_test, return_counts=True)
for x in [0,1]:
    print(unique[x],counts[x])


y_train
0.0 20100
1.0 20100
y_test
0.0 9900
1.0 9900


# 2. Define Keras Model

In [14]:
# parameters
n_cols = predictors.shape[1]
early_stopping_monitor = EarlyStopping(patience=2)
class_weight = {0:ratio, 1:1-ratio}
print(n_cols)

23


In [15]:
# keras has two APIs: Sequential and Functional.
model = Sequential()

2022-07-17 10:22:17.103804: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# input layer and first hidden layer
# relu = rectified linear unit
model.add(Dense(12, activation='relu', 
                input_shape = (n_cols,))
         )

In [17]:
# second hidden layer
model.add(Dense(12, activation='relu'))

In [18]:
# output layer
model.add(Dense(2, activation='softmax'))

# 3. Compile Keras Model

In [42]:
# compile the keras model
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy']
             )

# 4. Fit the model

In [43]:
# fit the keras model on the dataset

model.fit(X_train, 
          y_train, 
          validation_data=(X_test,y_test), 
          epochs=150, 
          batch_size=10,
          class_weight=class_weight,
          callbacks = [early_stopping_monitor]
         )

Epoch 1/20
Epoch 2/20
Epoch 3/20


<tensorflow.python.keras.callbacks.History at 0x7f933695d100>

# 5. Evaluate

In [44]:
# evaluate the keras model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 75.34


In [45]:
# make probability predictions with the model (they come in pairs)
y_probs = model.predict(X_test)
y_probs[:10]

array([[0.5351723 , 0.46482772],
       [0.7012957 , 0.2987043 ],
       [0.6522899 , 0.34770998],
       [0.6814053 , 0.31859472],
       [0.56280655, 0.43719342],
       [0.5904106 , 0.40958944],
       [0.2779231 , 0.72207683],
       [0.4735084 , 0.5264916 ],
       [0.8631505 , 0.13684952],
       [0.7066022 , 0.2933977 ]], dtype=float32)

In [47]:
# express these as booleans
y_probs > 0.7

array([[ True, False],
       [ True, False],
       [ True, False],
       ...,
       [ True, False],
       [ True, False],
       [ True, False]])

In [48]:
# make class predictions with the model
y_preds = (y_probs > 0.5).astype(int)
y_preds[:10]

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0]])

In [26]:
# check out the first ten
print(y_preds[:10].tolist())
print(y_test[:10].tolist())

[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0]]
[[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]


In [27]:

# Evaluate the model
print(metrics.classification_report(y_test, y_preds))

              precision    recall  f1-score   support

           0       0.88      0.80      0.84      7742
           1       0.46      0.59      0.52      2158

   micro avg       0.76      0.76      0.76      9900
   macro avg       0.67      0.70      0.68      9900
weighted avg       0.78      0.76      0.77      9900
 samples avg       0.76      0.76      0.76      9900



# 6. Make Predictions


# 7. Save the model
https://machinelearningmastery.com/save-load-keras-deep-learning-models/

In [28]:
model.save("credit-model-1.h5")

In [29]:
# load model
from keras.models import load_model
model2 = load_model('credit-model-1.h5')

In [30]:
# make class predictions with the model
predictions = (model.predict(X) > 0.5).astype(int)
predictions[:10]

array([[0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0]])