# Credit Risk MLP Neural Network #

This notebook assesses a borrower's credit risk using a multilayer perceptron (MLP) neural network.

In [67]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Read the credit dataset, then discard the leftover CSV row-number column
df = pd.read_csv('credit_training.csv')
df = df.drop('Unnamed: 0', axis=1)
print(df.shape)
df.head()

(150000, 11)


Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,1,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,0,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,0,0.65818,38,1,0.085113,3042.0,2,1,0,0,0.0
3,0,0.23381,30,0,0.03605,3300.0,5,0,0,0,0.0
4,0,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0


In [68]:
# Features: every column except the label, with the verbose source column
# names shortened for readability.
X = df.drop('SeriousDlqin2yrs', axis=1).rename(
    columns={
        'RevolvingUtilizationOfUnsecuredLines': 'CreditUtilization',
        'NumberOfTime30-59DaysPastDueNotWorse': 'PastDue30-59',
        'NumberOfTime60-89DaysPastDueNotWorse': 'PastDue60-89',
        'NumberOfTimes90DaysLate': 'PastDue90+',
        'NumberOfOpenCreditLinesAndLoans': 'CreditLines',
        'NumberRealEstateLoansOrLines': 'RealEstateLoans',
        'NumberOfDependents': 'Dependents',
    }
)

# Label: the 'SeriousDlqin2yrs' column, exposed under the name 'Target'
y = df['SeriousDlqin2yrs'].rename('Target')

print(f"Features: {X.columns.tolist()[:5]}...")
print(f"Output: '{y.name}'")
X.head()

Features: ['CreditUtilization', 'age', 'PastDue30-59', 'DebtRatio', 'MonthlyIncome']...
Output: 'Target'


Unnamed: 0,CreditUtilization,age,PastDue30-59,DebtRatio,MonthlyIncome,CreditLines,PastDue90+,RealEstateLoans,PastDue60-89,Dependents
0,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,0.65818,38,1,0.085113,3042.0,2,1,0,0,0.0
3,0.23381,30,0,0.03605,3300.0,5,0,0,0,0.0
4,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0


In [69]:
# Keep 60% of the rows for training and hold out the rest for testing.
# Stratifying on y gives both splits the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.6,
    stratify=y,
    random_state=432,
)
print(f"Training count: {X_train.shape}")
print(f"Test count:     {X_test.shape}")

Training count: (90000, 10)
Test count:     (60000, 10)


In [70]:
# Inspect dtypes and non-null counts to find columns with missing values
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 90000 entries, 120266 to 46321
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CreditUtilization  90000 non-null  float64
 1   age                90000 non-null  int64  
 2   PastDue30-59       90000 non-null  int64  
 3   DebtRatio          90000 non-null  float64
 4   MonthlyIncome      72196 non-null  float64
 5   CreditLines        90000 non-null  int64  
 6   PastDue90+         90000 non-null  int64  
 7   RealEstateLoans    90000 non-null  int64  
 8   PastDue60-89       90000 non-null  int64  
 9   Dependents         87650 non-null  float64
dtypes: float64(4), int64(6)
memory usage: 7.6 MB


In [71]:
# Summary statistics for 'Dependents', used to pick an imputation value
X_train['Dependents'].describe()

count    87650.000000
mean         0.757924
std          1.115645
min          0.000000
25%          0.000000
50%          0.000000
75%          1.000000
max         20.000000
Name: Dependents, dtype: float64

In [72]:
# Fill missing dependent counts with the mean of the observed training values.
# `assign` returns a new frame instead of writing into the split result.
X_train = X_train.assign(
    Dependents=lambda frame: frame['Dependents'].fillna(frame['Dependents'].mean())
)
# Sanity check: number of missing values left in the column
X_train['Dependents'].isnull().sum()

0

In [73]:
# Summary statistics for 'MonthlyIncome', used to pick an imputation value
X_train['MonthlyIncome'].describe()

count    7.219600e+04
mean     6.716191e+03
std      1.752010e+04
min      0.000000e+00
25%      3.400000e+03
50%      5.416000e+03
75%      8.289500e+03
max      3.008750e+06
Name: MonthlyIncome, dtype: float64

In [74]:
# Treat a missing monthly income as 0.
# NOTE(review): 0 is also a legitimately observed income, so this merges
# "unknown" with "no income". Consider a median fill plus a missing-value
# indicator, applied identically to the test set.
X_train = X_train.assign(MonthlyIncome=X_train['MonthlyIncome'].fillna(0))
# Sanity check: number of missing values left in the column
X_train['MonthlyIncome'].isnull().sum()

0

In [75]:
# Re-check non-null counts now that both columns have been imputed
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 90000 entries, 120266 to 46321
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CreditUtilization  90000 non-null  float64
 1   age                90000 non-null  int64  
 2   PastDue30-59       90000 non-null  int64  
 3   DebtRatio          90000 non-null  float64
 4   MonthlyIncome      90000 non-null  float64
 5   CreditLines        90000 non-null  int64  
 6   PastDue90+         90000 non-null  int64  
 7   RealEstateLoans    90000 non-null  int64  
 8   PastDue60-89       90000 non-null  int64  
 9   Dependents         90000 non-null  float64
dtypes: float64(4), int64(6)
memory usage: 7.6 MB


In [76]:
# Fit the scaler on the training features only, then standardise them.
# The original row index is passed through so X_train stays label-aligned with
# y_train; without `index=` the rebuilt frame gets a fresh 0..n-1 index and
# any later index-based operation between the two would misalign rows.
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    scaler.transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_train.head()

Unnamed: 0,CreditUtilization,age,PastDue30-59,DebtRatio,MonthlyIncome,CreditLines,PastDue90+,RealEstateLoans,PastDue60-89,Dependents
0,-0.022508,0.995771,-0.099736,-0.169812,0.259542,0.108002,-0.065119,-0.014867,-0.059349,-0.68841
1,-0.019681,-0.768419,-0.099736,0.536798,-0.338456,0.108002,-0.065119,0.877776,-0.059349,-0.68841
2,-0.021129,1.267185,-0.099736,-0.169757,0.567179,1.66345,-0.065119,1.77042,-0.059349,1.128158
3,-0.021587,-0.497005,-0.099736,1.110313,-0.338456,-1.253015,-0.065119,-0.014867,-0.059349,-0.68841
4,-0.021036,-0.022031,-0.099736,-0.169658,0.030495,0.496864,-0.065119,1.77042,-0.059349,0.219874


In [59]:
# Two-hidden-layer perceptron (128 -> 64 units, ReLU) trained with Adam.
# `learning_rate='adaptive'` was removed: scikit-learn only applies that
# schedule when solver='sgd', so with Adam it was ignored and only suggested
# a decay that never happened. Training behaviour is unchanged.
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    activation='relu',
    solver='adam',
    learning_rate_init=0.01,
    max_iter=200,
    random_state=123,  # fixed seed for reproducible weight init and shuffling
)
mlp.fit(X_train, y_train)

In [77]:
# Impute the test set with statistics learned from the TRAINING data only, so
# the held-out rows do not influence their own preprocessing.
# The scaler was fit on the training frame after 'Dependents' had been
# mean-imputed; mean-imputation leaves the column mean unchanged, so
# scaler.mean_ still holds the training mean for that column. X_test has the
# same column order as the frame the scaler was fit on.
train_dependents_mean = scaler.mean_[X_test.columns.get_loc('Dependents')]
X_test = X_test.assign(
    Dependents=X_test['Dependents'].fillna(train_dependents_mean),
    MonthlyIncome=X_test['MonthlyIncome'].fillna(0),  # same rule as the training set
)
X_test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 60000 entries, 42702 to 147608
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CreditUtilization  60000 non-null  float64
 1   age                60000 non-null  int64  
 2   PastDue30-59       60000 non-null  int64  
 3   DebtRatio          60000 non-null  float64
 4   MonthlyIncome      60000 non-null  float64
 5   CreditLines        60000 non-null  int64  
 6   PastDue90+         60000 non-null  int64  
 7   RealEstateLoans    60000 non-null  int64  
 8   PastDue60-89       60000 non-null  int64  
 9   Dependents         60000 non-null  float64
dtypes: float64(4), int64(6)
memory usage: 5.0 MB


In [78]:
# Standardise the test features with the scaler fitted on the training data.
# The original row index is kept so X_test stays label-aligned with y_test.
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

In [79]:
# Preview the standardised test features
X_test.head()

Unnamed: 0,CreditUtilization,age,PastDue30-59,DebtRatio,MonthlyIncome,CreditLines,PastDue90+,RealEstateLoans,PastDue60-89,Dependents
0,-0.022156,0.792211,-0.099736,-0.053486,-0.338456,0.691295,-0.065119,-0.907511,-0.059349,-0.68841
1,-0.021791,0.249383,-0.099736,-0.169696,0.358863,0.108002,-0.065119,0.877776,-0.059349,1.128158
2,-0.022308,0.113676,-0.099736,1.047068,-0.338456,0.108002,-0.065119,-0.014867,-0.059349,2.036442
3,-0.019464,1.470746,-0.099736,-0.169694,0.090866,-0.669722,-0.065119,-0.014867,-0.059349,-0.68841
4,-0.022553,0.045823,-0.099736,-0.169795,0.153625,0.496864,-0.065119,-0.014867,-0.059349,0.219874


In [80]:
# Predict the class label for every row of the scaled test set
y_pred = mlp.predict(X_test)

In [81]:
# Per-class precision, recall and F1 on the held-out test set
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.94      0.99      0.97     55990
           1       0.56      0.13      0.21      4010

    accuracy                           0.93     60000
   macro avg       0.75      0.56      0.59     60000
weighted avg       0.92      0.93      0.92     60000

