**Credit card risk prediction using an artificial neural network (ANN)**

**Import required libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,precision_score,recall_score
import tensorflow as tf
import keras

In [None]:
# Load the credit data and discard the "Unnamed: 0" column
# (presumably a row index saved with the CSV - verify against the file).
df = pd.read_csv("credit.csv").drop(columns="Unnamed: 0")

In [None]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,status,duration,credit_history,purpose,amount,savings,employment_duration,installment_rate,personal_status_sex,other_debtors,...,property,age,other_installment_plans,housing,number_credits,job,people_liable,telephone,foreign_worker,credit_risk
0,1.0,18.0,4.0,2.0,1049.0,1.0,2.0,4.0,2.0,1.0,...,2.0,21.0,3.0,1.0,1.0,3.0,2.0,1.0,2.0,1.0
1,1.0,9.0,4.0,0.0,2799.0,1.0,3.0,2.0,3.0,1.0,...,1.0,36.0,3.0,1.0,2.0,3.0,1.0,1.0,2.0,1.0
2,2.0,12.0,2.0,9.0,841.0,2.0,4.0,2.0,2.0,1.0,...,1.0,23.0,3.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0
3,1.0,12.0,4.0,0.0,2122.0,1.0,3.0,3.0,3.0,1.0,...,1.0,39.0,3.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0
4,1.0,12.0,4.0,0.0,2171.0,1.0,3.0,4.0,3.0,1.0,...,2.0,38.0,1.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0


In [None]:
# The target classes are imbalanced; this affects accuracy and therefore has to be handled.
class_counts = df["credit_risk"].value_counts()
class_counts

1.0    700
0.0    300
Name: credit_risk, dtype: int64

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 21 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   status                   1000 non-null   float64
 1   duration                 1000 non-null   float64
 2   credit_history           1000 non-null   float64
 3   purpose                  1000 non-null   float64
 4   amount                   1000 non-null   float64
 5   savings                  1000 non-null   float64
 6   employment_duration      1000 non-null   float64
 7   installment_rate         1000 non-null   float64
 8   personal_status_sex      1000 non-null   float64
 9   other_debtors            1000 non-null   float64
 10  present_residence        1000 non-null   float64
 11  property                 1000 non-null   float64
 12  age                      1000 non-null   float64
 13  other_installment_plans  1000 non-null   float64
 14  housing                  

In [None]:
# Count missing values per column (every column reports zero).
df.isna().sum()

status                     0
duration                   0
credit_history             0
purpose                    0
amount                     0
savings                    0
employment_duration        0
installment_rate           0
personal_status_sex        0
other_debtors              0
present_residence          0
property                   0
age                        0
other_installment_plans    0
housing                    0
number_credits             0
job                        0
people_liable              0
telephone                  0
foreign_worker             0
credit_risk                0
dtype: int64

**Feature Selection & Train Test split**

In [None]:
# Separate features from the target by column name rather than by position,
# so the target can never end up inside X (and no feature is silently dropped)
# if columns are added, removed or reordered in the CSV.
X = df.drop(columns="credit_risk")
Y = df["credit_risk"]

In [None]:
# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only. Fitting it on the full dataset leaks test-set statistics into
# training and makes the held-out score optimistic.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.75, random_state=42)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # fit on train only
X_test = sc.transform(X_test)        # reuse the train statistics on the test rows

In [None]:
# handling imbalance dataset using SMOTE
# Balance the classes with SMOTE, resampling the minority class of the training split.
# The resampled arrays are stored under the lowercase names x_train / y_train;
# the uppercase X_train / Y_train keep the un-resampled split.
from imblearn.over_sampling import SMOTE

smot = SMOTE(
    random_state=42,
    sampling_strategy="minority",
)
x_train, y_train = smot.fit_resample(X_train, Y_train)

In [None]:
from collections import Counter

# Class frequencies of the resampled training labels.
resampled_counts = Counter(y_train)
resampled_counts

Counter({1.0: 525, 0.0: 525})

**Model building**

In [None]:
# Seed Python, NumPy and the backend RNGs so weight initialisation, dropout and
# batch shuffling are repeatable across Restart & Run All.
# NOTE(review): keras.utils.set_random_seed needs Keras/TF >= 2.7 - confirm the environment.
keras.utils.set_random_seed(42)

# Small feed-forward binary classifier: 15 -> 5 -> dropout(0.1) -> 4 -> 1 (sigmoid).
# The input width is taken from the training data instead of a hard-coded 20,
# and declared through an explicit Input rather than the deprecated `input_dim`.
model = Sequential()
model.add(keras.Input(shape=(x_train.shape[1],)))
model.add(Dense(units=15, activation="relu"))
model.add(Dense(units=5, activation="relu"))
model.add(Dropout(0.1))
model.add(Dense(units=4, activation="relu"))
model.add(Dense(1, activation="sigmoid"))


In [None]:
# Compile for binary classification with Adam (non-default learning rate and beta_1).
# `metrics` is passed as a list: the API expects a list/tuple/dict, and newer
# Keras releases reject a bare string.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.015,
        beta_1=0.6,
        beta_2=0.999,
        epsilon=1e-07,
        amsgrad=False,
        name="Adam",
    ),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Early stopping: watch val_loss, allow 15 epochs without improvement,
# then restore the weights from the best epoch.
callback = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=15,
    monitor="val_loss",
)

In [None]:
# Early stopping (with restore_best_weights) selects the model by validation
# loss, so the validation rows must NOT be the test set - otherwise the test
# accuracy reported afterwards is tuned on the very data it is measured on.
# Hold out 20% of the training data for validation instead.
# NOTE(review): these rows come from the resampled training set; ideally the
# validation rows would be set aside before oversampling.
x_fit, x_val, y_fit, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

hist = model.fit(
    x_fit,
    y_fit,
    batch_size=10,
    validation_data=(x_val, y_val),
    epochs=150,
    verbose=1,
    callbacks=[callback],  # the API documents a list of callbacks
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150


In [None]:
# Score the trained network on the held-out test split.
loss, acc = model.evaluate(x=X_test, y=Y_test)



In [None]:
# Report the test accuracy as a percentage rounded to two decimals.
accuracy_pct = np.round(acc * 100, decimals=2)
print("Accuracy is : ", accuracy_pct)

Accuracy is :  77.6


**Model using MLPClassifier**

In [None]:
# scikit-learn MLP baseline with one hidden layer of 8 units.
# random_state pins weight initialisation and batch shuffling so the reported
# scores are reproducible.
# NOTE: this rebinds `model`, replacing the Keras network defined earlier.
model = MLPClassifier(
    alpha=0.3,
    batch_size=10,
    hidden_layer_sizes=(8,),
    learning_rate="adaptive",
    max_iter=50,
    activation="relu",
    random_state=42,
)

In [None]:
# Train the MLP on the resampled training data.
model.fit(X=x_train, y=y_train)



MLPClassifier(alpha=0.3, batch_size=10, hidden_layer_sizes=(8,),
              learning_rate='adaptive', max_iter=50)

In [None]:
# Predict class labels for the test split.
pred = model.predict(X=X_test)

In [None]:
# Test accuracy of the MLP, as a percentage.
mlp_accuracy = accuracy_score(y_true=Y_test, y_pred=pred) * 100
print("Accuracy using MLPClassifier is :  ", mlp_accuracy)

Accuracy using MLPClassifier is :   76.4


In [None]:
# Confusion matrix on the test split (rows: true class, columns: predicted class).
confusion_matrix(y_true=Y_test, y_pred=pred)

array([[ 56,  19],
       [ 40, 135]])

In [None]:
# classification_report returns a pre-formatted multi-line string; print it so
# the table renders with real line breaks instead of a repr full of "\n".
print(classification_report(Y_test, pred))

'              precision    recall  f1-score   support\n\n         0.0       0.58      0.75      0.65        75\n         1.0       0.88      0.77      0.82       175\n\n    accuracy                           0.76       250\n   macro avg       0.73      0.76      0.74       250\nweighted avg       0.79      0.76      0.77       250\n'

In [None]:
# Precision of the MLP predictions for the positive class (label 1).
precision_score(y_true=Y_test, y_pred=pred)

0.8766233766233766

In [None]:
# Recall of the MLP predictions for the positive class (label 1).
recall_score(y_true=Y_test, y_pred=pred)

0.7714285714285715