In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

In [23]:
# Load the data
data = pd.read_csv('loan_approval_dataset.csv')

In [7]:
# Preprocessing
# Drop the 'loan_id' column as it is not needed for training
data.drop('loan_id', axis=1, inplace=True)

In [9]:
# Encode categorical variables
label_encoders = {}
for column in [' education', ' self_employed']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

In [12]:
# Encode the target variable
data[' loan_status'] = LabelEncoder().fit_transform(data[' loan_status'])

In [14]:
# Separate features and target variable
X = data.drop(' loan_status', axis=1)
y = data[' loan_status']

In [15]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:
# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
# Define the MLP model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [27]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9559 - loss: 0.1224 - val_accuracy: 0.9678 - val_loss: 0.0910
Epoch 2/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9641 - loss: 0.1061 - val_accuracy: 0.9663 - val_loss: 0.1028
Epoch 3/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9570 - loss: 0.1057 - val_accuracy: 0.9707 - val_loss: 0.0971
Epoch 4/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9681 - loss: 0.0966 - val_accuracy: 0.9707 - val_loss: 0.0904
Epoch 5/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9663 - loss: 0.1132 - val_accuracy: 0.9736 - val_loss: 0.0803
Epoch 6/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9698 - loss: 0.0941 - val_accuracy: 0.9678 - val_loss: 0.0881
Epoch 7/100
[1m86/86[0m [32m━━━

In [28]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9738 - loss: 0.0667
Test Accuracy: 97.31%


In [29]:
# Predicting the results
y_pred = (model.predict(X_test) > 0.5).astype("int32")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [30]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Classification Report
print(classification_report(y_test, y_pred))

[[532   4]
 [ 19 299]]
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       536
           1       0.99      0.94      0.96       318

    accuracy                           0.97       854
   macro avg       0.98      0.97      0.97       854
weighted avg       0.97      0.97      0.97       854



In [34]:
# Save the model
model.save('mlp.h5')
print("Model saved as mlp_loan_approval_model.h5")



Model saved as mlp_loan_approval_model.h5


In [4]:
# Importing the module for LimeTabularExplainer
from lime import lime_tabular
 
# Instantiating the explainer object by passing in the training set,
# and the extracted features
explainer_lime = lime_tabular.LimeTabularExplainer(X_train,
                                                   feature_names=features,
                                                   verbose=True, 
                                                   mode='regression')

ModuleNotFoundError: No module named 'lime'

In [None]:
# Index corresponding to the test vector
i = 10

# Number denoting the top features
k = 5

# Calling the explain_instance method by passing in the:
# 1) ith test vector
# 2) prediction function used by our prediction model('reg' in this case)
# 3) the top features which we want to see, denoted by k

exp_lime = explainer_lime.explain_instance(
	X_test[i], reg.predict, num_features=k)

# Finally visualizing the explanations
exp_lime.show_in_notebook()
