In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import to_categorical
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

In [14]:
# Read the loan-approval records from the CSV (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='loan_approval_dataset.csv')

In [15]:
# Preprocessing
# Remove the 'loan_id' column as it is not needed for training.
# Rebinding `data` (instead of dropping in place) together with
# errors='ignore' keeps this cell safe to re-run: on a second run the column
# is already gone and the call is a no-op rather than a KeyError.
data = data.drop(columns=['loan_id'], errors='ignore')

In [16]:
# Turn the two text-valued feature columns into integer codes, keeping each
# fitted encoder in `label_encoders` under its column name
label_encoders = {}
for column in (' education', ' self_employed'):
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

In [17]:
# Replace the loan-status labels with the integer codes a LabelEncoder assigns
status_codes = LabelEncoder().fit_transform(data[' loan_status'])
data[' loan_status'] = status_codes

In [18]:
# Pull out the target column (y); every remaining column is a feature (X)
y = data[' loan_status']
X = data.drop(columns=' loan_status')

In [19]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [20]:
# Standardize the features: the scaler is fitted on the training rows only,
# and that same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [21]:
# Define the MLP model
# Binary classifier: three ReLU hidden layers (64 -> 32 -> 16 units) with
# 50% dropout after the first two, ending in a single sigmoid output unit.
# The input width is declared with an explicit Input layer rather than the
# `input_dim` argument on the first Dense layer: Keras warns when
# `input_dim`/`input_shape` is passed to a layer of a Sequential model and
# recommends an Input object as the first layer instead.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [23]:
# Fit for 100 epochs in mini-batches of 32, with 20% of the training data
# set aside for validation; the returned training record is kept in `history`
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.6112 - loss: 0.6604 - val_accuracy: 0.8404 - val_loss: 0.5112
Epoch 2/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7210 - loss: 0.5131 - val_accuracy: 0.9180 - val_loss: 0.3113
Epoch 3/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8425 - loss: 0.3707 - val_accuracy: 0.9429 - val_loss: 0.2176
Epoch 4/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8798 - loss: 0.3271 - val_accuracy: 0.9385 - val_loss: 0.1877
Epoch 5/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8849 - loss: 0.2818 - val_accuracy: 0.9473 - val_loss: 0.1717
Epoch 6/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9087 - loss: 0.2567 - val_accuracy: 0.9517 - val_loss: 0.1632
Epoch 7/100
[1m86/86[0m [32m━━━

[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9561 - loss: 0.1332 - val_accuracy: 0.9619 - val_loss: 0.0984
Epoch 52/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9589 - loss: 0.1167 - val_accuracy: 0.9649 - val_loss: 0.0906
Epoch 53/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9579 - loss: 0.1266 - val_accuracy: 0.9634 - val_loss: 0.0945
Epoch 54/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9605 - loss: 0.1114 - val_accuracy: 0.9619 - val_loss: 0.0950
Epoch 55/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9660 - loss: 0.1035 - val_accuracy: 0.9634 - val_loss: 0.0913
Epoch 56/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9638 - loss: 0.1146 - val_accuracy: 0.9649 - val_loss: 0.0893
Epoch 57/100
[1m86/86[0m [32m━━━━━━━━━

In [24]:
# Score the trained model on the held-out test split and report its accuracy
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy*100:.2f}%")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9632 - loss: 0.0778
Test Accuracy: 96.49%


In [25]:
# Turn the model's sigmoid outputs into hard 0/1 labels with a 0.5 cut-off
predicted_probabilities = model.predict(X_test)
y_pred = (predicted_probabilities > 0.5).astype("int32")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [26]:
# Confusion matrix of true test labels against predicted labels
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(conf_matrix)

# Per-class precision / recall / F1 summary
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

[[526  10]
 [ 20 298]]
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       536
           1       0.97      0.94      0.95       318

    accuracy                           0.96       854
   macro avg       0.97      0.96      0.96       854
weighted avg       0.96      0.96      0.96       854



In [27]:
# Save the model
# The file name lives in one variable so that the file written to disk and
# the name reported below cannot disagree (writing 'mlp.h5' while announcing
# 'mlp_loan_approval_model.h5' would leave later load calls looking for a
# file that was never created by this cell).
model_path = 'mlp_loan_approval_model.h5'
model.save(model_path)
print(f"Model saved as {model_path}")



Model saved as mlp_loan_approval_model.h5


In [28]:
import sys
print(sys.executable)
print(sys.version)
!pip list

# Importing the module for LimeTabularExplainer
from lime import lime_tabular
 
# Instantiating the explainer object by passing in the training set,
# and the extracted features
explainer_lime = lime_tabular.LimeTabularExplainer(X_train,
                                                   feature_names=features,
                                                   verbose=True, 
                                                   mode='regression')

D:\Users\91944\anaconda3\python.exe
3.11.3 | packaged by Anaconda, Inc. | (main, Apr 19 2023, 23:46:34) [MSC v.1916 64 bit (AMD64)]
Package                      Version
---------------------------- -----------
absl-py                      1.4.0
altair                       5.0.1
anyio                        3.7.1
argon2-cffi                  21.3.0
argon2-cffi-bindings         21.2.0
arrow                        1.2.3
asgiref                      3.6.0
asttokens                    2.2.1
astunparse                   1.6.3
async-lru                    2.0.3
attrs                        23.1.0
Babel                        2.12.1
backcall                     0.2.0
beautifulsoup4               4.12.2
bleach                       6.0.0
blinker                      1.6.2
brotlipy                     0.7.0
cachetools                   5.3.1
certifi                      2023.5.7
cffi                         1.15.1
chardet                      4.0.0
charset-normalizer           3.1.0
click      


[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: C:\Users\91944\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


ModuleNotFoundError: No module named 'lime'

In [None]:
# Index corresponding to the test vector
i = 10

# Number denoting the top features
k = 5


def predict_for_lime(rows):
    """Adapt the Keras model's predictions to the shape the LIME explainer needs.

    The network has a single sigmoid output, so ``model.predict`` yields one
    column holding the probability of class 1. A LIME explainer in
    'classification' mode expects one probability column per class, while in
    'regression' mode it expects a flat vector, so the output is reshaped
    according to ``explainer_lime.mode``.

    Parameters:
        rows: 2-D array of (already scaled) feature vectors supplied by LIME.

    Returns:
        An (n_rows, 2) array ``[P(class 0), P(class 1)]`` in classification
        mode, otherwise a 1-D array of P(class 1).
    """
    positive = np.asarray(model.predict(rows, verbose=0)).reshape(-1)
    if explainer_lime.mode == 'classification':
        return np.column_stack([1.0 - positive, positive])
    return positive


# Calling the explain_instance method by passing in the:
# 1) ith test vector
# 2) prediction function wrapping our trained Keras model
# 3) the top features which we want to see, denoted by k
exp_lime = explainer_lime.explain_instance(
    X_test[i], predict_for_lime, num_features=k)

# Finally visualizing the explanations
exp_lime.show_in_notebook()


In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import load_model

# Recreate the preprocessed train/test matrices inside this cell.
# Read the dataset and discard the 'loan_id' column, which is not a feature
data = pd.read_csv('../model/loan_approval_dataset.csv').drop(columns='loan_id')

# Integer-encode the two text-valued feature columns, keeping each fitted
# encoder under its column name
label_encoders = {}
for column in (' education', ' self_employed'):
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Integer-encode the target column
status_codes = LabelEncoder().fit_transform(data[' loan_status'])
data[' loan_status'] = status_codes

# Target (y) versus feature columns (X)
y = data[' loan_status']
X = data.drop(columns=' loan_status')

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize: fit on the training rows only, then transform both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Restore the previously saved Keras model from disk
saved_model_path = '../model/mlp_loan_approval_model.h5'
model = load_model(saved_model_path)

import shap


def _single_output(values):
    """Return SHAP values for the model's only output as a 2-D array.

    NOTE(review): the container returned by ``DeepExplainer.shap_values``
    appears to depend on the installed shap version -- either a list holding
    one array per model output, or one array with a trailing output axis.
    Both forms are reduced here to shape (n_rows, n_features) for the single
    sigmoid output, which is what the plotting helpers below are given.
    """
    if isinstance(values, list):
        values = values[0]
    values = np.asarray(values)
    if values.ndim == 3:
        values = values[..., 0]
    return values


# X_train / X_test are NumPy arrays after scaling, so the column names are
# taken from the feature frame X to label the plots.
feature_names = list(X.columns)

# Initialize the SHAP Deep Explainer with the first 100 training rows as background
explainer_shap = shap.DeepExplainer(model, X_train[:100])

# The explainer's expected value may be an array or tensor with one entry per
# model output; reduce it to a plain float for the single output.
base_value = float(np.asarray(explainer_shap.expected_value).reshape(-1)[0])

# Index corresponding to the test vector for local explanation
i = 10

# Compute SHAP values for the ith instance in X_test
shap_values = _single_output(explainer_shap.shap_values(X_test[i:i+1]))
print(shap_values)

# Visualize local explanation.
# force_plot returns a visualization object; because it is not the last
# expression of the cell it must be passed to display() to be rendered.
shap.initjs()
display(shap.force_plot(base_value, shap_values[0], X_test[i],
                        feature_names=feature_names))

# Summarize global explanations using SHAP summary plot
shap_values_summary = _single_output(explainer_shap.shap_values(X_test))
print(shap_values_summary)
shap.summary_plot(shap_values_summary, X_test, feature_names=feature_names)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ModuleNotFoundError: No module named 'shap'