In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle
import dill

Dataset Content
The dataset includes 14 columns representing different factors influencing loan approvals and defaults:

Personal Information

person_age: Age of the applicant (in years).
person_gender: Gender of the applicant (male, female).
person_education: Educational background (High School, Bachelor, Master, etc.).
person_income: Annual income of the applicant (in USD).
person_emp_exp: Years of employment experience.
person_home_ownership: Type of home ownership (RENT, OWN, MORTGAGE).
Loan Details

loan_amnt: Loan amount requested (in USD).
loan_intent: Purpose of the loan (PERSONAL, EDUCATION, MEDICAL, etc.).
loan_int_rate: Interest rate on the loan (percentage).
loan_percent_income: Ratio of loan amount to income.
Credit & Loan History

cb_person_cred_hist_length: Length of the applicant's credit history (in years).
credit_score: Credit score of the applicant.
previous_loan_defaults_on_file: Whether the applicant has previous loan defaults (Yes or No).
Target Variable

loan_status: 1 if the loan was repaid successfully, 0 if the applicant defaulted.

In [5]:
## Load the loan dataset from loan_data.csv (path is relative to the working directory)
data=pd.read_csv("loan_data.csv")
# Preview the first rows to sanity-check the columns and dtypes
data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [6]:

# Integer-encode the gender column. The fitted encoder is kept in its own
# variable because a later cell pickles it.
label_encoder_gender = LabelEncoder().fit(data['person_gender'])
data['person_gender'] = label_encoder_gender.transform(data['person_gender'])
data

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,0,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,0,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,0,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,0,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,1,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,1,Associate,47971.0,6,RENT,15000.0,MEDICAL,15.66,0.31,3.0,645,No,1
44996,37.0,0,Associate,65800.0,17,RENT,9000.0,HOMEIMPROVEMENT,14.07,0.14,11.0,621,No,1
44997,33.0,1,Associate,56942.0,7,RENT,2771.0,DEBTCONSOLIDATION,10.02,0.05,10.0,668,No,1
44998,29.0,1,Bachelor,33164.0,4,RENT,12000.0,EDUCATION,13.23,0.36,6.0,604,No,1


In [7]:

from sklearn.preprocessing import OneHotEncoder
# Fit the one-hot encoder on the loan purpose, then expand that single column
# into a dense indicator matrix with one column per category.
onehot_encoder_int = OneHotEncoder().fit(data[['loan_intent']])
intent_encoder = onehot_encoder_int.transform(data[['loan_intent']]).toarray()
intent_encoder

array([[0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       ...,
       [1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.]])

In [8]:
# Wrap the dense one-hot matrix in a DataFrame with readable column names.
# The frame reuses data's index: the column-wise pd.concat that follows aligns
# rows by index label, so a fresh RangeIndex would misalign rows (and introduce
# NaN rows) if `data` were ever filtered or re-indexed before this point.
intent_encoded_df=pd.DataFrame(
    intent_encoder,
    index=data.index,
    columns=onehot_encoder_int.get_feature_names_out(['loan_intent']),
)
intent_encoded_df

Unnamed: 0,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...
44995,0.0,0.0,0.0,1.0,0.0,0.0
44996,0.0,0.0,1.0,0.0,0.0,0.0
44997,1.0,0.0,0.0,0.0,0.0,0.0
44998,0.0,1.0,0.0,0.0,0.0,0.0


In [9]:
## Swap the raw loan_intent column for its one-hot indicator columns
data = pd.concat(
    [data.drop(columns='loan_intent'), intent_encoded_df],
    axis='columns',
)
data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,22.0,0,Master,71948.0,0,RENT,35000.0,16.02,0.49,3.0,561,No,1,0.0,0.0,0.0,0.0,1.0,0.0
1,21.0,0,High School,12282.0,0,OWN,1000.0,11.14,0.08,2.0,504,Yes,0,0.0,1.0,0.0,0.0,0.0,0.0
2,25.0,0,High School,12438.0,3,MORTGAGE,5500.0,12.87,0.44,3.0,635,No,1,0.0,0.0,0.0,1.0,0.0,0.0
3,23.0,0,Bachelor,79753.0,0,RENT,35000.0,15.23,0.44,2.0,675,No,1,0.0,0.0,0.0,1.0,0.0,0.0
4,24.0,1,Master,66135.0,1,RENT,35000.0,14.27,0.53,4.0,586,No,1,0.0,0.0,0.0,1.0,0.0,0.0


In [10]:
# Ordinal encoding for education, ordered from lowest to highest qualification
def custom_education(data):
    """Ordinally encode the ``person_education`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding a ``person_education`` column of string labels.

    Returns
    -------
    pandas.Series
        Rank of each label: High School=0, Associate=1, Bachelor=2,
        Master=3, Doctorate=4. Missing values are passed through as NaN.

    Raises
    ------
    ValueError
        If the column holds a non-null value outside the known labels, for
        example when the column has already been encoded. Without this check
        ``Series.map`` would silently turn such values into NaN.
    """
    education_order = ['High School', 'Associate', 'Bachelor', 'Master', 'Doctorate']
    education_mapping = {level: idx for idx, level in enumerate(education_order)}

    column = data['person_education']

    # Fail loudly on labels the mapping does not know instead of emitting NaN
    unexpected = set(column.dropna().unique()) - set(education_mapping)
    if unexpected:
        raise ValueError(
            "Unrecognised person_education value(s): "
            + ", ".join(sorted(map(repr, unexpected)))
        )

    # Apply custom encoding
    return column.map(education_mapping)

# Replace the education labels with their ordinal codes.
# Not idempotent: the mapping only recognises the original string labels, so
# run this cell once per fresh load of `data`.
data['person_education'] = custom_education(data)
data

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,22.0,0,3,71948.0,0,RENT,35000.0,16.02,0.49,3.0,561,No,1,0.0,0.0,0.0,0.0,1.0,0.0
1,21.0,0,0,12282.0,0,OWN,1000.0,11.14,0.08,2.0,504,Yes,0,0.0,1.0,0.0,0.0,0.0,0.0
2,25.0,0,0,12438.0,3,MORTGAGE,5500.0,12.87,0.44,3.0,635,No,1,0.0,0.0,0.0,1.0,0.0,0.0
3,23.0,0,2,79753.0,0,RENT,35000.0,15.23,0.44,2.0,675,No,1,0.0,0.0,0.0,1.0,0.0,0.0
4,24.0,1,3,66135.0,1,RENT,35000.0,14.27,0.53,4.0,586,No,1,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,1,1,47971.0,6,RENT,15000.0,15.66,0.31,3.0,645,No,1,0.0,0.0,0.0,1.0,0.0,0.0
44996,37.0,0,1,65800.0,17,RENT,9000.0,14.07,0.14,11.0,621,No,1,0.0,0.0,1.0,0.0,0.0,0.0
44997,33.0,1,1,56942.0,7,RENT,2771.0,10.02,0.05,10.0,668,No,1,1.0,0.0,0.0,0.0,0.0,0.0
44998,29.0,1,2,33164.0,4,RENT,12000.0,13.23,0.36,6.0,604,No,1,0.0,1.0,0.0,0.0,0.0,0.0


In [None]:
def custom_house(data):
    """Integer-encode the ``person_home_ownership`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding a ``person_home_ownership`` column of string labels.

    Returns
    -------
    pandas.Series
        Code of each label: RENT=0, MORTGAGE=1, OWN=2, OTHER=3.
        Missing values are passed through as NaN.

    Raises
    ------
    ValueError
        If the column holds a non-null value outside the known labels, for
        example when the column has already been encoded. Without this check
        ``Series.map`` would silently turn such values into NaN.
    """
    housing_order = ['RENT', 'MORTGAGE', 'OWN', 'OTHER']
    housing_mapping = {status: idx for idx, status in enumerate(housing_order)}

    column = data['person_home_ownership']

    # Fail loudly on labels the mapping does not know instead of emitting NaN
    unexpected = set(column.dropna().unique()) - set(housing_mapping)
    if unexpected:
        raise ValueError(
            "Unrecognised person_home_ownership value(s): "
            + ", ".join(sorted(map(repr, unexpected)))
        )

    return column.map(housing_mapping)


# Replace the home-ownership labels with their integer codes.
# Not idempotent: the mapping only recognises the original string labels, so
# run this cell once per fresh load of `data`.
data['person_home_ownership'] = custom_house(data)


In [12]:
# Display the frame to check the result of the home-ownership encoding
data

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,22.0,0,3,71948.0,0,0,35000.0,16.02,0.49,3.0,561,No,1,0.0,0.0,0.0,0.0,1.0,0.0
1,21.0,0,0,12282.0,0,2,1000.0,11.14,0.08,2.0,504,Yes,0,0.0,1.0,0.0,0.0,0.0,0.0
2,25.0,0,0,12438.0,3,1,5500.0,12.87,0.44,3.0,635,No,1,0.0,0.0,0.0,1.0,0.0,0.0
3,23.0,0,2,79753.0,0,0,35000.0,15.23,0.44,2.0,675,No,1,0.0,0.0,0.0,1.0,0.0,0.0
4,24.0,1,3,66135.0,1,0,35000.0,14.27,0.53,4.0,586,No,1,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,1,1,47971.0,6,0,15000.0,15.66,0.31,3.0,645,No,1,0.0,0.0,0.0,1.0,0.0,0.0
44996,37.0,0,1,65800.0,17,0,9000.0,14.07,0.14,11.0,621,No,1,0.0,0.0,1.0,0.0,0.0,0.0
44997,33.0,1,1,56942.0,7,0,2771.0,10.02,0.05,10.0,668,No,1,1.0,0.0,0.0,0.0,0.0,0.0
44998,29.0,1,2,33164.0,4,0,12000.0,13.23,0.36,6.0,604,No,1,0.0,1.0,0.0,0.0,0.0,0.0


In [13]:
# Integer-encode the prior-default flag. The fitted encoder is kept in its own
# variable because a later cell pickles it.
label_encoder_default = LabelEncoder().fit(data['previous_loan_defaults_on_file'])
data['previous_loan_defaults_on_file'] = label_encoder_default.transform(
    data['previous_loan_defaults_on_file']
)
data

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,22.0,0,3,71948.0,0,0,35000.0,16.02,0.49,3.0,561,0,1,0.0,0.0,0.0,0.0,1.0,0.0
1,21.0,0,0,12282.0,0,2,1000.0,11.14,0.08,2.0,504,1,0,0.0,1.0,0.0,0.0,0.0,0.0
2,25.0,0,0,12438.0,3,1,5500.0,12.87,0.44,3.0,635,0,1,0.0,0.0,0.0,1.0,0.0,0.0
3,23.0,0,2,79753.0,0,0,35000.0,15.23,0.44,2.0,675,0,1,0.0,0.0,0.0,1.0,0.0,0.0
4,24.0,1,3,66135.0,1,0,35000.0,14.27,0.53,4.0,586,0,1,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,1,1,47971.0,6,0,15000.0,15.66,0.31,3.0,645,0,1,0.0,0.0,0.0,1.0,0.0,0.0
44996,37.0,0,1,65800.0,17,0,9000.0,14.07,0.14,11.0,621,0,1,0.0,0.0,1.0,0.0,0.0,0.0
44997,33.0,1,1,56942.0,7,0,2771.0,10.02,0.05,10.0,668,0,1,1.0,0.0,0.0,0.0,0.0,0.0
44998,29.0,1,2,33164.0,4,0,12000.0,13.23,0.36,6.0,604,0,1,0.0,1.0,0.0,0.0,0.0,0.0


In [14]:
## Persist the fitted encoders and the custom mapping functions
# The three fitted sklearn encoders are serialised with pickle.
for filename, encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_intent.pkl', onehot_encoder_int),
    ('label_encoder_default.pkl', label_encoder_default),
):
    with open(filename, 'wb') as file:
        pickle.dump(encoder, file)

# The two hand-written mapping functions are serialised with dill.
for filename, encode_fn in (
    ('custom_encoding_house.pkl', custom_house),
    ('custom_encoding_education.pkl', custom_education),
):
    with open(filename, 'wb') as file:
        dill.dump(encode_fn, file)


In [15]:
## Separate the predictors from the loan_status target
X = data.drop(columns='loan_status')
y = data['loan_status']

## Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Standardise the features using statistics learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
# Persist the fitted StandardScaler to scaler.pkl
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

ANN Implementation


In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime




In [18]:
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so the
# random weight initialisation is repeatable after Restart & Run All.
# 42 matches the seed used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers feeding one sigmoid
# output unit. The input width is taken from the scaled training matrix.
model = Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)), ## Hidden layer 1
    Dense(32,activation='relu'), # hidden layer 2
    Dense(1,activation='sigmoid') #output layer
])





In [19]:
# Optimiser and loss for the binary classifier
# Adam with an explicitly chosen learning rate of 0.01
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
# Binary cross-entropy, matching the single sigmoid output unit of the model
loss = tf.keras.losses.BinaryCrossentropy()

In [20]:
# Attach the optimiser and loss to the model and report accuracy as a metric
model.compile(optimizer=opt,loss=loss,metrics=["accuracy"])

In [21]:
## Set up the TensorBoard callback
# Each run logs to its own timestamped sub-directory under logs/fit/
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 presumably enables per-epoch histograms; see the Keras TensorBoard docs
tensorflow_callbacks=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [22]:
# Set up early stopping: monitor val_loss with a patience of 10 epochs, and
# ask for the best weights to be restored (restore_best_weights=True)
early_stopping_callback = EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)

In [23]:
# Train for at most 100 epochs with the TensorBoard and early-stopping callbacks.
# NOTE(review): validation_data is the held-out test split, and early stopping
# monitors val_loss on it, so the test set influences which weights are kept.
# Any score later reported on X_test will be optimistic; consider carving a
# separate validation set out of the training data.
history=model.fit(
X_train,y_train,validation_data=(X_test,y_test),epochs=100,
callbacks=[tensorflow_callbacks,early_stopping_callback]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


In [24]:
# Load the TensorBoard IPython extension so the %tensorboard magic is available
%load_ext tensorboard

In [25]:
# Launch TensorBoard on the runs logged under logs/fit
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 24344), started 22:58:08 ago. (Use '!kill 24344' to kill it.)

In [26]:
# Save the trained model to model.h5.
# NOTE(review): the recorded output is a warning fragment from
# saving_api.save_model, presumably about the .h5 format. Confirm before
# renaming the file, since other code may load 'model.h5'.
model.save('model.h5')

  saving_api.save_model(
