In [61]:
# Import libraries
import pandas as pd
import torch
from torch import nn

In [62]:
df = pd.read_csv("./data/loan_data.csv")

df

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,27.0,male,Associate,47971.0,6,RENT,15000.0,MEDICAL,15.66,0.31,3.0,645,No,1
44996,37.0,female,Associate,65800.0,17,RENT,9000.0,HOMEIMPROVEMENT,14.07,0.14,11.0,621,No,1
44997,33.0,male,Associate,56942.0,7,RENT,2771.0,DEBTCONSOLIDATION,10.02,0.05,10.0,668,No,1
44998,29.0,male,Bachelor,33164.0,4,RENT,12000.0,EDUCATION,13.23,0.36,6.0,604,No,1


In [63]:
# Analyse columns: "person_income", "loan_intent", "loan_percent_income", "credit_score", "loan_status"

df = df[["person_income", "loan_intent", "loan_percent_income", "credit_score", "loan_status"]]

df

Unnamed: 0,person_income,loan_intent,loan_percent_income,credit_score,loan_status
0,71948.0,PERSONAL,0.49,561,1
1,12282.0,EDUCATION,0.08,504,0
2,12438.0,MEDICAL,0.44,635,1
3,79753.0,MEDICAL,0.44,675,1
4,66135.0,MEDICAL,0.53,586,1
...,...,...,...,...,...
44995,47971.0,MEDICAL,0.31,645,1
44996,65800.0,HOMEIMPROVEMENT,0.14,621,1
44997,56942.0,DEBTCONSOLIDATION,0.05,668,1
44998,33164.0,EDUCATION,0.36,604,1


In [64]:
# Define input and output
X = df[["person_income", "loan_intent", "loan_percent_income", "credit_score"]]

# Using get dummies to one hot-encode loan_intent column
X = pd.get_dummies(X, columns=["loan_intent"])

In [65]:
X.head()

Unnamed: 0,person_income,loan_percent_income,credit_score,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
0,71948.0,0.49,561,False,False,False,False,True,False
1,12282.0,0.08,504,False,True,False,False,False,False
2,12438.0,0.44,635,False,False,False,True,False,False
3,79753.0,0.44,675,False,False,False,True,False,False
4,66135.0,0.53,586,False,False,False,True,False,False


In [66]:
# Define ouput
y = df["loan_status"]

In [67]:
y[0:5]

0    1
1    0
2    1
3    1
4    1
Name: loan_status, dtype: int64

In [68]:
# Spliting dataset into train (70 %) and train (30 %)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [69]:
X_train.head()

Unnamed: 0,person_income,loan_percent_income,credit_score,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE
30159,41579.0,0.14,647,False,False,False,True,False,False
6777,28052.0,0.25,649,True,False,False,False,False,False
36563,63722.0,0.11,626,False,True,False,False,False,False
11784,91870.0,0.01,662,False,False,False,False,True,False
10370,29550.0,0.34,633,True,False,False,False,False,False


In [70]:
# Normalizating input and output using StandardScaler
from sklearn.preprocessing import StandardScaler

scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# scaler_Y = StandardScaler()
# y_train = scaler_Y.fit_transform(y_train.values.reshape(-1, 1))
# y_test = scaler_Y.transform(y_test.values.reshape(-1, 1))

In [71]:
# Transforming input and output into tensors for training
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32).reshape(-1, 1)

In [73]:
# Define model
model = nn.Sequential(
    nn.Linear(9, 1)
)

# Define loss function
criterion = nn.BCEWithLogitsLoss()

# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [74]:
from torch.utils.data import TensorDataset, DataLoader

# Define dataset
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Define dataloader
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [75]:
# Training loop
epochs = 100

for epoch in range(epochs):
    loss_sum = 0
    for X_batch, y_batch in train_dataloader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
    if epoch % 10 == 0:
        print(f"Loss: {loss_sum}")

Loss: 522.9163036346436
Loss: 430.8007764071226
Loss: 430.8242027312517
Loss: 430.9209949821234
Loss: 431.0659240037203
Loss: 430.92247182130814
Loss: 430.7633901387453
Loss: 431.0243150740862
Loss: 430.87934832274914
Loss: 430.9517974406481


In [76]:
# Model evaluation
def evaluate_model(X, y):
    model.eval()
    with torch.no_grad():
        y_pred = nn.functional.sigmoid(model(X)) > 0.5
        
        # Calculate accuracy
        accuracy = (y_pred == y).type(torch.float32).mean()
        print(f"Accuracy: {accuracy * 100:.2f} %")

        # Calculate sensivity
        sensivity = (y_pred[y == 1] == y[y == 1]).type(torch.float32).mean()
        print(f"Sensivity: {sensivity * 100:.2f} %")

        # Calculate specifity
        specifity = (y_pred[y == 0] == y[y == 0]).type(torch.float32).mean()
        print(f"Specifity: {specifity * 100:.2f} %")

        # Calculate precision
        precision = (y_pred[y_pred == 1] == y[y_pred == 1]).type(torch.float32).mean()
        print(f"Precision: {precision * 100:.2f} %")

In [77]:
# Evalutation for train df
evaluate_model(X_train, y_train)

Accuracy: 81.79 %
Sensivity: 31.26 %
Specifity: 96.21 %
Precision: 70.15 %


In [78]:
# Evalutation for test df
evaluate_model(X_test, y_test)

Accuracy: 81.33 %
Sensivity: 30.13 %
Specifity: 96.00 %
Precision: 68.33 %
