In [279]:
import torch
from torch import nn

from sklearn import preprocessing

import pandas as pd
import numpy as np

from tqdm.auto import tqdm

In [280]:
data_path = "D:/PROGRAMMING/PYTHON/dataset/loan_approval/loan_approval_dataset.csv"
df = pd.read_csv(data_path)

df.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


## HELPER FUNCTIONS

In [281]:
def handle_non_num_data(df):
    columns = df.columns.values
    for col in columns:
        text_digit_vals = {}

        def convert_to_int_val(val):
            return text_digit_vals[val]

        if df[col].dtype != np.int64 and df[col].dtype != np.float64:
            column_contents = df[col].values.tolist()
            unique_el = set(column_contents)

            x = 0
            for unique in unique_el:
                if unique not in text_digit_vals:
                    text_digit_vals[unique] = x
                    x += 1
            df[col] = list(map(convert_to_int_val, df[col]))

    return df

def acc_fn(y_true , y_pred):
    correct = torch.eq(y_true , y_pred).sum().item()
    acc = (correct/len(y_pred)) * 100
    return acc 

## DATA PROCESSING

In [282]:
df = handle_non_num_data(df)
df.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,0,1,9600000,29900000,12,778,2400000,17600000,22700000,8000000,0
1,2,0,1,0,4100000,12200000,8,417,2700000,2200000,8800000,3300000,1
2,3,3,0,1,9100000,29700000,20,506,7100000,4500000,33300000,12800000,1
3,4,3,0,1,8200000,30700000,8,467,18200000,3300000,23300000,7900000,1
4,5,5,1,0,9800000,24200000,20,382,12400000,8200000,29400000,5000000,1


In [283]:
X = np.array(df.drop([" loan_status" , "loan_id"]  , 1)).astype(float)
X = preprocessing.scale(X)
y = np.array(df[" loan_status"])

  X = np.array(df.drop([" loan_status" , "loan_id"]  , 1)).astype(float)


In [284]:
X[:5] , y[:5]

(array([[-0.2941017 , -0.99555917,  1.00728821,  1.61797904,  1.63305171,
          0.19261651,  1.03279241, -0.78005754,  2.877289  ,  0.83202837,
          0.93030441],
        [-1.4735476 ,  1.00446064, -0.99276452, -0.34174956, -0.32441406,
         -0.50809068, -1.06105118, -0.7339241 , -0.63192107, -0.69499321,
         -0.51593638],
        [ 0.29562125, -0.99555917,  1.00728821,  1.4398219 ,  1.61093345,
          1.59403089, -0.54484044, -0.05730028, -0.10781827,  1.99651964,
          2.40731629],
        [ 0.29562125, -0.99555917,  1.00728821,  1.11913903,  1.72152474,
         -0.50809068, -0.77104514,  1.64963709, -0.38126321,  0.89794297,
          0.89953333],
        [ 1.47506715,  1.00446064, -0.99276452,  1.6892419 ,  1.00268138,
          1.59403089, -1.26405541,  0.75772387,  0.73530363,  1.56807474,
          0.00717199]]),
 array([0, 1, 1, 1, 1], dtype=int64))

In [285]:
split = int(len(X) * 0.2)

X, y= torch.tensor(X, dtype = torch.float) , torch.tensor(y , dtype = torch.float)

X = nn.functional.normalize(X)

X_train = X[:split]
X_test = X[split:]
y_train = y[:split]
y_test = y[split:]

In [286]:
device = "cuda" if torch.cuda.is_available() else "cpu"

X_train , X_test = X_train.to(device) , X_test.to(device)
y_train , y_test =  y_train.to(device) , y_test.to(device)

## BUILDING A MODEL

In [287]:
class ClusterNN(nn.Module):
    def __init__(self , input_shape , hidden_units , output_shape):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(in_features = input_shape , out_features = hidden_units),
            nn.ReLU(),

            nn.Linear(in_features=hidden_units , out_features = hidden_units),
            nn.ReLU(),

            nn.Linear(in_features=hidden_units , out_features= output_shape)
        )

    def forward(self , X):
        X = self.layer1(X)
        return X
    
model = ClusterNN(X_train.shape[1] , 32 , 1).to(device)

loss_fn = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(params = model.parameters() , lr = 0.001)

## TRAINING LOOP

In [288]:
EPOCHS = 500

for epoch in tqdm(range(EPOCHS)):
    model.train()
    # Forward pass
    y_logits = model(X_train).squeeze()

    y_pred = torch.round(torch.sigmoid(y_logits))

    # Calculate Loss , Accuracy
    loss = loss_fn(y_logits , y_train)

    acc = acc_fn(y_train , y_pred)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    # TESTING LOOP
    model.eval()

    with torch.inference_mode():
        test_logits = model(X_test).squeeze()

        test_pred = torch.round(torch.sigmoid(test_logits))

        # Calculate test loss and accuracy
        test_loss = loss_fn(test_logits , y_test)

        test_acc = acc_fn(y_test , test_pred)

    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.3f} , Acc:{acc:.2f}% | Test Loss: {test_loss:.3f} , Test Acc: {test_acc:.2f}%")

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch: 0 | Loss: 0.698 , Acc:34.82% | Test Loss: 0.695 , Test Acc: 43.15%
Epoch: 10 | Loss: 0.681 , Acc:62.13% | Test Loss: 0.679 , Test Acc: 62.70%
Epoch: 20 | Loss: 0.665 , Acc:61.78% | Test Loss: 0.664 , Test Acc: 62.56%
Epoch: 30 | Loss: 0.647 , Acc:62.72% | Test Loss: 0.647 , Test Acc: 63.26%
Epoch: 40 | Loss: 0.624 , Acc:64.95% | Test Loss: 0.626 , Test Acc: 64.64%
Epoch: 50 | Loss: 0.596 , Acc:69.40% | Test Loss: 0.599 , Test Acc: 68.21%
Epoch: 60 | Loss: 0.560 , Acc:74.68% | Test Loss: 0.565 , Test Acc: 73.27%
Epoch: 70 | Loss: 0.517 , Acc:81.24% | Test Loss: 0.523 , Test Acc: 78.92%
Epoch: 80 | Loss: 0.468 , Acc:85.46% | Test Loss: 0.475 , Test Acc: 83.28%
Epoch: 90 | Loss: 0.416 , Acc:88.86% | Test Loss: 0.425 , Test Acc: 86.89%
Epoch: 100 | Loss: 0.366 , Acc:91.32% | Test Loss: 0.377 , Test Acc: 89.02%
Epoch: 110 | Loss: 0.322 , Acc:92.03% | Test Loss: 0.335 , Test Acc: 90.22%
Epoch: 120 | Loss: 0.286 , Acc:92.85% | Test Loss: 0.301 , Test Acc: 90.93%
Epoch: 130 | Loss: 0.25