In [46]:
import warnings

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

from utils import preprocess_data

In [47]:
# NOTE(review): this silences *every* warning for the rest of the session
# (deprecations, dtype/convergence warnings, ...). Narrow the filter to the
# specific category/module that is noisy if possible.
warnings.filterwarnings("ignore")

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable when the notebook is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

# preprocess_data is a project-local helper (utils.py); it yields the
# train/test features and labels plus two data frames.
# NOTE(review): presumably X_* are standardised numeric arrays and y_* are
# pandas Series of class labels -- confirm against utils.preprocess_data.
X_train, X_test, y_train, y_test, train_df, test_df = preprocess_data(standardise=True)

In [48]:
# Wrap the feature matrix and the labels as tensors so a DataLoader can batch them.
class LoanDataset(Dataset):
    """Map-style dataset pairing a float32 feature tensor with int64 labels.

    Args:
        X: Feature matrix. May be a pandas DataFrame, a NumPy array, or any
            nested sequence accepted by ``torch.tensor``.
        y: Class labels, one per row of ``X``. May be a pandas Series, a
            NumPy array, or a plain sequence.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of rows.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(self._unwrap(X), dtype=torch.float32)
        self.y = torch.tensor(self._unwrap(y), dtype=torch.long)
        # Fail early: a mismatch would otherwise only surface as an
        # IndexError (or silently dropped rows) while iterating.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of rows, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    @staticmethod
    def _unwrap(data):
        """Return the underlying array of a pandas object, else ``data`` itself."""
        values = getattr(data, "values", None)
        # pandas exposes `.values` as a plain attribute; skip objects where it
        # is missing or is a method (e.g. dict.values, Tensor.values).
        if values is None or callable(values):
            return data
        return values

    def __len__(self):
        """Number of samples (rows) in the dataset."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap each split in a LoanDataset so it can be handed to a DataLoader.
train_dataset = LoanDataset(X=X_train, y=y_train)
test_dataset = LoanDataset(X=X_test, y=y_test)

In [49]:
# Mini-batch size shared by both loaders (single place to tune it).
BATCH_SIZE = 32

# Training batches are reshuffled every epoch; the test loader keeps the
# original order so predictions line up row-for-row with y_test.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [50]:
class TabularNet(nn.Module):
    """Fully connected classifier for tabular data.

    Architecture: ``in_features -> 128 -> 64 -> 32 -> num_classes`` with ReLU
    after every hidden layer. The final layer returns raw logits (no softmax).

    Args:
        name: Label used when reporting the parameter count. Falls back to the
            class name when omitted or empty.
        in_features: Number of input columns per sample (default 13).
        num_classes: Number of output logits (default 2).
    """

    def __init__(self, name=None, in_features=13, num_classes=2):
        super().__init__()
        # Always define `name`: it is read unconditionally below, so leaving
        # it unset when no name is passed would raise AttributeError.
        self.name = name or type(self).__name__
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, num_classes)  # Output layer (logits)

        # Report the number of trainable parameters
        total_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(self.name + ': total params:', total_params)

    def forward(self, x):
        """Map a ``(batch, in_features)`` float tensor to ``(batch, num_classes)`` logits."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

net = TabularNet(name='TabularNet')

TabularNet: total params: 12194


In [51]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=25):
    """Run a plain supervised training loop and print the mean loss per epoch.

    Args:
        model: Module to optimise; switched to training mode once up front.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. Progress is reported via ``print`` only.
    """
    model.train()
    num_batches = len(train_loader)
    for epoch_no in range(1, num_epochs + 1):
        epoch_loss = 0.0
        for features, targets in train_loader:
            # Gradients accumulate by default, so clear them before each step.
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += batch_loss.item()
        # Unweighted mean over batches (a smaller final batch counts equally).
        print(f'Epoch [{epoch_no}/{num_epochs}], Loss: {epoch_loss/num_batches:.4f}')

def evaluate_model(model, test_loader):
    """Predict a class index for every sample produced by ``test_loader``.

    Args:
        model: Module returning per-class scores of shape ``(batch, classes)``.
            It is switched to eval mode and left in that mode.
        test_loader: Iterable of ``(inputs, labels)`` batches; labels are ignored.

    Returns:
        A flat list of predicted class indices (NumPy integer scalars), in the
        order the loader yielded the samples.
    """
    model.eval()
    predictions = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch_inputs, _unused_labels in test_loader:
            scores = model(batch_inputs)
            # Highest-scoring class along the class dimension.
            batch_preds = torch.max(scores, 1)[1]
            predictions.extend(batch_preds.cpu().numpy())
    return predictions

# Cross-entropy over the raw logits emitted by the network's final layer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

train_model(
    model=net,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=50,
)


Epoch [1/50], Loss: 0.6549
Epoch [2/50], Loss: 0.5724
Epoch [3/50], Loss: 0.4959
Epoch [4/50], Loss: 0.4540
Epoch [5/50], Loss: 0.4413
Epoch [6/50], Loss: 0.4200
Epoch [7/50], Loss: 0.4184
Epoch [8/50], Loss: 0.4241
Epoch [9/50], Loss: 0.4021
Epoch [10/50], Loss: 0.4080
Epoch [11/50], Loss: 0.3941
Epoch [12/50], Loss: 0.3748
Epoch [13/50], Loss: 0.3741
Epoch [14/50], Loss: 0.3682
Epoch [15/50], Loss: 0.3466
Epoch [16/50], Loss: 0.3405
Epoch [17/50], Loss: 0.3402
Epoch [18/50], Loss: 0.3205
Epoch [19/50], Loss: 0.3104
Epoch [20/50], Loss: 0.3075
Epoch [21/50], Loss: 0.3066
Epoch [22/50], Loss: 0.2825
Epoch [23/50], Loss: 0.2781
Epoch [24/50], Loss: 0.2738
Epoch [25/50], Loss: 0.2714
Epoch [26/50], Loss: 0.2547
Epoch [27/50], Loss: 0.2633
Epoch [28/50], Loss: 0.2545
Epoch [29/50], Loss: 0.2414
Epoch [30/50], Loss: 0.2387
Epoch [31/50], Loss: 0.2174
Epoch [32/50], Loss: 0.2215
Epoch [33/50], Loss: 0.2207
Epoch [34/50], Loss: 0.2161
Epoch [35/50], Loss: 0.2020
Epoch [36/50], Loss: 0.2134
E

In [52]:
# Predict on the held-out split and score against the true labels.
# NOTE(review): the recorded training loss keeps falling over the epochs while
# test accuracy is about 0.72 -- possibly overfitting; consider tracking a
# validation metric during training.
y_pred = evaluate_model(model=net, test_loader=test_loader)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.7154
