Code ran in Google Colab for training purposes.

In [74]:
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

path = kagglehub.dataset_download("uciml/default-of-credit-card-clients-dataset")

Using Colab cache for faster access to the 'default-of-credit-card-clients-dataset' dataset.


In [88]:
# CSV file in the downloaded directory
for file in os.listdir(path):
    if file.endswith(".csv"):
        csv_file = os.path.join(path, file)
        break

df = pd.read_csv(csv_file)

feature_vec = df.drop('default.payment.next.month', axis=1)
target_vec = df['default.payment.next.month']

features = torch.tensor(feature_vec.values,dtype=torch.float32)
targets = torch.tensor(target_vec.values,dtype=torch.long)

dataset = TensorDataset(features,targets)

print(features.shape)

torch.Size([30000, 24])


In [89]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)


In [90]:
# TODO set device to cuda
if torch.cuda.is_available():
  print("GPU detected")
  device = torch.device("cuda")
else:
  print("No GPU detected")
  device = torch.device("cpu")

GPU detected


In [91]:
def get_accuracy_and_loss(model, loader, criterion):
  model.eval()
  my_loss = 0
  with torch.no_grad():
    correct = 0
    for data, target in loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      pred = output.argmax(dim=1)
      correct += pred.eq(target).sum().item()
      my_loss += criterion(output, target).item()
  return correct/len(loader.dataset), my_loss/len(loader.dataset)

In [105]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(24, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Linear(32, 2)
        )

    def forward(self, x):
        return self.net(x)

In [106]:
# TODO create a network and move it to GPU

model = NN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [109]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
patience = 3
epochs_without_improve = 0
best_val_loss = float('inf')
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
for epoch in range(100):
    model.train()
    train_loss = 0
    correct = 0
    total_count = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        train_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()
        total_count += data.size(0)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch} done.")
    train_accuracy = correct / total_count
    train_loss = train_loss / total_count
    print(f"Train accuracy: {train_accuracy}")
    train_accuracies.append(train_accuracy)
    print(f"Train loss: {train_loss}")
    train_losses.append(train_loss)
    val_accuracy, val_loss = get_accuracy_and_loss(model, val_loader, criterion)
    print(f"Val accuracy: {val_accuracy}")
    val_accuracies.append(val_accuracy)
    print(f"Val loss: {val_loss}")
    val_losses.append(val_loss)
    # Check for improvement
    if val_loss < best_val_loss:
      best_val_loss = val_loss
      torch.save(model.state_dict(), 'best_model.pth')
      epochs_without_improve = 0
    else:
      epochs_without_improve += 1
      if epochs_without_improve >= patience:
        break

Epoch 0 done.
Train accuracy: 0.7787083333333333
Train loss: 0.015268189246455828
Val accuracy: 0.7806666666666666
Val loss: 0.015716490646203358
Epoch 1 done.
Train accuracy: 0.7778333333333334
Train loss: 0.01531418011834224
Val accuracy: 0.7803333333333333
Val loss: 0.015587726160883903
Epoch 2 done.
Train accuracy: 0.777625
Train loss: 0.015299207189430793
Val accuracy: 0.7803333333333333
Val loss: 0.01537394835303227
Epoch 3 done.
Train accuracy: 0.778
Train loss: 0.015312454582502445
Val accuracy: 0.7803333333333333
Val loss: 0.015430979105333488
Epoch 4 done.
Train accuracy: 0.778375
Train loss: 0.015267308649917443
Val accuracy: 0.7806666666666666
Val loss: 0.015574835518995921
Epoch 5 done.
Train accuracy: 0.779
Train loss: 0.015298629229267438
Val accuracy: 0.7806666666666666
Val loss: 0.015391796367863813
