In [1]:
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import os
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

path = kagglehub.dataset_download("uciml/default-of-credit-card-clients-dataset")

Using Colab cache for faster access to the 'default-of-credit-card-clients-dataset' dataset.


In [2]:
# CSV file in the downloaded directory
for file in os.listdir(path):
    if file.endswith(".csv"):
        csv_file = os.path.join(path, file)
        break

df = pd.read_csv(csv_file)

feature_vec = df.drop('default.payment.next.month', axis=1)
target_vec = df['default.payment.next.month']

features = torch.tensor(feature_vec.values,dtype=torch.float32)
targets = torch.tensor(target_vec.values,dtype=torch.long)

dataset = TensorDataset(features,targets)

print(features.shape)

torch.Size([30000, 24])


In [3]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)


In [4]:
# TODO set device to cuda
if torch.cuda.is_available():
  print("GPU detected")
  device = torch.device("cuda")
else:
  print("No GPU detected")
  device = torch.device("cpu")

GPU detected


In [10]:
def get_accuracy_and_loss(model, loader, criterion):
  model.eval()
  my_loss = 0
  with torch.no_grad():
    correct = 0
    for data, target in loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      pred = output.argmax(dim=1)
      correct += pred.eq(target).sum().item()
      my_loss += criterion(output, target).item()
  return correct/len(loader.dataset), my_loss/len(loader.dataset)

def get_auc(model, loader, device):
    model.eval()
    all_probs = []
    all_targets = []
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            probs = torch.softmax(output, dim=1)[:, 1]
            all_probs.append(probs.cpu())
            all_targets.append(target.cpu())
    all_probs = torch.cat(all_probs).numpy()
    all_targets = torch.cat(all_targets).numpy()
    return roc_auc_score(all_targets, all_probs)

In [11]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(24, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Linear(32, 2)
        )

    def forward(self, x):
        return self.net(x)

In [12]:
# TODO create a network and move it to GPU

model = NN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
patience = 3
epochs_without_improve = 0
best_val_loss = float('inf')
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
for epoch in range(100):
    model.train()
    train_loss = 0
    correct = 0
    total_count = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        train_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()
        total_count += data.size(0)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch} done.")
    train_accuracy = correct / total_count
    train_loss = train_loss / total_count
    print(f"Train accuracy: {train_accuracy}")
    train_accuracies.append(train_accuracy)
    print(f"Train loss: {train_loss}")
    train_losses.append(train_loss)
    val_accuracy, val_loss = get_accuracy_and_loss(model, val_loader, criterion)
    print(f"Val accuracy: {val_accuracy}")
    val_accuracies.append(val_accuracy)
    print(f"Val loss: {val_loss}")
    val_losses.append(val_loss)
    val_auc = get_auc(model, val_loader, device)
    print(f"Val AUC: {val_auc:.4f}")
    # Check for improvement
    if val_loss < best_val_loss:
      best_val_loss = val_loss
      torch.save(model.state_dict(), 'best_model.pth')
      epochs_without_improve = 0
    else:
      epochs_without_improve += 1
      if epochs_without_improve >= patience:
        break

Epoch 0 done.
Train accuracy: 0.7754583333333334
Train loss: 0.01637953600163261
Val accuracy: 0.7845
Val loss: 0.015577536756793658
Val AUC: 0.6645
Epoch 1 done.
Train accuracy: 0.7773333333333333
Train loss: 0.0159387499888738
Val accuracy: 0.7845
Val loss: 0.015523913234472275
Val AUC: 0.6599
Epoch 2 done.
Train accuracy: 0.7773333333333333
Train loss: 0.015795223532865443
Val accuracy: 0.7845
Val loss: 0.015362561563650767
Val AUC: 0.6788
Epoch 3 done.
Train accuracy: 0.7774583333333334
Train loss: 0.01570082364976406
Val accuracy: 0.7845
Val loss: 0.015271286035577456
Val AUC: 0.6795
Epoch 4 done.
Train accuracy: 0.777
Train loss: 0.015620595299949249
Val accuracy: 0.7845
Val loss: 0.015213162461916606
Val AUC: 0.6897
Epoch 5 done.
Train accuracy: 0.7773333333333333
Train loss: 0.015566968906670808
Val accuracy: 0.7843333333333333
Val loss: 0.015509137173493703
Val AUC: 0.6631
Epoch 6 done.
Train accuracy: 0.777125
Train loss: 0.015541706966857116
Val accuracy: 0.7845
Val loss: 0.