In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:

# 1. Read the training CSV into a DataFrame.
# Column 0 of the file is an ID, so it is used as the index instead of a feature.
df = pd.read_csv(
    'data/GiveMeSomeCredit/cs-training.csv',
    index_col=0,
)

In [3]:
# 2. Preprocessing: fill missing values, standardize
target = 'SeriousDlqin2yrs'
X = df.drop(columns=[target])
y = df[target].values.astype(np.float32).reshape(-1, 1)

# Fill NaNs with column mean (simple imputation)
X = X.fillna(X.mean())

# Use only numeric features (should be all, but for safety)
X_numeric = X.select_dtypes(include=[np.number])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric.values.astype(np.float32))

In [4]:
# 3. Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [5]:
# 4. PyTorch Dataset
class CreditDataset(Dataset):
    """Map-style dataset pairing a feature matrix with its labels.

    Args:
        X: array-like of features, indexed by sample along the first axis.
        y: array-like of labels with the same number of samples as ``X``.

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of samples.

    Both inputs are stored as float32 tensors so they match the default
    parameter dtype of ``nn.Linear`` layers and the float targets that
    ``nn.BCELoss`` expects, even when the caller passes float64 arrays.
    """
    def __init__(self, X, y):
        # as_tensor accepts ndarrays, lists and tensors, and avoids a copy
        # when the input is already a float32 ndarray.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32)
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )
    def __len__(self):
        """Return the number of samples."""
        return len(self.X)
    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap each split in a CreditDataset, then serve it in mini-batches of 128.
# Only the training loader shuffles; the test loader keeps the stored order.
train_ds, test_ds = CreditDataset(X_train, y_train), CreditDataset(X_test, y_test)

train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=128, shuffle=False)

In [6]:
# 5. MLP Model (replace with your technique)
class MLP(nn.Module):
    """Fully connected binary classifier that outputs sigmoid probabilities.

    Args:
        input_dim: number of input features per sample.
        hidden_dims: widths of the hidden ``Linear`` layers, in order. The
            default ``(16, 8)`` builds input -> 16 -> 8 -> 1, i.e. the same
            layer stack (and the same ``state_dict`` keys) as the fixed
            architecture this parameter replaces.

    The network ends in ``nn.Sigmoid``, so ``forward`` returns values in
    (0, 1) with one output column per sample.
    """
    def __init__(self, input_dim, hidden_dims=(16, 8)):
        super().__init__()
        layers = []
        in_features = input_dim
        # Each hidden layer is a Linear followed by a ReLU.
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Single output unit squashed to a probability.
        layers.append(nn.Linear(in_features, 1))
        layers.append(nn.Sigmoid())
        self.model = nn.Sequential(*layers)
    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid outputs."""
        return self.model(x)

In [7]:
from dpn_3.dpn import DPN
class MLP_DPN(nn.Module):
    """Binary classifier that chains a DPN block with a sigmoid.

    The DPN is built as ``DPN(input_dim, 25, 1)``.
    NOTE(review): 25 and 1 are presumably the DPN's internal size and its
    output width; confirm against the ``dpn_3.dpn.DPN`` signature.
    """
    def __init__(self, input_dim):
        super().__init__()
        dpn_block = DPN(input_dim, 25, 1)
        to_probability = nn.Sigmoid()  # For binary classification
        self.model = nn.Sequential(dpn_block, to_probability)
    def forward(self, x):
        """Apply the DPN block followed by the sigmoid to ``x``."""
        probabilities = self.model(x)
        return probabilities

In [8]:
# Seed PyTorch's global RNG before any weights are created so that the
# parameter initialization and the DataLoader shuffle order are repeatable
# under Restart Kernel -> Run All.
torch.manual_seed(42)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The input width is the number of feature columns in the training matrix.
model = MLP(X_train.shape[1]).to(device)

In [9]:
# 6. Training setup: binary cross-entropy loss, optimized with Adam
# over the current model's parameters at a learning rate of 0.001.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [10]:
from utils import train

# Train the current model; the positional argument 20 is presumably the number
# of epochs (confirm against utils.train).
# NOTE(review): test_loader is passed in both the second and third loader
# slots. Judging by the returned names these are presumably the validation and
# test loaders; if so, the reported validation metrics are computed on the
# test set. Confirm against utils.train.
train_metrics, eval_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    test_loader,
    20,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 1.6504 Average_Time_per_batch: 0.0018 Train_Accuracy: 0.9187 Train_Loss: 0.2695 Validation_Accuracy: 0.9349 Validation_Loss: 0.1945
Epoch: 2 Total_Time: 1.3594 Average_Time_per_batch: 0.0014 Train_Accuracy: 0.9345 Train_Loss: 0.1936 Validation_Accuracy: 0.9362 Validation_Loss: 0.1876
Epoch: 3 Total_Time: 1.5755 Average_Time_per_batch: 0.0017 Train_Accuracy: 0.9353 Train_Loss: 0.1911 Validation_Accuracy: 0.9370 Validation_Loss: 0.1872
Epoch: 4 Total_Time: 1.5405 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9356 Train_Loss: 0.1903 Validation_Accuracy: 0.9373 Validation_Loss: 0.1868
Epoch: 5 Total_Time: 1.3645 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.9353 Train_Loss: 0.1898 Validation_Accuracy: 0.9372 Validation_Loss: 0.1868
Epoch: 6 Total_Time: 1.4728 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9358 Train_Loss: 0.1895 Validation_Accuracy: 0.9375 Validation_Loss: 0.1865
Epoch: 7 Total_Time: 1.4903 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.9359

In [11]:
# Re-seed before building the DPN variant so its initialization and the
# shuffle order of its training run start from a fixed, repeatable RNG state
# instead of whatever state the previous training run left behind.
torch.manual_seed(42)

# NOTE: this rebinds `model`; the previously trained model is no longer
# reachable under that name after this cell runs.
model = MLP_DPN(X_train.shape[1]).to(device)

In [12]:
# Training setup for the DPN model: a fresh loss and a fresh Adam optimizer
# bound to the parameters of the model currently assigned to `model`.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [13]:
# Train the DPN model with the same settings as the earlier run.
# NOTE(review): this rebinds train_metrics / eval_metrics / test_metrics, so
# the values from the earlier training call are overwritten.
# NOTE(review): test_loader fills both the second and third loader slots;
# presumably validation and test. Confirm against utils.train.
train_metrics, eval_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    test_loader,
    20,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 1.1531 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9177 Train_Loss: 0.2862 Validation_Accuracy: 0.9348 Validation_Loss: 0.2090
Epoch: 2 Total_Time: 1.1482 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9337 Train_Loss: 0.2005 Validation_Accuracy: 0.9352 Validation_Loss: 0.1906
Epoch: 3 Total_Time: 1.1577 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9345 Train_Loss: 0.1929 Validation_Accuracy: 0.9357 Validation_Loss: 0.1885
Epoch: 4 Total_Time: 1.1571 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9347 Train_Loss: 0.1913 Validation_Accuracy: 0.9358 Validation_Loss: 0.1875
Epoch: 5 Total_Time: 1.1422 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9353 Train_Loss: 0.1905 Validation_Accuracy: 0.9358 Validation_Loss: 0.1872
Epoch: 6 Total_Time: 1.1541 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9350 Train_Loss: 0.1901 Validation_Accuracy: 0.9364 Validation_Loss: 0.1871
Epoch: 7 Total_Time: 1.1516 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.9352