# Multi-Layer Perceptron Model

Train a 3-layer neural network using PyTorch with early stopping for loan default prediction.

In [None]:
import os
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    brier_score_loss,
    classification_report,
    confusion_matrix
)

# Configuration
DATA_DIR = "../data"
MODELS_DIR = "../models"
RESULTS_DIR = "../results"

TRAIN_FILE = os.path.join(DATA_DIR, "train.csv")
VAL_FILE = os.path.join(DATA_DIR, "val.csv")
TEST_FILE = os.path.join(DATA_DIR, "test.csv")

MODEL_FILE = os.path.join(MODELS_DIR, "mlp_model.pth")
PREDICTIONS_FILE = os.path.join(RESULTS_DIR, "mlp_predictions.csv")
METRICS_FILE = os.path.join(RESULTS_DIR, "mlp_metrics.json")

TARGET_COL = "status"
RANDOM_STATE = 42

BATCH_SIZE = 64
MAX_EPOCHS = 200
LEARNING_RATE = 0.0008
PATIENCE = 20
WEIGHT_DECAY = 1e-5
FOCAL_ALPHA = 0.3
FOCAL_GAMMA = 2.5
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
if torch.cuda.is_available():
    torch.cuda.manual_seed(RANDOM_STATE)

os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# Define Focal Loss for imbalanced classification
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        
    def forward(self, inputs, targets):
        bce_loss = nn.functional.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-bce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * bce_loss
        return focal_loss.mean()

# Define Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, dim, dropout=0.3):
        super(ResidualBlock, self).__init__()
        self.block = nn.Sequential(
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(dim, dim),
            nn.BatchNorm1d(dim)
        )
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        residual = x
        out = self.block(x)
        out += residual
        out = self.relu(out)
        out = self.dropout(out)
        return out

# Define Advanced Deep MLP
class SimpleMLP(nn.Module):
    def __init__(self, input_dim):
        super(SimpleMLP, self).__init__()
        
        # Larger initial projection
        self.input_layer = nn.Sequential(
            nn.Linear(input_dim, 768),
            nn.BatchNorm1d(768),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # More residual blocks at high dimension
        self.res_block1 = ResidualBlock(768, dropout=0.4)
        self.res_block2 = ResidualBlock(768, dropout=0.4)
        self.res_block3 = ResidualBlock(768, dropout=0.4)
        
        # Gradual downsampling
        self.down1 = nn.Sequential(
            nn.Linear(768, 384),
            nn.BatchNorm1d(384),
            nn.ReLU(),
            nn.Dropout(0.4)
        )
        
        self.res_block4 = ResidualBlock(384, dropout=0.35)
        self.res_block5 = ResidualBlock(384, dropout=0.35)
        
        self.down2 = nn.Sequential(
            nn.Linear(384, 192),
            nn.BatchNorm1d(192),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        self.res_block6 = ResidualBlock(192, dropout=0.3)
        
        self.down3 = nn.Sequential(
            nn.Linear(192, 96),
            nn.BatchNorm1d(96),
            nn.ReLU(),
            nn.Dropout(0.25)
        )
        
        self.res_block7 = ResidualBlock(96, dropout=0.25)
        
        # Final layer
        self.output_layer = nn.Sequential(
            nn.Linear(96, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.input_layer(x)
        x = self.res_block1(x)
        x = self.res_block2(x)
        x = self.res_block3(x)
        x = self.down1(x)
        x = self.res_block4(x)
        x = self.res_block5(x)
        x = self.down2(x)
        x = self.res_block6(x)
        x = self.down3(x)
        x = self.res_block7(x)
        x = self.output_layer(x)
        return x

In [3]:
# Load data
train_df = pd.read_csv(TRAIN_FILE)
val_df = pd.read_csv(VAL_FILE)
test_df = pd.read_csv(TEST_FILE)

X_train = train_df.drop(columns=[TARGET_COL]).values
y_train = train_df[TARGET_COL].values

X_val = val_df.drop(columns=[TARGET_COL]).values
y_val = val_df[TARGET_COL].values

X_test = test_df.drop(columns=[TARGET_COL]).values
y_test = test_df[TARGET_COL].values

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")

Training set: (179250, 67)
Validation set: (14867, 67)
Test set: (14867, 67)


In [4]:
# Create dataloaders
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
X_val_tensor = torch.FloatTensor(X_val)
y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
# Initialize model
input_dim = X_train.shape[1]
model = SimpleMLP(input_dim).to(DEVICE)
criterion = FocalLoss(alpha=FOCAL_ALPHA, gamma=FOCAL_GAMMA)
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)

print(f"Model initialized with {sum(p.numel() for p in model.parameters())} parameters")
print(f"Training on device: {DEVICE}")
print(f"Using Focal Loss with alpha={FOCAL_ALPHA}, gamma={FOCAL_GAMMA}")

Model initialized with 1430145 parameters
Training on device: cpu
Using Focal Loss with alpha=0.25, gamma=2.0


In [6]:
# Training functions
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        
        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_loader)

def validate(model, val_loader, criterion):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
    
    return total_loss / len(val_loader)

In [7]:
# Train with early stopping
best_val_loss = float('inf')
patience_counter = 0
best_model_state = None
best_val_auc = 0.0

for epoch in range(MAX_EPOCHS):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    
    # Calculate validation AUC during training
    model.eval()
    with torch.no_grad():
        val_proba = model(torch.FloatTensor(X_val).to(DEVICE)).cpu().numpy().flatten()
        val_auc = roc_auc_score(y_val, val_proba)
    
    # Update learning rate scheduler
    scheduler.step()
    
    current_lr = optimizer.param_groups[0]['lr']
    print(f"Epoch {epoch+1}/{MAX_EPOCHS} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val AUC: {val_auc:.4f}, LR: {current_lr:.6f}")
    
    # Save best model based on AUC (more relevant than loss)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        best_val_loss = val_loss
        patience_counter = 0
        best_model_state = model.state_dict().copy()
    else:
        patience_counter += 1
    
    if patience_counter >= PATIENCE:
        print(f"Early stopping triggered after {epoch+1} epochs")
        break

model.load_state_dict(best_model_state)
print(f"Best validation AUC: {best_val_auc:.4f}")
print(f"Best validation loss: {best_val_loss:.4f}")

Epoch 1/150 - Train Loss: 0.0300, Val Loss: 0.0248, Val AUC: 0.8684, LR: 0.000976


Epoch 2/150 - Train Loss: 0.0271, Val Loss: 0.0236, Val AUC: 0.8775, LR: 0.000905


Epoch 3/150 - Train Loss: 0.0264, Val Loss: 0.0235, Val AUC: 0.8793, LR: 0.000794


Epoch 4/150 - Train Loss: 0.0259, Val Loss: 0.0234, Val AUC: 0.8808, LR: 0.000655


Epoch 5/150 - Train Loss: 0.0255, Val Loss: 0.0225, Val AUC: 0.8838, LR: 0.000501


Epoch 6/150 - Train Loss: 0.0251, Val Loss: 0.0226, Val AUC: 0.8855, LR: 0.000346


Epoch 7/150 - Train Loss: 0.0247, Val Loss: 0.0221, Val AUC: 0.8864, LR: 0.000207


Epoch 8/150 - Train Loss: 0.0245, Val Loss: 0.0227, Val AUC: 0.8858, LR: 0.000096


Epoch 9/150 - Train Loss: 0.0243, Val Loss: 0.0218, Val AUC: 0.8862, LR: 0.000025


Epoch 10/150 - Train Loss: 0.0241, Val Loss: 0.0219, Val AUC: 0.8867, LR: 0.001000


Epoch 11/150 - Train Loss: 0.0249, Val Loss: 0.0223, Val AUC: 0.8838, LR: 0.000994


Epoch 12/150 - Train Loss: 0.0247, Val Loss: 0.0227, Val AUC: 0.8767, LR: 0.000976


Epoch 13/150 - Train Loss: 0.0245, Val Loss: 0.0213, Val AUC: 0.8848, LR: 0.000946


Epoch 14/150 - Train Loss: 0.0243, Val Loss: 0.0224, Val AUC: 0.8832, LR: 0.000905


Epoch 15/150 - Train Loss: 0.0240, Val Loss: 0.0217, Val AUC: 0.8835, LR: 0.000854


Epoch 16/150 - Train Loss: 0.0238, Val Loss: 0.0223, Val AUC: 0.8842, LR: 0.000794


Epoch 17/150 - Train Loss: 0.0236, Val Loss: 0.0215, Val AUC: 0.8828, LR: 0.000727


Epoch 18/150 - Train Loss: 0.0233, Val Loss: 0.0211, Val AUC: 0.8847, LR: 0.000655


Epoch 19/150 - Train Loss: 0.0231, Val Loss: 0.0212, Val AUC: 0.8844, LR: 0.000579


Epoch 20/150 - Train Loss: 0.0229, Val Loss: 0.0214, Val AUC: 0.8836, LR: 0.000501


Epoch 21/150 - Train Loss: 0.0227, Val Loss: 0.0255, Val AUC: 0.8723, LR: 0.000422


Epoch 22/150 - Train Loss: 0.0225, Val Loss: 0.0207, Val AUC: 0.8835, LR: 0.000346


Epoch 23/150 - Train Loss: 0.0223, Val Loss: 0.0204, Val AUC: 0.8847, LR: 0.000274


Epoch 24/150 - Train Loss: 0.0222, Val Loss: 0.0204, Val AUC: 0.8843, LR: 0.000207


Epoch 25/150 - Train Loss: 0.0220, Val Loss: 0.0202, Val AUC: 0.8854, LR: 0.000147
Early stopping triggered after 25 epochs
Best validation AUC: 0.8867
Best validation loss: 0.0219


In [8]:
# Evaluate on validation set
model.eval()
X_val_tensor = torch.FloatTensor(X_val).to(DEVICE)

with torch.no_grad():
    y_val_proba = model(X_val_tensor).cpu().numpy().flatten()

y_val_pred = (y_val_proba >= 0.5).astype(int)

val_auc_roc = roc_auc_score(y_val, y_val_proba)
val_auc_pr = average_precision_score(y_val, y_val_proba)
val_brier = brier_score_loss(y_val, y_val_proba)

print("Validation Set Performance:")
print(f"AUC-ROC: {val_auc_roc:.4f}")
print(f"AUC-PR: {val_auc_pr:.4f}")
print(f"Brier Score: {val_brier:.4f}")
print("\nClassification Report:")
print(classification_report(y_val, y_val_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred))

Validation Set Performance:
AUC-ROC: 0.8854
AUC-PR: 0.8322
Brier Score: 0.1282

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.97      0.93     11203
           1       0.86      0.66      0.75      3664

    accuracy                           0.89     14867
   macro avg       0.88      0.81      0.84     14867
weighted avg       0.89      0.89      0.88     14867


Confusion Matrix:
[[10825   378]
 [ 1262  2402]]


In [9]:
# Evaluate on test set
X_test_tensor = torch.FloatTensor(X_test).to(DEVICE)

with torch.no_grad():
    y_test_proba = model(X_test_tensor).cpu().numpy().flatten()

y_test_pred = (y_test_proba >= 0.5).astype(int)

test_auc_roc = roc_auc_score(y_test, y_test_proba)
test_auc_pr = average_precision_score(y_test, y_test_proba)
test_brier = brier_score_loss(y_test, y_test_proba)

print("Test Set Performance:")
print(f"AUC-ROC: {test_auc_roc:.4f}")
print(f"AUC-PR: {test_auc_pr:.4f}")
print(f"Brier Score: {test_brier:.4f}")

Test Set Performance:
AUC-ROC: 0.8962
AUC-PR: 0.8438
Brier Score: 0.1262


In [None]:
# Save model
torch.save({
    'model_state_dict': model.state_dict(),
    'input_dim': input_dim
}, MODEL_FILE)
print(f"Model saved to {MODEL_FILE}")

# Save predictions
predictions_df = pd.DataFrame({
    'true_label': y_val,
    'predicted_probability': y_val_proba,
    'predicted_label': y_val_pred,
    'dataset': 'validation'
})
predictions_df.to_csv(PREDICTIONS_FILE, index=False)
print(f"Predictions saved to {PREDICTIONS_FILE}")

# Save metrics
all_metrics = {
    'architecture': 'DeepResNet: Input → 768 (3x ResBlocks) → 384 (2x ResBlocks) → 192 (ResBlock) → 96 (ResBlock) → 1',
    'features': 'Deep residual connections, Batch Normalization, Focal Loss, Gradient Clipping',
    'total_residual_blocks': 7,
    'dropout': 'Progressive: 0.5 → 0.4 → 0.35 → 0.3 → 0.25',
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'weight_decay': WEIGHT_DECAY,
    'optimizer': 'AdamW',
    'scheduler': 'CosineAnnealingWarmRestarts',
    'loss_function': f'FocalLoss(alpha={FOCAL_ALPHA}, gamma={FOCAL_GAMMA})',
    'max_epochs': MAX_EPOCHS,
    'patience': PATIENCE,
    'gradient_clipping': 1.0,
    'validation_metrics': {
        'auc_roc': float(val_auc_roc),
        'auc_pr': float(val_auc_pr),
        'brier_score': float(val_brier),
        'dataset': 'Validation'
    },
    'test_metrics': {
        'auc_roc': float(test_auc_roc),
        'auc_pr': float(test_auc_pr),
        'brier_score': float(test_brier),
        'dataset': 'Test'
    }
}

with open(METRICS_FILE, 'w') as f:
    json.dump(all_metrics, f, indent=4)
print(f"Metrics saved to {METRICS_FILE}")