# Multi-Layer Perceptron Model

Train a 3-layer neural network using PyTorch with early stopping for loan default prediction.

In [1]:
import os
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    brier_score_loss,
    classification_report,
    confusion_matrix
)

# Configuration
DATA_DIR = "../data"
MODELS_DIR = "../models"
RESULTS_DIR = "../results"

TRAIN_FILE = os.path.join(DATA_DIR, "train.csv")
VAL_FILE = os.path.join(DATA_DIR, "val.csv")
TEST_FILE = os.path.join(DATA_DIR, "test.csv")

MODEL_FILE = os.path.join(MODELS_DIR, "mlp_model.pth")
PREDICTIONS_FILE = os.path.join(RESULTS_DIR, "mlp_predictions.csv")
METRICS_FILE = os.path.join(RESULTS_DIR, "mlp_metrics.json")

TARGET_COL = "status"
RANDOM_STATE = 42

BATCH_SIZE = 256
MAX_EPOCHS = 100
LEARNING_RATE = 0.002
PATIENCE = 10
WEIGHT_DECAY = 1e-4
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.manual_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

In [2]:
# Define improved MLP architecture
class SimpleMLP(nn.Module):
    def __init__(self, input_dim):
        super(SimpleMLP, self).__init__()
        
        self.network = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        return self.network(x)

In [3]:
# Load data
train_df = pd.read_csv(TRAIN_FILE)
val_df = pd.read_csv(VAL_FILE)
test_df = pd.read_csv(TEST_FILE)

X_train = train_df.drop(columns=[TARGET_COL]).values
y_train = train_df[TARGET_COL].values

X_val = val_df.drop(columns=[TARGET_COL]).values
y_val = val_df[TARGET_COL].values

X_test = test_df.drop(columns=[TARGET_COL]).values
y_test = test_df[TARGET_COL].values

print(f"Training set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")

Training set: (179250, 67)
Validation set: (14867, 67)
Test set: (14867, 67)


In [4]:
# Create dataloaders
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)
X_val_tensor = torch.FloatTensor(X_val)
y_val_tensor = torch.FloatTensor(y_val).unsqueeze(1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
# Initialize model
input_dim = X_train.shape[1]
model = SimpleMLP(input_dim).to(DEVICE)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

print(f"Model initialized with {sum(p.numel() for p in model.parameters())} parameters")
print(f"Training on device: {DEVICE}")

Model initialized with 61633 parameters
Training on device: cpu




In [6]:
# Training functions
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        
        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_loader)

def validate(model, val_loader, criterion):
    model.eval()
    total_loss = 0
    
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
    
    return total_loss / len(val_loader)

In [7]:
# Train with early stopping
best_val_loss = float('inf')
patience_counter = 0
best_model_state = None

for epoch in range(MAX_EPOCHS):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    
    # Update learning rate scheduler
    scheduler.step(val_loss)
    
    print(f"Epoch {epoch+1}/{MAX_EPOCHS} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_model_state = model.state_dict().copy()
    else:
        patience_counter += 1
    
    if patience_counter >= PATIENCE:
        print(f"Early stopping triggered after {epoch+1} epochs")
        break

model.load_state_dict(best_model_state)
print(f"Best validation loss: {best_val_loss:.4f}")

Epoch 1/100 - Train Loss: 0.4401, Val Loss: 0.3529


Epoch 2/100 - Train Loss: 0.4130, Val Loss: 0.3530


Epoch 3/100 - Train Loss: 0.4052, Val Loss: 0.3595


Epoch 4/100 - Train Loss: 0.4006, Val Loss: 0.3562


Epoch 5/100 - Train Loss: 0.3940, Val Loss: 0.3609


Epoch 6/100 - Train Loss: 0.3842, Val Loss: 0.3427


Epoch 7/100 - Train Loss: 0.3791, Val Loss: 0.3320


Epoch 8/100 - Train Loss: 0.3775, Val Loss: 0.3446


Epoch 9/100 - Train Loss: 0.3750, Val Loss: 0.3484


Epoch 10/100 - Train Loss: 0.3726, Val Loss: 0.3520


Epoch 11/100 - Train Loss: 0.3700, Val Loss: 0.3358


Epoch 12/100 - Train Loss: 0.3628, Val Loss: 0.3386


Epoch 13/100 - Train Loss: 0.3605, Val Loss: 0.3262


Epoch 14/100 - Train Loss: 0.3605, Val Loss: 0.3511


Epoch 15/100 - Train Loss: 0.3574, Val Loss: 0.3264


Epoch 16/100 - Train Loss: 0.3565, Val Loss: 0.3326


Epoch 17/100 - Train Loss: 0.3560, Val Loss: 0.3489


Epoch 18/100 - Train Loss: 0.3514, Val Loss: 0.3274


Epoch 19/100 - Train Loss: 0.3495, Val Loss: 0.3272


Epoch 20/100 - Train Loss: 0.3469, Val Loss: 0.3214


Epoch 21/100 - Train Loss: 0.3471, Val Loss: 0.3149


Epoch 22/100 - Train Loss: 0.3465, Val Loss: 0.3248


Epoch 23/100 - Train Loss: 0.3461, Val Loss: 0.3218


Epoch 24/100 - Train Loss: 0.3453, Val Loss: 0.3285


Epoch 25/100 - Train Loss: 0.3447, Val Loss: 0.3210


Epoch 26/100 - Train Loss: 0.3424, Val Loss: 0.3459


Epoch 27/100 - Train Loss: 0.3418, Val Loss: 0.4011


Epoch 28/100 - Train Loss: 0.3395, Val Loss: 0.3154


Epoch 29/100 - Train Loss: 0.3410, Val Loss: 0.3620


Epoch 30/100 - Train Loss: 0.3402, Val Loss: 0.3152


Epoch 31/100 - Train Loss: 0.3377, Val Loss: 0.3187
Early stopping triggered after 31 epochs
Best validation loss: 0.3149


In [8]:
# Evaluate on validation set
model.eval()
X_val_tensor = torch.FloatTensor(X_val).to(DEVICE)

with torch.no_grad():
    y_val_proba = model(X_val_tensor).cpu().numpy().flatten()

y_val_pred = (y_val_proba >= 0.5).astype(int)

val_auc_roc = roc_auc_score(y_val, y_val_proba)
val_auc_pr = average_precision_score(y_val, y_val_proba)
val_brier = brier_score_loss(y_val, y_val_proba)

print("Validation Set Performance:")
print(f"AUC-ROC: {val_auc_roc:.4f}")
print(f"AUC-PR: {val_auc_pr:.4f}")
print(f"Brier Score: {val_brier:.4f}")
print("\nClassification Report:")
print(classification_report(y_val, y_val_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_val_pred))

Validation Set Performance:
AUC-ROC: 0.8849
AUC-PR: 0.8328
Brier Score: 0.0950

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.95      0.92     11203
           1       0.81      0.68      0.74      3664

    accuracy                           0.88     14867
   macro avg       0.86      0.82      0.83     14867
weighted avg       0.88      0.88      0.88     14867


Confusion Matrix:
[[10629   574]
 [ 1167  2497]]


In [9]:
# Evaluate on test set
X_test_tensor = torch.FloatTensor(X_test).to(DEVICE)

with torch.no_grad():
    y_test_proba = model(X_test_tensor).cpu().numpy().flatten()

y_test_pred = (y_test_proba >= 0.5).astype(int)

test_auc_roc = roc_auc_score(y_test, y_test_proba)
test_auc_pr = average_precision_score(y_test, y_test_proba)
test_brier = brier_score_loss(y_test, y_test_proba)

print("Test Set Performance:")
print(f"AUC-ROC: {test_auc_roc:.4f}")
print(f"AUC-PR: {test_auc_pr:.4f}")
print(f"Brier Score: {test_brier:.4f}")

Test Set Performance:
AUC-ROC: 0.8943
AUC-PR: 0.8424
Brier Score: 0.0917


In [10]:
# Save model
torch.save({
    'model_state_dict': model.state_dict(),
    'input_dim': input_dim
}, MODEL_FILE)
print(f"Model saved to {MODEL_FILE}")

# Save predictions
predictions_df = pd.DataFrame({
    'true_label': y_val,
    'predicted_probability': y_val_proba,
    'predicted_label': y_val_pred,
    'dataset': 'validation'
})
predictions_df.to_csv(PREDICTIONS_FILE, index=False)
print(f"Predictions saved to {PREDICTIONS_FILE}")

# Save metrics
all_metrics = {
    'architecture': 'Input → 256 → 128 → 64 → 32 → 1 (with BatchNorm)',
    'dropout': '0.4 → 0.4 → 0.3 → 0.2',
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'weight_decay': WEIGHT_DECAY,
    'max_epochs': MAX_EPOCHS,
    'patience': PATIENCE,
    'gradient_clipping': 1.0,
    'validation_metrics': {
        'auc_roc': float(val_auc_roc),
        'auc_pr': float(val_auc_pr),
        'brier_score': float(val_brier),
        'dataset': 'Validation'
    },
    'test_metrics': {
        'auc_roc': float(test_auc_roc),
        'auc_pr': float(test_auc_pr),
        'brier_score': float(test_brier),
        'dataset': 'Test'
    }
}

with open(METRICS_FILE, 'w') as f:
    json.dump(all_metrics, f, indent=4)
print(f"Metrics saved to {METRICS_FILE}")

Model saved to ../models/mlp_model.pth
Predictions saved to ../results/mlp_predictions.csv
Metrics saved to ../results/mlp_metrics.json
