In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class ANN(nn.Module):
    def __init__(self, input_dim, hidden_dim=[64,32], output_dim=1, use_batchnorm=False, dropout_rate=0.0):
        super().__init__()
        layers = []
        prev_dim = input_dim
        # hidden layers
        for h in hidden_dim:
            layers.append(nn.Linear(prev_dim,h))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(h))
            layers.append(nn.ReLU())
            if dropout_rate > 0:
                layers.append(nn.Dropout(dropout_rate))
            prev_dim = h
        layers.append(nn.Linear(prev_dim, output_dim)) # output layer
        self.network = nn.Sequential(*layers) # build the network
        
    def forward(self, x):
        return self.network(x)

In [5]:
def train_model(X_train, y_train, X_val, y_val, input_dim, hidden_dim=[64,32], output_dim=1,
                use_batchnorm=False, dropout_rate=0.0, lr=0.001, batch_size=32, loss_fn='mse', optimizer_fn='adam', regularization='None', reg_lambda=0.001, use_lr_scheduler=False, early_stopping = False, patience=10, epochs = 100):
    
    # Convert data to PyTorch tensors
    torch.manual_seed(42)
    train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
    val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    model = ANN(input_dim, hidden_dim, output_dim, use_batchnorm, dropout_rate)
    
    # Loss
    if loss_fn == 'mse':
        criterion = nn.MSELoss()
    elif loss_fn == 'mae':
        criterion = nn.L1Loss()
    elif loss_fn == 'bce':
        criterion = nn.BCELoss()
    elif loss_fn == 'cross_entropy':
        criterion = nn.CrossEntropyLoss()
    else:
        raise ValueError("Unsupported loss function")
    
    # Optimizer
    if optimizer_fn == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=reg_lambda if regularization=='l2' else 0)
    elif optimizer_fn == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=reg_lambda if regularization=='l2' else 0)
    else:
        raise ValueError("Unsupported optimizer")
    
    # Learning rate scheduler
    scheduler = None
    if use_lr_scheduler:
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
        
    best_val_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for xb, yb in train_loader:
            optimizer.zero_grad()
            y_pred = model(xb)
            loss = criterion(y_pred, yb)
            
            if regularization == 'l1':
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss += reg_lambda * l1_norm
                
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if scheduler:
            scheduler.step()
            
        model.eval()
        with torch.no_grad():
            val_loss = np.mean([criterion(model(xv), yv).item() for xv, yv in val_loader])
        
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}")
        
        if early_stopping:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print("Early stopping triggered")
                    break
        
    return model

In [7]:
X_train = np.random.rand(1000, 20)
y_train = np.random.rand(1000, 1)
X_val = np.random.rand(200, 20)
y_val = np.random.rand(200, 1)
input_dim = X_train.shape[1]

model = train_model(X_train, y_train, X_val, y_val,
                    input_dim, hidden_dim=[64,32], output_dim=1,
                    use_batchnorm=True, dropout_rate=0.2, lr=0.001, batch_size=32,
                    loss_fn='cross_entropy', optimizer_fn='adam', regularization='l2', reg_lambda=0.001,
                    use_lr_scheduler=True, early_stopping=True, patience=5, epochs=100)


Epoch 1/100, Train Loss: 0.0000, Val Loss: 0.0000
Epoch 2/100, Train Loss: 0.0000, Val Loss: 0.0000
Epoch 3/100, Train Loss: 0.0000, Val Loss: 0.0000
Epoch 4/100, Train Loss: 0.0000, Val Loss: 0.0000
Epoch 5/100, Train Loss: 0.0000, Val Loss: 0.0000
Epoch 6/100, Train Loss: 0.0000, Val Loss: 0.0000
Early stopping triggered
