# Compare L1 vs L2 regularization on the Wine dataset
Train both models with the same data splits and hyperparameters; report validation and test accuracy.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from dataset_wine import get_wine_loaders, set_seed

# Seed first so everything below starts from a fixed RNG state.
# NOTE(review): set_seed is project-local (dataset_wine); presumably it seeds
# torch / numpy / random -- confirm there.
set_seed(42)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The helper returns five values: three loaders plus the two sizes that are
# later passed to MLP(input_dim, num_classes).
train_loader, val_loader, test_loader, input_dim, num_classes = get_wine_loaders(batch_size=64)
# Bare expression: shown as the cell output when run in a notebook.
input_dim, num_classes

In [None]:
class MLP(nn.Module):
    """Fully connected classifier with two hidden layers.

    Layout: ``input_dim -> h1 -> h2 -> num_classes`` with a ReLU after each
    hidden layer. The final layer has no activation, so the output is raw
    logits.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
    """

    def __init__(self, input_dim: int, num_classes: int, h1: int = 64, h2: int = 32):
        super().__init__()
        widths = (input_dim, h1, h2)
        stack = []
        # Hidden layers are created front to back, each followed by a ReLU.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(h2, num_classes))
        # A single Sequential named ``net`` keeps state-dict keys as
        # ``net.<index>.weight`` / ``net.<index>.bias``.
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        return self.net(x)

def accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is switched to eval mode (and left in eval mode on return) and
    the forward passes run under ``torch.no_grad()``. Each batch is moved to
    the module-level ``device``; the predicted class is the argmax over
    dim 1 of the model output.

    Args:
        model: Module mapping a batch ``x`` to per-class scores.
        loader: Iterable yielding ``(x, y)`` batches.

    Returns:
        ``correct / total`` as a float. If the loader yields no samples the
        result is ``0.0`` rather than a ``ZeroDivisionError``.
    """
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    # An empty loader (e.g. an empty validation split) has no defined
    # accuracy; report 0.0 so callers' "best so far" comparisons still work.
    if total == 0:
        return 0.0
    return correct / total

def train_l1(model, train_loader, val_loader, epochs=50, lr=1e-3, l1_lambda=1e-4):
    """Train ``model`` with cross-entropy plus an explicit L1 penalty.

    Optimizes with Adam. For every batch the loss is
    ``CE + l1_lambda * sum(|p|)``, where the sum runs over trainable
    parameters with more than one dimension (1-D parameters such as
    ``nn.Linear`` biases are skipped). After each epoch the validation
    accuracy is measured and the weights of the best epoch so far are
    snapshotted on the CPU; ties keep the earlier epoch.

    Args:
        model: Module to train in place.
        train_loader: Iterable of ``(x, y)`` training batches.
        val_loader: Loader passed to ``accuracy`` after each epoch.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        l1_lambda: Coefficient of the L1 penalty.

    Returns:
        ``(model, best_val_acc)``. The model carries the best snapshot if any
        epoch scored above 0.0; otherwise it keeps its current weights.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    best_val_acc = 0.0
    best_state = None

    for _ in range(epochs):
        model.train()
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            data_loss = criterion(model(inputs), targets)
            # L1 norm of the multi-dimensional trainable parameters only.
            penalty = sum(
                (w.abs().sum() for w in model.parameters()
                 if w.requires_grad and w.dim() > 1),
                0.0,
            )
            total_loss = data_loss + l1_lambda * penalty
            total_loss.backward()
            optimizer.step()

        epoch_acc = accuracy(model, val_loader)
        if epoch_acc > best_val_acc:
            best_val_acc = epoch_acc
            # Detached CPU clones so later optimizer steps cannot alter the snapshot.
            best_state = {
                key: tensor.detach().cpu().clone()
                for key, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_val_acc

def train_l2(model, train_loader, val_loader, epochs=50, lr=1e-3, weight_decay=1e-4):
    """Train ``model`` with cross-entropy and L2 regularization via Adam.

    The L2 penalty is applied through Adam's ``weight_decay`` option. To keep
    the comparison with ``train_l1`` like-for-like, decay is applied only to
    the parameters that ``train_l1`` penalizes: trainable tensors with more
    than one dimension. All other parameters (e.g. ``nn.Linear`` biases) are
    still optimized, but with ``weight_decay=0``.

    After each epoch the validation accuracy is measured and the weights of
    the best epoch so far are snapshotted on the CPU; ties keep the earlier
    epoch.

    Args:
        model: Module to train in place.
        train_loader: Iterable of ``(x, y)`` training batches.
        val_loader: Loader passed to ``accuracy`` after each epoch.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        weight_decay: L2 coefficient for the multi-dimensional parameters.

    Returns:
        ``(model, best_val_acc)``. The model carries the best snapshot if any
        epoch scored above 0.0; otherwise it keeps its current weights.
    """
    # Split parameters with the same rule train_l1 uses for its penalty.
    decayed, plain = [], []
    for p in model.parameters():
        if p.requires_grad and p.dim() > 1:
            decayed.append(p)
        else:
            plain.append(p)
    # Drop empty groups; if both are empty Adam raises the same ValueError it
    # would for a model without parameters.
    param_groups = []
    if decayed:
        param_groups.append({'params': decayed, 'weight_decay': weight_decay})
    if plain:
        param_groups.append({'params': plain, 'weight_decay': 0.0})
    opt = optim.Adam(param_groups, lr=lr)
    ce = nn.CrossEntropyLoss()
    best = {'val_acc': 0.0, 'state': None}
    for ep in range(1, epochs + 1):
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            opt.zero_grad()
            logits = model(x)
            loss = ce(logits, y)
            loss.backward()
            opt.step()
        val_acc = accuracy(model, val_loader)
        if val_acc > best['val_acc']:
            best['val_acc'] = val_acc
            # Detached CPU clones so later optimizer steps cannot alter the snapshot.
            best['state'] = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    if best['state'] is not None:
        model.load_state_dict(best['state'])
    return model, best['val_acc']

# Both runs are re-seeded with the same value before the model is built, so
# the regularizer is the only intended difference between them. Without this,
# the second model would be initialized from whatever RNG state the first
# training run left behind.
# NOTE(review): set_seed is project-local; presumably it seeds the torch RNG
# used for weight init (and loader shuffling) -- confirm in dataset_wine.

# Train L1
set_seed(42)
m1 = MLP(input_dim, num_classes).to(device)
m1, val1 = train_l1(m1, train_loader, val_loader, epochs=50, lr=1e-3, l1_lambda=1e-4)
test1 = accuracy(m1, test_loader)
# Train L2
set_seed(42)
m2 = MLP(input_dim, num_classes).to(device)
m2, val2 = train_l2(m2, train_loader, val_loader, epochs=50, lr=1e-3, weight_decay=1e-4)
test2 = accuracy(m2, test_loader)

# Report best validation accuracy and the test accuracy of the restored model.
print({"L1": {"val": val1, "test": test1}, "L2": {"val": val2, "test": test2}})
# The winner is chosen on validation accuracy only; equal scores are a tie.
print('Better on validation:', 'L1' if val1>val2 else 'L2' if val2>val1 else 'Tie')
