## L2 Regularization (Weight Decay) in practice

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt
%config InlineBackend.figure_formats = ['svg']
%matplotlib inline
plt.style.use("dark_background")

from sklearn.model_selection import train_test_split

In [2]:
import seaborn as sns

# Load seaborn's bundled "iris" dataset; later cells read the first four
# columns as features and the `species` column as the class label.
df_iris = sns.load_dataset("iris")

In [3]:
from sklearn.preprocessing import LabelEncoder
# Map the string labels (y) to integers in the range 0 .. n_classes-1.
label_enc = LabelEncoder()

labels = label_enc.fit_transform(df_iris.species)

# Feature matrix: the first four columns of the DataFrame, cast to float.
X = torch.tensor(df_iris.iloc[:,0:4].values).float()
# Label vector as integers (.long() gives an integer dtype). The classes could
# also have been encoded by hand with boolean masks such as
# df_iris.species == "setosa".
y = torch.tensor(labels).long()

In [4]:
# Split features and labels into train and test subsets. No test_size or
# random_state is passed, so scikit-learn's defaults apply and the split is
# not seeded.
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [5]:
# Pair each feature row with its label so a DataLoader can serve them together.
train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

In [6]:
# Mini-batch size used for training (powers of two are the usual choice).
batch_size = 16

# Training samples are reshuffled on every pass over the loader.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# For evaluation the whole test split is served as one single batch:
# len(test_data) equals the number of rows of the feature tensor
# test_data.tensors[0].
test_loader = DataLoader(test_data, batch_size=len(test_data))

<img src="l2 weight decay details.jpg" alt="info" width=700>

In [7]:
class Model(nn.Module):
    """Small fully connected classifier: 4 features -> 64 -> 64 -> 3 class scores."""

    def __init__(self):
        super().__init__()
        # NOTE: no trailing commas after the layers. `nn.Linear(...),` builds a
        # 1-tuple, which nn.Module does not register as a submodule (so its
        # weights never reach the optimizer) and which forward() cannot call.
        self.inp = nn.Linear(4, 64)
        self.hid = nn.Linear(64, 64)
        self.out = nn.Linear(64, 3)

    def forward(self, x):
        """Compute the raw class scores for a batch of inputs.

        Args:
            x: float tensor whose last dimension has size 4.

        Returns:
            Tensor with last dimension of size 3. No softmax is applied here;
            the scores are returned as produced by the output layer.
        """
        x = F.relu(self.inp(x))
        x = F.relu(self.hid(x))
        return self.out(x)
        

## Ajustaremos el tamaño del coeficiente de penalización $\large\lambda$ a través del parámetro `l2_lambda`

In [8]:
def create_model(l2_lambda):
    """Build a fresh network together with its loss and optimizer.

    Args:
        l2_lambda: L2 penalty coefficient, forwarded to SGD as ``weight_decay``.

    Returns:
        Tuple ``(model, loss_func, optim)``: a new ``Model`` instance, a
        cross-entropy loss, and an SGD optimizer (lr=0.005) over the model's
        parameters.
    """
    net = Model()
    criterion = nn.CrossEntropyLoss()

    # The regularization strength enters only through the optimizer.
    optimizer = torch.optim.SGD(
        net.parameters(),
        lr=0.005,
        weight_decay=l2_lambda,
    )

    return net, criterion, optimizer

In [9]:
def _batch_accuracy(logits, targets):
    """Return the percentage of rows whose highest-scoring class equals the target."""
    hits = (torch.argmax(logits, dim=1) == targets).float()
    return 100 * torch.mean(hits).item()


def train_model(model, loss_func, optim):
    """Train ``model`` and record per-epoch metrics.

    Reads the module-level names ``n_epochs``, ``train_loader`` and
    ``test_loader``.

    Args:
        model: network to train; called as ``model(batch)``.
        loss_func: callable ``loss_func(predictions, targets)`` returning a
            scalar tensor.
        optim: optimizer that updates ``model``'s parameters.

    Returns:
        Tuple ``(train_accs, test_accs, losses)`` of lists with one entry per
        epoch: mean training accuracy over the batches (percent), accuracy on
        the first batch of ``test_loader`` (percent), and mean training loss.

    Note:
        The model is switched to train mode for the training pass and to eval
        mode for the evaluation, so it is left in eval mode after the last
        epoch.
    """
    train_accs = []
    test_accs = []
    losses = []

    for epoch in range(n_epochs):
        model.train()

        batch_accs = []
        batch_loss = []

        for X_batch, y_batch in train_loader:
            y_hat = model(X_batch)
            loss = loss_func(y_hat, y_batch)

            optim.zero_grad()
            loss.backward()
            optim.step()

            # Accuracy is measured on the predictions made before this step.
            batch_accs.append(_batch_accuracy(y_hat, y_batch))
            batch_loss.append(loss.item())

        # End of epoch: average the per-batch metrics.
        train_accs.append(np.mean(batch_accs))
        losses.append(np.mean(batch_loss))

        # Evaluation: no gradients are needed, so skip building the autograd
        # graph. Local names are distinct from the module-level test tensors.
        model.eval()
        with torch.no_grad():
            X_eval, y_eval = next(iter(test_loader))
            preds = model(X_eval)
        test_accs.append(_batch_accuracy(preds, y_eval))

    return train_accs, test_accs, losses