# Inicialización de los parámetros

PyTorch nos da la posibilidad de [inicializar](https://pytorch.org/docs/stable/nn.init.html) los parámetros de una red. Aunque normalmente no se modifica y se deja la inicialización que PyTorch aplica por defecto, vamos a ver un ejemplo de inicialización por el método [Xavier](https://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf).

Descargamos un dataset

In [1]:
from sklearn import datasets

# Load scikit-learn's breast cancer dataset; the result is indexed below
# by the keys 'data', 'feature_names' and 'target'.
cancer = datasets.load_breast_cancer()

In [2]:
import pandas as pd

# Put the feature matrix into a DataFrame (one column per feature name) and
# append the label as an extra column called 'type'.
cancer_df = pd.DataFrame(
    data=cancer['data'],
    columns=cancer['feature_names'],
).assign(type=cancer['target'])
# Last expression of the cell: shows the first rows in the notebook.
cancer_df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,type
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


Creamos el dataset

In [3]:
import torch

class CancerDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves (features, label) pairs from a DataFrame.

    Both tensors are stored as float32. The label is read from the 'type'
    column and kept with shape (N, 1), so a single item has a label of
    shape (1,).
    """

    def __init__(self, dataframe):
        """Convert ``dataframe`` into a feature tensor and a label tensor.

        Args:
            dataframe: pandas DataFrame that contains a 'type' column.
        """
        # NOTE(review): the filter drops a column named 'target', but the
        # label is read from 'type' below. For a frame whose label column is
        # 'type', that column therefore stays among the features (the label
        # is fed to the model as an input). Confirm whether this is intended;
        # changing it also changes the number of input features.
        feature_names = list(filter(lambda name: name != 'target', dataframe.columns))
        feature_array = dataframe[feature_names].values
        label_array = dataframe['type'].values

        self.parameters = torch.from_numpy(feature_array).to(torch.float32)
        # Add a trailing axis: (N,) -> (N, 1)
        self.targets = torch.from_numpy(label_array).to(torch.float32).unsqueeze(1)

    def __len__(self):
        """Return the number of rows (samples)."""
        return self.parameters.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.parameters[idx], self.targets[idx]

ds = CancerDataset(cancer_df)

# 80 % of the samples go to training, the remainder to validation.
# The generator is seeded so the split is the same on every run.
n_train = int(0.8 * len(ds))
n_valid = len(ds) - n_train
split_generator = torch.Generator().manual_seed(42)
train_ds, valid_ds = torch.utils.data.random_split(
    ds,
    [n_train, n_valid],
    generator=split_generator,
)

Ahora el dataloader

In [4]:
from torch.utils.data import DataLoader

# Batch sizes for the training and validation loaders
BS_train = 64
BS_val = 128  # There are only 114 validation samples, so they all fit in a single batch

# Training batches are reshuffled; validation order is kept fixed
train_dl = DataLoader(dataset=train_ds, shuffle=True, batch_size=BS_train)
val_dl = DataLoader(dataset=valid_ds, shuffle=False, batch_size=BS_val)

Creamos la red neuronal que inicializa los pesos mediante el método [Xavier](https://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf)

In [5]:
from torch import nn

class CancerNeuralNetwork(nn.Module):
    """Fully connected network whose linear layers are Xavier-initialized.

    The network is a stack of ``Linear -> ReLU`` pairs (one per hidden
    width) followed by a final ``Linear`` layer that emits raw logits.
    A sigmoid is applied on top of the logits to obtain probabilities.
    """

    def __init__(self, num_inputs, num_outputs, hidden_layers=(100, 50, 20)):
        """Build the layer stack and initialize its parameters.

        Args:
            num_inputs: number of input features.
            num_outputs: number of output units (logits).
            hidden_layers: sequence with the width of each hidden layer.
                Any length is accepted, including an empty sequence, which
                yields a single linear layer. The default is an immutable
                tuple so it cannot be shared and mutated across instances.
        """
        super().__init__()

        # Consecutive pairs of this list are the (in, out) sizes of each Linear
        widths = [num_inputs, *hidden_layers, num_outputs]
        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(torch.nn.Linear(in_features, out_features))
            layers.append(torch.nn.ReLU())
        # The output layer must emit raw logits, so drop the trailing ReLU
        layers.pop()

        self.network = torch.nn.Sequential(*layers)
        self.activation = torch.nn.Sigmoid()

        # Visit every submodule and re-initialize the linear ones
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Apply Xavier-uniform weights and zero biases to linear layers.

        Modules that are not ``nn.Linear`` are left untouched.
        """
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            nn.init.zeros_(module.bias)

    def forward(self, x):
        """Return ``(logits, probs)`` where ``probs = sigmoid(logits)``."""
        logits = self.network(x)
        probs = self.activation(logits)
        return logits, probs

# Take the input width from one dataset sample instead of hard-coding it, so
# the model cannot get out of sync with the features the dataset serves
# (the value is 31 for the dataset built above).
num_inputs = ds[0][0].shape[0]
# A single output unit: one logit per sample
num_outputs = 1
model = CancerNeuralNetwork(num_inputs, num_outputs)

Llevamos la red a la GPU

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

# Move the model parameters to the selected device; as the last expression
# of the cell, this also displays the model structure in the notebook.
model.to(device)

Using cuda device


CancerNeuralNetwork(
  (network): Sequential(
    (0): Linear(in_features=31, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=20, bias=True)
    (5): ReLU()
    (6): Linear(in_features=20, out_features=1, bias=True)
  )
  (activation): Sigmoid()
)

Creamos la función de pérdida y el optimizador, y después las funciones de entrenamiento y validación

In [7]:
# Learning rate used by the SGD optimizer
LR = 1e-3

# Binary cross-entropy computed directly on raw logits, which is why the
# training code passes the model's logits (not its probabilities) to the loss
loss_fn2 = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=LR)

In [8]:
# Approximate number of progress lines printed per training epoch
num_prints = 4


def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch over ``dataloader``.

    Each batch is moved to the module-level ``device``, the loss is computed
    on the model's logits, and one optimizer step is taken. Progress is
    printed roughly ``num_prints`` times per epoch.

    Args:
        dataloader: iterable of ``(X, y)`` batches; must expose ``.dataset``
            and ``len()``.
        model: module whose forward pass returns ``(logits, probs)``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar loss
            tensor; it receives raw logits, not probabilities.
        optimizer: optimizer that updates the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    # With fewer batches than num_prints the integer interval would be 0 and
    # the modulo below would raise ZeroDivisionError; print every batch then.
    print_every = max(1, len(dataloader) // num_prints)

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # X and y to device
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss with the loss function that was passed
        # in (previously the global loss_fn2 was used and the argument ignored)
        logits, _ = model(X)
        loss = loss_fn(logits, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % print_every == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def val_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Runs in eval mode with gradients disabled. A prediction counts as
    correct when the rounded probability equals the label. The reported
    loss is the mean of the per-batch losses.

    Args:
        dataloader: iterable of ``(X, y)`` batches; must expose ``.dataset``
            and ``len()``.
        model: module whose forward pass returns ``(logits, probs)``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar loss
            tensor; it receives raw logits, not probabilities.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    model.eval()

    with torch.no_grad():
        for X, y in dataloader:
            # X and y to device
            X, y = X.to(device), y.to(device)

            logits, probs = model(X)
            # Use the loss function that was passed in (previously the
            # global loss_fn2 was used and the argument ignored)
            test_loss += loss_fn(logits, y).item()
            # Round probabilities to 0/1 and count matches with the labels
            correct += (probs.round() == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Entrenamos

In [9]:
# Number of full passes over the training set
epochs = 14
for t in range(epochs):
    # Epochs are reported 1-based
    print(f"Epoch {t+1}\n-------------------------------")
    # One optimization pass over the training data, then an evaluation
    # pass over the validation data
    train_loop(train_dl, model, loss_fn2, optimizer)
    val_loop(val_dl, model, loss_fn2)
print("Done!")

Epoch 1
-------------------------------
loss: 17.611856  [    0/  455]
loss: 16.588802  [  128/  455]
loss: 10.730642  [  256/  455]
loss: 9.457003  [  384/  455]
Test Error: 
 Accuracy: 59.6%, Avg loss: 3.365108 

Epoch 2
-------------------------------
loss: 3.455685  [    0/  455]
loss: 0.804640  [  128/  455]
loss: 5.263968  [  256/  455]
loss: 1.079523  [  384/  455]
Test Error: 
 Accuracy: 48.2%, Avg loss: 0.562191 

Epoch 3
-------------------------------
loss: 0.569557  [    0/  455]
loss: 0.462347  [  128/  455]
loss: 0.414672  [  256/  455]
loss: 1.439488  [  384/  455]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.482568 

Epoch 4
-------------------------------
loss: 0.425784  [    0/  455]
loss: 0.448654  [  128/  455]
loss: 0.497227  [  256/  455]
loss: 0.411788  [  384/  455]
Test Error: 
 Accuracy: 40.4%, Avg loss: 2.104557 

Epoch 5
-------------------------------
loss: 1.960162  [    0/  455]
loss: 1.266787  [  128/  455]
loss: 2.421741  [  256/  455]
loss: 2.754976  [  