In [None]:
%matplotlib notebook
import torch
import numpy as np
import matplotlib.pyplot as plt

# Preparación de datos

- Datos de ejemplo: Problema no linealmente separable
- DataSet y DataLoaders

In [None]:
import sklearn.datasets
# Synthetic binary problem that is not linearly separable (two noisy concentric circles).
# random_state pins the sampled points so that "Restart & Run All" reproduces the same data.
data, labels = sklearn.datasets.make_circles(n_samples=1000, noise=0.2, factor=0.25, random_state=1234)
# Alternative toy problems (swap one in to experiment):
#data, labels = sklearn.datasets.make_moons(n_samples=1000, noise=0.2)
#data, labels = sklearn.datasets.make_blobs(n_samples=[250]*4, n_features=2, cluster_std=0.5,
#                                          centers=np.array([[-1, 1], [1, 1], [-1, -1], [1, -1]]))
#labels[labels==2] = 1; labels[labels==3] = 0;

# Scatter plot of the raw data: 'x' marks label 0 and 'o' marks label 1
fig, ax = plt.subplots(figsize=(6, 4), tight_layout=True)
for k, marker in enumerate(['x', 'o']):
    ax.scatter(data[labels==k, 0], data[labels==k, 1], c='k', s=20, marker=marker, alpha=0.75)

# Regular grid (step 0.02) covering the data range plus a 0.5 margin on every side;
# later cells evaluate the model on it to draw filled contour plots
x_min, x_max = data[:, 0].min() - 0.5, data[:, 0].max() + 0.5
y_min, y_max = data[:, 1].min() - 0.5, data[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

import sklearn.model_selection
# Split the sample indices into training (75%) and validation (the remainder).
# ShuffleSplit yields (train, test) index arrays; only the first split is used.
# random_state pins the partition so that it is identical on every run.
train_idx, valid_idx = next(
    sklearn.model_selection.ShuffleSplit(train_size=0.75, random_state=1234).split(data, labels)
)


# Crear conjuntos de entrenamiento y prueba
from torch.utils.data import DataLoader, TensorDataset, Subset 

# Convert the NumPy arrays to float32 tensors and pair them in one dataset,
# so that indexing it returns a (features, target) tuple
features_tensor = torch.from_numpy(data.astype('float32'))
targets_tensor = torch.from_numpy(labels.astype('float32'))
torch_set = TensorDataset(features_tensor, targets_tensor)

# Training loader: mini-batches of 32 over the training indices, reshuffled on each pass
torch_train_loader = DataLoader(Subset(torch_set, train_idx), batch_size=32, shuffle=True)
# Validation loader: batches of 256 over the validation indices, in a fixed order
torch_valid_loader = DataLoader(Subset(torch_set, valid_idx), batch_size=256, shuffle=False)

# Perceptrón multicapa

- ¿Cómo cambia el resultado según la cantidad de capas y neuronas ocultas?
- ¿Cómo cambia el resultado usando distintas inicializaciones?
- ¿Cómo cambia el resultado usando distintas funciones de activación?


In [None]:
class MultiLayerPerceptron(torch.nn.Module):
    """Fully connected feed-forward network with sigmoid hidden activations.

    Parameters
    ----------
    neurons : sequence of int
        Layer widths ``[n_inputs, n_hidden_1, ..., n_hidden_k, n_outputs]``.
        It needs at least two entries; with exactly two there is no hidden
        layer and the model reduces to a single linear layer.

    Raises
    ------
    ValueError
        If ``neurons`` has fewer than two entries.
    """

    def __init__(self, neurons=(2, 1)):
        # Zero-argument super(): ``super(type(self), self)`` recurses forever as
        # soon as the class is subclassed, because type(self) is then the subclass.
        super().__init__()
        if len(neurons) < 2:
            raise ValueError("neurons must contain at least the input and the output size")

        # ModuleList registers each layer as a sub-module, so their parameters
        # show up in model.parameters() and in the state dict
        self.hidden = torch.nn.ModuleList()
        for n_in, n_out in zip(neurons[:-2], neurons[1:-1]):
            self.hidden.append(torch.nn.Linear(n_in, n_out))

        self.output = torch.nn.Linear(neurons[-2], neurons[-1])
        self.activation = torch.nn.Sigmoid()

    def forward(self, x):
        """Apply every hidden layer followed by the activation, then the output layer.

        The activation is not applied to the output layer. ``squeeze(1)`` drops
        dimension 1 of the result when that dimension has size 1.
        """
        # ModuleList is iterable, layers come out in insertion order
        for layer in self.hidden:
            x = self.activation(layer(x))

        return self.output(x).squeeze(1)

In [None]:
def train_one_step(batch):
    """Run one optimisation step on a mini-batch and return its loss as a float.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.

    Parameters
    ----------
    batch : tuple
        ``(x, y)`` pair as produced by the data loader.
    """
    model.train()
    optimizer.zero_grad()  # Clear the gradients left over from the previous step
    x, y = batch
    # Call the module itself rather than .forward() so that registered hooks run
    yhat = model(x)
    loss = criterion(yhat, y)
    loss.backward()
    optimizer.step()
    return loss.item()

def evaluate_one_step(batch):
    """Evaluate one mini-batch without tracking gradients.

    Uses the notebook-level ``model`` and ``criterion``.

    Parameters
    ----------
    batch : tuple
        ``(x, y)`` pair as produced by the data loader.

    Returns
    -------
    tuple
        ``(yhat, y.unsqueeze(1), loss)`` where ``yhat`` holds hard 0/1
        predictions and ``loss`` is a Python float.
    """
    model.eval()
    with torch.no_grad():
        x, y = batch
        # Call the module itself rather than .forward() so that registered hooks run
        logits = model(x)
        loss = criterion(logits, y)
        # The model outputs logits: threshold the sigmoid probability at 0.5
        yhat = (torch.sigmoid(logits) > 0.5).float()
        # NOTE(review): with the MultiLayerPerceptron of this notebook yhat is 1-D
        # while the returned target gets an extra dimension; comparing them
        # directly would broadcast. Shapes are kept as they were for compatibility.
        return yhat, y.unsqueeze(1), loss.item()

def draw_plots(epoch):
    """Redraw the model output over the grid (left) and the loss curves (right).

    Uses the notebook-level ``model``, ``xx``/``yy`` grid, ``data``/``labels``,
    ``running_loss`` and the ``fig``/``ax`` pair holding two axes.

    Parameters
    ----------
    epoch : int
        Index of the last finished epoch; curves are drawn for epochs 0..epoch.
    """
    XY = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()].astype('float32'))
    # Plotting needs no gradients; sigmoid maps the logits to the [0, 1] range
    with torch.no_grad():
        Z = torch.sigmoid(model(XY)).numpy().reshape(xx.shape)
    for ax_ in ax:
        ax_.cla()
    ax[0].contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=1., vmin=0, vmax=1)
    # The index i is reused on purpose: it selects the class for the scatter
    # plot and the column of running_loss for the curve
    for i, (marker, name) in enumerate(zip(['o', 'x'], ['Train', 'Test'])):
        ax[0].scatter(data[labels==i, 0], data[labels==i, 1], color='k', s=10, marker=marker, alpha=0.5)
        ax[1].plot(np.arange(0, epoch+1, step=1), running_loss[:epoch+1, i], '-', label=name+" cost")
    # Target the loss axis explicitly instead of relying on pyplot's current axes
    ax[1].legend(); ax[1].grid()
    fig.canvas.draw()
    
# Two-panel figure that draw_plots redraws: ax[0] shows the model output, ax[1] the loss curves
fig, ax = plt.subplots(1, 2, figsize=(8, 3.5), tight_layout=True)

In [None]:
torch.manual_seed(1234)  # Seed torch's RNG before the weights are initialised

neurons = [2, 2, 1]  # Architecture: 2 inputs, one hidden layer with 2 units, 1 output
model = MultiLayerPerceptron(neurons)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
# reduction='sum' so that batch losses can be added up and divided by the dataset size
criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')

max_epochs = 100
# Column 0 holds the training loss per sample, column 1 the validation loss per sample
running_loss = np.zeros(shape=(max_epochs, 2))
best_valid_loss = np.inf
# Dataset sizes do not change during training: compute them once
n_train = len(torch_train_loader.dataset)
n_valid = len(torch_valid_loader.dataset)

for epoch in range(max_epochs):
    train_loss, valid_loss = 0.0, 0.0
    # Training pass
    for batch in torch_train_loader:
        train_loss += train_one_step(batch)
    running_loss[epoch, 0] = train_loss/n_train
    # Validation pass; the loss is the last element returned by evaluate_one_step
    for batch in torch_valid_loader:
        valid_loss += evaluate_one_step(batch)[-1]
    running_loss[epoch, 1] = valid_loss/n_valid
    # Checkpoint the model whenever the validation loss improves
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save({'current_epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'current_valid_loss': valid_loss},
                   'best_model.pt')
    draw_plots(epoch)

- Cada neurona es un hiperplano
- La primera capa son hiperplanos en el espacio de los datos
- La segunda capa es un hiperplano en la salida de la primera capa
- La segunda capa no es un hiperplano en el espacio de los datos

In [None]:
# Rebuild the network and restore the weights of the best checkpoint
model = MultiLayerPerceptron(neurons)
model.load_state_dict(torch.load('best_model.pt')['model_state_dict'])

XY = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()].astype('float32'))
fig, ax = plt.subplots(2, 2, figsize=(8, 7), tight_layout=True)


def _scatter_classes(axis):
    """Overlay the samples on ``axis``: 'o' for label 0 and 'x' for label 1."""
    for i, marker in enumerate(['o', 'x']):
        axis.scatter(data[labels==i, 0], data[labels==i, 1], color='k', s=10, marker=marker, alpha=0.5)


# Top row: activation of each of the two units of the first hidden layer over the data-space grid
Z = model.activation(model.hidden[0](XY)).detach().numpy()
for k in range(2):
    top_axis = ax[0, k]
    top_axis.set_title(k+1)
    top_axis.contourf(xx, yy, Z[:, k].reshape(xx.shape),
                      cmap=plt.cm.RdBu_r, alpha=1., vmin=0, vmax=1)
    _scatter_classes(top_axis)

# Bottom right: output of the whole network over the data-space grid
Z = model.activation(model.forward(XY)).detach().numpy()
ax[1, 1].contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.RdBu_r, alpha=1.)
_scatter_classes(ax[1, 1])

# Bottom left: output layer alone, fed with the grid points as if they were
# hidden-layer activations; the view is limited to [0, 1] x [0, 1] and the axes
# are labelled with the hidden unit numbers
Z = model.activation(model.output(XY)).detach().numpy()
bottom_left = ax[1, 0]
bottom_left.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.RdBu_r, alpha=1.)
bottom_left.set_xlim([0, 1]); bottom_left.set_ylim([0, 1]);
bottom_left.set_xlabel('1'); bottom_left.set_ylabel('2');

### Entrenamiento usando Ignite

Ignite es una librería de alto nivel para entrenar y evaluar redes neuronales en PyTorch.

Provee engines, eventos, manejadores y métricas

- Los engines se encargan de entrenar y evaluar la red. Se ponen en marcha llamando a su método `run`
- Una métrica es un valor con el que evaluamos nuestra red (Loss, accuracy, f1-score)
- Los manejadores nos permiten realizar acciones cuando se cumple un evento, por ejemplo
    - Imprimir los resultados
    - Guardar el mejor modelo


In [None]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss
from ignite.handlers import ModelCheckpoint


torch.manual_seed(1234)  # Seed torch's RNG before the weights are initialised
neurons = [2, 10, 1]
model = MultiLayerPerceptron(neurons)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
max_epochs = 100

# Engine that trains the model
trainer = create_supervised_trainer(model, optimizer, criterion)
metrics = {'Loss': Loss(criterion)}
# Engine that evaluates the model and computes the metrics above
evaluator = create_supervised_evaluator(model, metrics=metrics)


def log_results(engine):
    """Run the evaluator on the validation loader and print the resulting loss."""
    evaluator.run(torch_valid_loader)
    loss = evaluator.state.metrics['Loss']
    print(f"Epoca: {engine.state.epoch} \t Loss: {loss:.2f}")


# Trigger the validation pass every 10 training epochs
trainer.add_event_handler(Events.EPOCH_COMPLETED(every=10), log_results)


def _negative_val_loss(engine):
    """Checkpoint score: the negated loss of the engine."""
    return -engine.state.metrics['Loss']


best_model_handler = ModelCheckpoint(dirname='.', require_empty=False, filename_prefix="best", n_saved=1,
                                     score_function=_negative_val_loss,
                                     score_name="val_loss")

# Runs every time the evaluator finishes a validation pass
evaluator.add_event_handler(Events.COMPLETED, best_model_handler, {'mymodel': model})

trainer.run(torch_train_loader, max_epochs=max_epochs)

In [None]:
# Rebuild the network and restore the weights written by the checkpoint handler.
# The checkpoint file name embeds the score of the run that produced it, so it is
# read from the handler rather than hard-coded (a fixed name breaks on any re-run).
model = MultiLayerPerceptron(neurons)
model.load_state_dict(torch.load(best_model_handler.last_checkpoint))

# Output of the restored network over the grid, with the samples on top
# ('o' for label 0, 'x' for label 1)
fig, ax = plt.subplots(1, 1, figsize=(4, 3.5), tight_layout=True)
Z = model.activation(model(XY)).detach().numpy()
ax.contourf(xx, yy, Z.reshape(xx.shape), cmap=plt.cm.RdBu_r, alpha=1.)
for i, marker in enumerate(['o', 'x']):
    ax.scatter(data[labels==i, 0], data[labels==i, 1], color='k', s=10, marker=marker, alpha=0.5)

Engines customizados

In [None]:
from ignite.engine import Engine

# This is what the training engine does
def train_one_step(engine, batch):
    """Process function for a custom training Engine: one optimisation step.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``.

    Parameters
    ----------
    engine : Engine
        The engine running this function (unused here).
    batch : tuple
        ``(x, y)`` pair as produced by the data loader.

    Returns
    -------
    float
        Loss of the batch.
    """
    model.train()
    optimizer.zero_grad()
    x, y = batch
    yhat = model(x)
    # The model already squeezes its output, so the target is passed as-is:
    # adding a dimension to y makes the shapes differ and the loss rejects them
    loss = criterion(yhat, y)
    loss.backward()
    optimizer.step()
    return loss.item()  # This output can be read later as trainer.state.output

# This is what the evaluation engine does
def evaluate_one_step(engine, batch):
    """Process function for a custom evaluation Engine.

    Uses the notebook-level ``model``.

    Parameters
    ----------
    engine : Engine
        The engine running this function (unused here).
    batch : tuple
        ``(x, y)`` pair as produced by the data loader.

    Returns
    -------
    tuple
        ``(logits, y)`` with matching shapes.
    """
    model.eval()
    with torch.no_grad():
        x, y = batch
        # Return the raw logits and the untouched target: the Loss metric attached
        # to this engine feeds both to the criterion, which expects logits and
        # equal shapes. Thresholded predictions with an extra target dimension
        # would make the criterion fail.
        # NOTE(review): for accuracy-like metrics, threshold inside the metric's
        # output_transform instead of here.
        return model(x), y

# Wrap the two step functions defined above in Engine objects
trainer = Engine(train_one_step)
evaluator = Engine(evaluate_one_step)
# Attach every metric of the notebook-level `metrics` dict to the evaluator under its dict key
for name, metric in metrics.items():
    metric.attach(evaluator, name)

# Pytorch, Ignite y Tensorboard

Podemos usar la herramienta [tensorboard](https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html) para visualizar el entrenamiento de la red en vivo y/o comparar distintos entrenamientos

- Instalar tensorboard versión 1.15 o mayor con conda

- Escribir en un terminal

        tensorboard --logdir=/tmp/tensorboard/

- Apuntar el navegador a 

        http://localhost:6006

In [None]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss
from ignite.handlers import ModelCheckpoint
from torch.utils.tensorboard import SummaryWriter

torch.manual_seed(1234)  # Seed torch's RNG before the weights are initialised
neurons = [2, 10, 1]
model = MultiLayerPerceptron(neurons)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
max_epochs = 100

# Engine that trains the model
trainer = create_supervised_trainer(model, optimizer, criterion)
metrics = {'Loss': Loss(criterion)}
# Validation engine: the only one the checkpoint handler listens to
evaluator = create_supervised_evaluator(model, metrics=metrics)
# Separate engine for the training-set loss. Sharing one evaluator for both
# passes would make the "best validation loss" checkpoint also fire on
# training-set results.
train_evaluator = create_supervised_evaluator(model, metrics={'Loss': Loss(criterion)})


with SummaryWriter(log_dir='/tmp/tensorboard/run1') as writer:

    @trainer.on(Events.EPOCH_COMPLETED(every=1))  # After every epoch
    def log_train_results(engine):
        """Evaluate on the training set and log the loss to TensorBoard."""
        train_evaluator.run(torch_train_loader)
        loss = train_evaluator.state.metrics['Loss']
        #print(f"Epoca: {engine.state.epoch} \t Train Loss: {loss:.2f}")
        writer.add_scalar("train/loss", loss, engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED(every=1))  # After every epoch
    def log_valid_results(engine):
        """Evaluate on the validation set and log the loss to TensorBoard."""
        evaluator.run(torch_valid_loader)
        loss = evaluator.state.metrics['Loss']
        #print(f"Epoca: {engine.state.epoch} \t Valid Loss: {loss:.2f}")
        writer.add_scalar("valid/loss", loss, engine.state.epoch)

    # Keep a single checkpoint, scored by the negated validation loss
    best_model_handler = ModelCheckpoint(dirname='.', require_empty=False, filename_prefix="best", n_saved=1,
                                         score_function=lambda engine: -engine.state.metrics['Loss'],
                                         score_name="val_loss")

    # Runs every time the validation evaluator finishes a pass
    evaluator.add_event_handler(Events.COMPLETED,
                                best_model_handler, {'mymodel': model})

    trainer.run(torch_train_loader, max_epochs=max_epochs)