In [None]:
%%HTML
<!-- Improve readability when the notebook is shown on a projector -->
<style>
.rendered_html {font-size: 1.2em; line-height: 150%;}
div.prompt {min-width: 0ex; padding: 0px;}
.container {width:95% !important;}
</style>

In [None]:
%autosave 0
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
import sklearn.datasets
import sklearn.model_selection

In [None]:
# Three candidate toy datasets. Each assignment overwrites the previous one, so only
# the last (make_blobs) is actually used; reorder/comment lines to try the others.
data, labels = sklearn.datasets.make_circles(n_samples=1000, noise=0.2, factor=0.25)
data, labels = sklearn.datasets.make_moons(n_samples=1000, noise=0.2)
data, labels = sklearn.datasets.make_blobs(n_samples=[250]*4, n_features=2, cluster_std=0.5,
                                          centers=np.array([[-1, 1], [1, 1], [-1, -1], [1, -1]]))
# Collapse the four blob labels into two: blob 2 joins class 1 and blob 3 joins class 0
# (presumably an XOR-like layout, given the centers listed above -- confirm label/center order).
labels[labels==2] = 1; labels[labels==3] = 0;

# Take the first random split with 60% of the samples for training.
# No random_state is passed, so the split changes on every run.
train_idx, test_idx = next(sklearn.model_selection.ShuffleSplit(train_size=0.6).split(data, labels))
print("%d samples for train and %d for testing" %(len(train_idx), len(data)-len(train_idx)))
# Plot data
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(data[labels==0, 0], data[labels==0, 1], marker='o', label="class 1", alpha=0.5)
ax.scatter(data[labels==1, 0], data[labels==1, 1], marker='o', label="class 2", alpha=0.5)
ax.set_xlabel('X1'); ax.set_ylabel('X2')
ax.grid(); plt.legend();

# Regular grid (step 0.05) covering the data range padded by 0.5 on each side;
# the training cells evaluate the networks on it to draw the decision surface.
x_min, x_max = data[:, 0].min() - 0.5, data[:, 0].max() + 0.5
y_min, y_max = data[:, 1].min() - 0.5, data[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.05), np.arange(y_min, y_max, 0.05))

# Red neuronal MLP con numpy

In [None]:
def logistic(z):
    """Evaluate the logistic (sigmoid) function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : float or numpy.ndarray
        Pre-activation value(s); arrays are processed element-wise.

    Returns
    -------
    float or numpy.ndarray
        Sigmoid of ``z``, with the same shape as the input.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

class MLP:
    """Single-hidden-layer perceptron for binary classification, written with numpy.

    Both layers use the logistic activation, so the network output is a value in
    [0, 1] per sample. Parameters are stored in two dictionaries,
    ``hidden_params`` and ``output_params``, each with keys ``'w'`` and ``'b'``.
    """

    def __init__(self, input_dim=2, hidden_dim=10, rstate=None):
        """Draw all weights and biases from a standard normal distribution.

        Parameters
        ----------
        input_dim : int
            Number of input features.
        hidden_dim : int
            Number of hidden units; must be positive.
        rstate : int or None
            Seed passed to ``np.random.seed``. Note that this reseeds numpy's
            *global* generator, so it also affects later ``np.random`` calls.
        """
        assert hidden_dim >0, "Neuronas ocultas debe ser mayor que cero"
        np.random.seed(rstate)
        self.hidden_dim = hidden_dim
        output_dim = 1  # binary classification: a single output unit
        # Draw order (hidden w, hidden b, output w, output b) is kept fixed so a
        # given seed always produces the same initial network.
        self.hidden_params = {'w': np.random.randn(input_dim, hidden_dim),
                              'b': np.random.randn(hidden_dim)}
        self.output_params = {'w': np.random.randn(hidden_dim, output_dim),
                              'b': np.random.randn(output_dim)}

    def forward(self, x, only_output=True):
        """Propagate a batch through the network.

        Parameters
        ----------
        x : numpy.ndarray, shape (n_samples, input_dim)
        only_output : bool
            If True return only the output activations, otherwise also return
            the hidden activations.

        Returns
        -------
        y : numpy.ndarray, shape (n_samples, 1)
            Returned alone when ``only_output`` is True.
        (z, y) : tuple
            Hidden activations, shape (n_samples, hidden_dim), followed by the
            output activations, when ``only_output`` is False.
        """
        z = logistic(np.dot(x, self.hidden_params['w']) + self.hidden_params['b'])
        y = logistic(np.dot(z, self.output_params['w']) + self.output_params['b'])
        if only_output:
            return y
        return z, y

    def score(self, x, y, eps=1e-10):
        """Per-sample binary cross-entropy (negative log-likelihood).

        Parameters
        ----------
        x : numpy.ndarray, shape (n_samples, input_dim)
        y : array_like of 0/1 labels, shape (n_samples,) or (n_samples, 1)
        eps : float
            Small constant added inside the logarithms to avoid ``log(0)``.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
        """
        # Flatten the labels so a column vector cannot broadcast against the
        # 1-D predictions into an (n, n) matrix.
        y = np.ravel(y)
        yhat = self.forward(x)[:, 0]
        logL = y*np.log(yhat+eps) + (1.0-y)*np.log(1.0-yhat+eps)
        return -logL

    def backward(self, x, y, eta=1e-2):
        """Perform one gradient-descent step on the summed cross-entropy of a batch.

        Parameters
        ----------
        x : numpy.ndarray, shape (n_samples, input_dim)
        y : array_like of 0/1 labels, shape (n_samples,) or (n_samples, 1)
        eta : float
            Learning rate.

        The parameters are updated in place; nothing is returned.
        """
        # Force a column vector: a 1-D y would broadcast against the
        # (n_samples, 1) predictions into an (n_samples, n_samples) matrix.
        y = np.asarray(y).reshape(-1, 1)
        zhat, yhat = self.forward(x, only_output=False)
        # Gradient of the cross-entropy with respect to the output
        # pre-activation (sigmoid + cross-entropy simplifies to yhat - y).
        delta_out = yhat - y
        # Gradient with respect to the hidden pre-activations: back-propagate
        # through the output weights and the logistic derivative z*(1-z).
        # Computed BEFORE the output weights are modified, so every gradient
        # refers to the same set of parameters.
        delta_hidden = np.dot(delta_out, self.output_params['w'].T)*zhat*(1.0 - zhat)
        self.output_params['w'] -= eta*np.dot(zhat.T, delta_out)
        self.output_params['b'] -= eta*np.sum(delta_out, axis=0)
        self.hidden_params['w'] -= eta*np.dot(x.T, delta_hidden)
        # One bias gradient per hidden unit, summed over the batch.
        self.hidden_params['b'] -= eta*np.sum(delta_hidden, axis=0)

In [None]:
# Network to train plus its hyper-parameters: number of epochs and learning rate
nnet = MLP(hidden_dim=10)
n_epochs, eta = 1000, 1e-2
# One row per epoch: column 0 receives the mean train cost, column 1 the mean test cost
cost_history = np.zeros(shape=(n_epochs, 2))
# Two panels: ax[0] shows the decision surface, ax[1] the cost curves
fig, ax = plt.subplots(1, 2, figsize=(8, 4), tight_layout=True)

def update_plot(k, batch_size=10):
    """Animation callback: record costs, redraw both panels, then train one epoch.

    Reads the notebook-level ``nnet``, ``cost_history``, ``ax``, ``data``,
    ``labels``, ``train_idx``, ``test_idx``, ``xx``, ``yy`` and ``eta``.

    Parameters
    ----------
    k : int
        Frame index supplied by ``FuncAnimation``; used as the epoch counter and
        as the row of ``cost_history`` to fill.
    batch_size : int
        Number of samples per mini-batch. Samples that do not fill a complete
        batch are skipped for this epoch.
    """
    # Costs are measured before this frame's training pass.
    cost_history[k, 0] = np.mean(nnet.score(data[train_idx], labels[train_idx]))
    cost_history[k, 1] = np.mean(nnet.score(data[test_idx], labels[test_idx]))
    for ax_ in ax:
        ax_.cla()
    # Network output on the grid, reshaped back to the grid layout for contourf.
    Z = nnet.forward(np.c_[xx.ravel(), yy.ravel()])[:, 0]
    Z = Z.reshape(xx.shape)
    ax[0].contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=1., vmin=0, vmax=1)
    # The loop index does double duty: it selects the class drawn with each
    # marker on the left panel and the cost_history column on the right panel.
    for i, (marker, label) in enumerate(zip(['o', 'x'], ['Train', 'Test'])):
        ax[0].scatter(data[labels==i, 0], data[labels==i, 1], s=10, color='k', marker=marker, alpha=0.5)
        ax[1].plot(np.arange(0, k+1, step=1), cost_history[:k+1, i], '-', label=label+" cost")
    # Address the axes explicitly: plt.legend() acts on pyplot's *current* axes,
    # which belongs to another figure as soon as a later cell creates one while
    # this animation is still running.
    ax[1].legend(); ax[1].grid()
    # One epoch of mini-batch gradient descent over a fresh shuffle of the training set.
    idx = np.random.permutation(len(train_idx))
    for i in range(len(idx)//batch_size):
        idx_mb = train_idx[idx[i*batch_size:(i+1)*batch_size]]
        nnet.backward(data[idx_mb, :], labels[idx_mb, np.newaxis], eta=eta)

# Call update_plot for frames 0..n_epochs-1 (one training epoch per frame), asking for
# 10 ms between frames, without looping and with full redraws (blit disabled).
anim = animation.FuncAnimation(fig, update_plot, frames=n_epochs, 
                               interval=10, repeat=False, blit=False)

**Ejercicios**
- Experimente variando el número de capas, número de neuronas y tasa de aprendizaje. Comente sobre cómo se reflejan estas modificaciones en el desempeño de la red y en la complejidad del hiperplano
- Discuta sobre la relación entre complejidad del hiperplano, capacidad de generalización y sobreajuste

# Red neuronal MLP con [PyTorch](https://pytorch.org/)

In [None]:
torch.nn.Linear?

In [None]:
import torch

# Implementar red neuronal
class myMLP(torch.nn.Module):
    """Single-hidden-layer perceptron implemented with PyTorch (reference solution).

    The network returns raw logits, not probabilities: the training cell pairs it
    with ``BCEWithLogitsLoss`` and applies a sigmoid separately when plotting.
    """

    def __init__(self, input_dim=2, hidden_dim=10, output_dim=1):
        """Create the two affine layers.

        Parameters
        ----------
        input_dim : int
            Number of input features.
        hidden_dim : int
            Number of hidden units.
        output_dim : int
            Number of output units (1 for binary classification).
        """
        super(myMLP, self).__init__()
        self.hidden = torch.nn.Linear(input_dim, hidden_dim)
        self.output = torch.nn.Linear(hidden_dim, output_dim)
        # Same hidden non-linearity as the numpy MLP of this notebook.
        self.activation = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a float tensor of shape (batch, input_dim) to logits of shape (batch, output_dim)."""
        return self.output(self.activation(self.hidden(x)))

# Build the training and test sets
from torch.utils.data import DataLoader, TensorDataset, Subset 

# Features and labels are converted to float32 tensors and paired sample by sample
torch_set = TensorDataset(torch.from_numpy(data.astype('float32')), 
                          torch.from_numpy(labels.astype('float32')))

# Both loaders view the same dataset through the train/test index arrays of the split;
# only the training loader reshuffles its samples
torch_train_loader = DataLoader(Subset(torch_set, train_idx), shuffle=True, batch_size=32)
torch_valid_loader = DataLoader(Subset(torch_set, test_idx), shuffle=False, batch_size=256)

In [None]:
# Two panels: ax[0] shows the decision surface, ax[1] the loss curves
fig, ax = plt.subplots(1, 2, figsize=(8, 3.5), tight_layout=True)

n_epochs = 1000
net = myMLP(input_dim=2, hidden_dim=100, output_dim=1)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
# The criterion receives raw logits, so the network must not end in a sigmoid
criterion = torch.nn.BCEWithLogitsLoss()
# One row per epoch: column 0 receives the train loss, column 1 the validation loss
running_loss = np.zeros(shape=(n_epochs, 2))
# Used only to turn logits into values in [0, 1] when drawing the decision surface
sigmoid = torch.nn.Sigmoid()

def train_one_epoch(net):
    """Run one optimisation pass over the training set, then evaluate on the validation set.

    Relies on the notebook-level ``torch_train_loader``, ``torch_valid_loader``,
    ``optimizer`` and ``criterion``. The criterion is assumed to return the
    *mean* loss of the batch it receives (the default reduction).

    Parameters
    ----------
    net : torch.nn.Module
        Model producing logits of shape (batch, 1).

    Returns
    -------
    (train_loss, valid_loss) : tuple of float
        Average loss per sample over each loader (0.0 for an empty loader).
        The training loss is accumulated while the weights are being updated.
    """
    train_loss, n_train = 0.0, 0
    for sample_data, sample_label in torch_train_loader:
        optimizer.zero_grad()
        output = net(sample_data)
        loss = criterion(output[:, 0], sample_label)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short last batch does not
        # count as much as a full one.
        train_loss += loss.item()*len(sample_label)
        n_train += len(sample_label)

    valid_loss, n_valid = 0.0, 0
    # Evaluation only: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        for sample_data, sample_label in torch_valid_loader:
            output = net(sample_data)
            loss = criterion(output[:, 0], sample_label)
            valid_loss += loss.item()*len(sample_label)
            n_valid += len(sample_label)
    return train_loss/max(n_train, 1), valid_loss/max(n_valid, 1)
    
def update_plot(k):
    """Animation callback: train one epoch, then redraw the decision surface and loss curves.

    Reads the notebook-level ``net``, ``running_loss``, ``ax``, ``sigmoid``,
    ``data``, ``labels``, ``xx`` and ``yy``.

    Parameters
    ----------
    k : int
        Frame index supplied by ``FuncAnimation``; used as the epoch counter and
        as the row of ``running_loss`` to fill.
    """
    for ax_ in ax:
        ax_.cla()
    running_loss[k, 0], running_loss[k, 1] = train_one_epoch(net)
    # Evaluate the network on the grid. No gradients are needed for plotting,
    # and calling the module (rather than .forward) is the supported entry point.
    grid = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()].astype('float32'))
    with torch.no_grad():
        Z = sigmoid(net(grid)).numpy().reshape(xx.shape)
    ax[0].contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=1., vmin=0, vmax=1)
    # The loop index does double duty: it selects the class drawn with each
    # marker on the left panel and the running_loss column on the right panel.
    for i, (marker, label) in enumerate(zip(['o', 'x'], ['Train', 'Test'])):
        ax[0].scatter(data[labels==i, 0], data[labels==i, 1], color='k', s=10, marker=marker, alpha=0.5)
        ax[1].plot(np.arange(0, k+1, step=1), running_loss[:k+1, i], '-', label=label+" cost")
    # Address the axes explicitly: plt.legend() acts on pyplot's *current* axes,
    # which may belong to a different figure while this animation is running.
    ax[1].legend(); ax[1].grid()
    
# Call update_plot for frames 0..n_epochs-1 (one training epoch per frame), asking for
# 10 ms between frames, without looping and with full redraws (blit disabled).
anim = animation.FuncAnimation(fig, update_plot, frames=n_epochs, 
                               interval=10, repeat=False, blit=False)

## Opcional: Implementación usando tensorflow

In [None]:
import tensorflow as tf
import time
from os.path import join

# NOTE(review): reset_default_graph / placeholder / variable_scope look like the
# TensorFlow 1.x graph-mode API -- presumably this cell needs TF 1.x (or tf.compat.v1); confirm.
# Start from an empty graph so re-running the cell does not duplicate nodes.
tf.reset_default_graph()
# Inputs fed at run time: a batch of 2-D points and their labels as a column vector
tf_input = tf.placeholder(tf.float32, [None, 2], name='input')
tf_label = tf.placeholder(tf.float32, [None, 1], name='target')

Nh = 10  # number of hidden units
nepochs = 500  # number of full-batch training steps run by the training cell

# Hidden layer: affine map followed by tanh
with tf.variable_scope('Hidden_layer'):
    bh = tf.Variable(tf.zeros([Nh]), name="bias", dtype=tf.float32)
    wh = tf.Variable(tf.random_uniform([2, Nh], -1.0, 1.0), name="weight", dtype=tf.float32)
    z = tf.nn.tanh(tf.matmul(tf_input, wh) + bh)

# Output layer: affine map only, so `y` holds logits (no sigmoid is applied here)
with tf.variable_scope('Output_layer'):
    bo = tf.Variable(tf.zeros([1]), name="bias", dtype=tf.float32)
    wo = tf.Variable(tf.random_uniform([Nh, 1], -1.0, 1.0), name="weight", dtype=tf.float32)
    y = tf.add(tf.matmul(z, wo), bo)

# Loss (sigmoid cross-entropy computed from the logits, averaged over the batch),
# Adam training step and the variable initialiser
with tf.variable_scope('Optimizer'):
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf_label, logits=y)
    loss_op = tf.reduce_mean(cross_entropy)  
    optimizer = tf.train.AdamOptimizer(1e-2)
    train_op = optimizer.minimize(loss_op) 
    init = tf.global_variables_initializer()

# Quantities logged for tensorboard: the loss and the output-layer parameters
with tf.name_scope('summaries'):
    tf.summary.scalar('loss', loss_op)
    tf.summary.histogram('output_weight', wo)
    tf.summary.histogram('output_bias', bo)

# Single op that evaluates every summary registered above
merged = tf.summary.merge_all()

**Ejercicios:** 
- Visualice el grafo, las curvas de aprendizaje y los histogramas de parámetros usando la herramienta tensorboard
- Modifique el código que genera el grafo para agregar una segunda capa oculta
- Estudie la función de mayor abstracción tf.layers.dense y úsela para modificar el código que genera el grafo
- ¿Cómo modificaría el código de entrenamiento para usar mini-batches?

**Instrucciones tensorboard**
1. Ejecutar: tensorboard --logdir /tmp/tensorboard/ 
2. Apuntar el navegador a localhost:6006

In [None]:
# Train/test partitions fed to the graph. They are built here from the split
# indices because no other cell of the notebook defines these names.
data_train, labels_train = data[train_idx], labels[train_idx]
data_test, labels_test = data[test_idx], labels[test_idx]

# Each run logs into its own time-stamped sub-directory of the tensorboard log dir.
log_dir = join("/tmp/tensorboard/", str(time.time()))
train_loss = np.zeros(shape=(nepochs))
test_loss = np.zeros(shape=(nepochs))
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(join(log_dir, 'train'), sess.graph)
    test_writer = tf.summary.FileWriter(join(log_dir, 'test'), sess.graph)
    try:
        sess.run(init)
        for i in range(nepochs):
            # Full-batch training step; also fetch the loss and the summaries
            _, train_loss[i], summary = sess.run([train_op, loss_op, merged],
                                                 feed_dict={tf_input: data_train,
                                                            tf_label: np.reshape(labels_train, [-1, 1])})
            train_writer.add_summary(summary, i)
            # Evaluation on the test partition (train_op is not run, so no update happens)
            pred_test, test_loss[i], summary = sess.run([y, loss_op, merged],
                                                        feed_dict={tf_input: data_test,
                                                                   tf_label: np.reshape(labels_test, [-1, 1])})
            test_writer.add_summary(summary, i)

        # Network output (logits) on the plotting grid, reshaped back to the grid layout
        Z = sess.run(y, feed_dict={tf_input: (np.c_[xx.ravel(), yy.ravel()]).astype('float32')})
        Z = Z.reshape(xx.shape)
    finally:
        # Flush and release the event files even if training fails half-way.
        train_writer.close()
        test_writer.close()

# Left panel: network output on the grid (Z, computed in the session above) with the
# samples of class 0 drawn as circles and those of class 1 as crosses
fig = plt.figure(figsize=(14, 5))
ax = fig.add_subplot(1, 2, 1)
ax.contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=0.75)
ax.scatter(data[labels==0, 0], data[labels==0, 1], color='k', marker='o', alpha=0.5)
ax.scatter(data[labels==1, 0], data[labels==1, 1], color='k', marker='x', alpha=0.5)
# Right panel: train and test loss recorded at every epoch
ax = fig.add_subplot(1, 2, 2)
ax.plot(train_loss, label='Train loss', linewidth=2)
ax.plot(test_loss, label='Test loss', linewidth=2)
plt.grid()
plt.legend()
plt.tight_layout();