In [None]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation

In [None]:
def create_noisy_xor(N_per_cluster=500, stddev_noise=0.4):
    """Sample a shuffled, noisy two-class XOR dataset in 2-D.

    Four Gaussian clusters are centred on the corners of the square
    ``(+-1, +-1)``. The clusters at ``(1, -1)`` and ``(-1, 1)`` form class 0;
    the clusters at ``(-1, -1)`` and ``(1, 1)`` form class 1.

    Parameters
    ----------
    N_per_cluster : int
        Number of samples drawn around each of the four centres.
    stddev_noise : float
        Standard deviation of the isotropic Gaussian noise added to each centre.

    Returns
    -------
    data : ndarray of shape (4*N_per_cluster, 2)
        Sample coordinates, in random order.
    labels : ndarray of int, shape (4*N_per_cluster,)
        Class label (0 or 1) of each row of ``data``.

    Notes
    -----
    Draws from NumPy's global random generator (one ``randn`` call followed by
    one ``permutation`` call), so call ``np.random.seed`` beforehand for
    reproducible output.
    """
    # One centre per cluster, in the order the rows are laid out before shuffling.
    centers = np.array([[1.0, -1.0],
                        [-1.0, 1.0],
                        [-1.0, -1.0],
                        [1.0, 1.0]])
    data = stddev_noise*np.random.randn(4*N_per_cluster, 2)
    data += np.repeat(centers, N_per_cluster, axis=0)
    # The first two clusters keep label 0; the last two get label 1.
    labels = np.zeros(shape=(4*N_per_cluster,), dtype=int)
    labels[2*N_per_cluster:] = 1
    # Shuffle samples and labels with the same permutation.
    NP = np.random.permutation(4*N_per_cluster)
    return data[NP, :], labels[NP]

# Build the dataset; the first 500 shuffled samples train the models,
# the remaining ones are held out for testing.
data, labels = create_noisy_xor()
data_train, data_test = data[:500, :], data[500:, :]
labels_train, labels_test = labels[:500], labels[500:]
print("%d samples for train and %d for testing" %(len(data_train), len(data_test)))

# Scatter plot of the full dataset, one colour per class.
fig, ax = plt.subplots(figsize=(8, 5))
for class_id, class_name in enumerate(("class 1", "class 2")):
    in_class = labels == class_id
    ax.scatter(data[in_class, 0], data[in_class, 1], marker='o', label=class_name, alpha=0.5)
ax.set_xlabel('X1')
ax.set_ylabel('X2')
ax.grid()
ax.legend();

In [None]:
def logistic(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated element-wise by NumPy."""
    return 1.0/(1.0 + np.exp(-z))


class MLP:
    """Single-hidden-layer perceptron for binary classification.

    Both the hidden layer and the single output unit use the logistic sigmoid,
    and the network is trained by plain gradient descent on the binary
    cross-entropy. Parameters are stored in two dicts, ``hidden_params`` and
    ``output_params``, each with a weight matrix under ``'w'`` and a bias
    vector under ``'b'``.
    """

    def __init__(self, input_dim=2, hidden_dim=10, rstate=None):
        """Initialise every weight and bias from a standard normal distribution.

        Parameters
        ----------
        input_dim : int
            Number of input features.
        hidden_dim : int
            Number of hidden units; must be positive.
        rstate : int or None
            Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
            *global* generator, not a private one.
        """
        np.random.seed(rstate)
        assert hidden_dim >0, "Neuronas ocultas debe ser mayor que cero"
        self.hidden_dim = hidden_dim
        output_dim = 1  # binary classification: a single output unit
        self.hidden_params = {'w': np.random.randn(input_dim, hidden_dim),
                              'b': np.random.randn(hidden_dim)}
        self.output_params = {'w': np.random.randn(hidden_dim, output_dim),
                              'b': np.random.randn(output_dim)}

    def forward(self, x, only_output=True):
        """Propagate a batch ``x`` of shape (N, input_dim) through the network.

        Returns the output activations of shape (N, 1) or, when
        ``only_output`` is false, the tuple ``(hidden, output)`` where
        ``hidden`` has shape (N, hidden_dim).
        """
        z = logistic(np.dot(x, self.hidden_params['w']) + self.hidden_params['b'])
        y = logistic(np.dot(z, self.output_params['w']) + self.output_params['b'])
        if only_output:
            return y
        return z, y

    def score(self, x, y, eps=1e-10):
        """Per-sample binary cross-entropy (negative log-likelihood).

        ``y`` holds one 0/1 target per row of ``x`` (shape (N,)); ``eps`` is
        added inside the logarithms to avoid ``log(0)``.
        """
        yhat = self.forward(x)[:, 0]
        logL = y*np.log(yhat+eps) + (1.0-y)*np.log(1.0-yhat+eps)
        return -logL

    def backward(self, x, y, eta=1e-2):
        """Run one gradient-descent step on the summed cross-entropy of a batch.

        Parameters
        ----------
        x : ndarray of shape (N, input_dim)
            Input batch.
        y : array-like with N elements
            Binary targets, given either as shape (N,) or (N, 1).
        eta : float
            Learning rate.
        """
        zhat, yhat = self.forward(x, only_output=False)
        # Align the targets with yhat (N, 1); a 1-D y would otherwise broadcast
        # against yhat into an (N, N) matrix.
        y = np.reshape(y, yhat.shape)
        # Gradient of the loss w.r.t. the output pre-activation
        # (cross-entropy derivative times logistic derivative).
        delta_out = yhat - y
        # Back-propagate through the output weights BEFORE they are updated,
        # then through the hidden logistic units.
        delta_hidden = np.dot(delta_out, self.output_params['w'].T)*zhat*(1.0 - zhat)
        self.output_params['w'] -= eta*np.dot(zhat.T, delta_out)
        self.output_params['b'] -= eta*np.sum(delta_out, axis=0)
        self.hidden_params['w'] -= eta*np.dot(x.T, delta_hidden)
        # One bias gradient per hidden unit: sum over the batch axis only.
        self.hidden_params['b'] -= eta*np.sum(delta_hidden, axis=0)
        

# Modifique la red para hacer regresión en vez de clasificación
# Modifique la red para usar tangente hiperbólica en vez de sigmoide en la capa oculta

In [None]:
# Network to train and its optimisation settings.
nnet = MLP(hidden_dim=2)
n_epochs = 1000
eta = 1e-3
# One row per epoch, two cost columns.
cost_history = np.zeros(shape=(n_epochs, 2))

# Evaluation grid covering the data range plus a 0.5 margin, sampled every 0.05.
x_min = data[:, 0].min() - 0.5
x_max = data[:, 0].max() + 0.5
y_min = data[:, 1].min() - 0.5
y_max = data[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.05),
                     np.arange(y_min, y_max, 0.05))

# Two side-by-side panels that the animation redraws on every frame.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), tight_layout=True)

def update_plot(k, axes=ax):
    """Animation callback: log the costs, redraw both panels, then train one epoch.

    Parameters
    ----------
    k : int
        Frame number supplied by the animation; used as the epoch index into
        ``cost_history``.
    axes : pair of matplotlib Axes, optional
        Panels to draw on. The default is bound when the function is defined,
        because later cells of this notebook reuse the global names ``fig``
        and ``ax`` for other figures while the animation may still be running.
    """
    # Mean cross-entropy before this epoch's updates: column 0 train, column 1 test.
    cost_history[k, 0] = np.mean(nnet.score(data_train, labels_train))
    cost_history[k, 1] = np.mean(nnet.score(data_test, labels_test))
    for ax_ in axes:
        ax_.cla()

    # Left panel: network output over the grid, with the data on top
    # (marker 'o' for class 0, 'x' for class 1).
    Z = nnet.forward(np.c_[xx.ravel(), yy.ravel()])[:, 0]
    Z = Z.reshape(xx.shape)
    axes[0].contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=0.75)
    for class_id, marker in enumerate(['o', 'x']):
        in_class = labels == class_id
        axes[0].scatter(data[in_class, 0], data[in_class, 1], color='k', marker=marker, alpha=0.5)

    # Right panel: learning curves up to and including epoch k.
    epochs = np.arange(0, k+1, step=1)
    for column, label in enumerate(['Train', 'Test']):
        axes[1].plot(epochs, cost_history[:k+1, column], '-', label=label+" cost")
    # Address the panel explicitly; plt.legend() would hit whichever axes is
    # current, which stops being this figure once another one is created.
    axes[1].legend()
    axes[1].grid()

    # One epoch of mini-batch gradient descent over a fresh shuffle of the training set.
    batch_size = 10
    idx = np.random.permutation(len(data_train))
    for i in range(len(idx)//batch_size):
        idx_mb = idx[i*batch_size:(i+1)*batch_size]
        nnet.backward(data_train[idx_mb, :], labels_train[idx_mb, np.newaxis], eta=eta)

# Run the training through the animation: update_plot is called once per frame,
# for n_epochs frames, without repeating.
anim = animation.FuncAnimation(fig=fig, func=update_plot, frames=n_epochs,
                               interval=10, repeat=False, blit=False)

**Ejercicios**
- Experimente variando el número de capas, número de neuronas y tasa de aprendizaje. Comente sobre cómo se reflejan estas modificaciones en el desempeño de la red y en la complejidad del hiperplano
- Experimente aumentando el ruido de los datos
- Discuta sobre la relación entre complejidad del hiperplano, capacidad de generalización y sobreajuste

## Implementación usando pytorch

**Ejercicio:** Modifique la clase agregando una segunda capa oculta

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset 

# Wrap the NumPy training arrays as float32 tensors (features first, labels second)
# and serve them in shuffled mini-batches of 32.
torch_train_set = TensorDataset(
    *(torch.from_numpy(array.astype('float32')) for array in (data_train, labels_train))
)
torch_train_loader = DataLoader(dataset=torch_train_set, batch_size=32, shuffle=True)

class myMLP(torch.nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear.

    The output layer has no activation, so ``forward`` returns raw scores
    (logits) of shape (N, output_dim).
    """

    def __init__(self, input_dim=2, hidden_dim=10, output_dim=1):
        """Create the two fully connected layers and the hidden activation."""
        super().__init__()
        linear = torch.nn.Linear
        self.fc1 = linear(input_dim, hidden_dim, bias=True)
        self.fc2 = linear(hidden_dim, output_dim, bias=True)
        self.hidden_activation = torch.nn.ReLU()

    def forward(self, x):
        """Map a batch ``x`` of shape (N, input_dim) to its output scores."""
        hidden = self.fc1(x)
        hidden = self.hidden_activation(hidden)
        return self.fc2(hidden)

**Ejercicio:** Modifique el siguiente código para correr en GPU. 

¿Por qué no se ve un speed-up considerable en este caso?

In [None]:
from  tqdm import tqdm_notebook

#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Keyword names must match myMLP.__init__ (input_dim / hidden_dim / output_dim).
net = myMLP(input_dim=2, hidden_dim=10, output_dim=1)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
# The network outputs raw logits; this loss applies the sigmoid internally.
criterion = torch.nn.BCEWithLogitsLoss()
#net.to(device)

# Main training loop
nepoch = 500
# running_loss[k] accumulates the mean mini-batch loss of epoch k.
running_loss = np.zeros(shape=(nepoch,))
n_batches = len(torch_train_loader)
for k in tqdm_notebook(range(nepoch), desc='Epochs'):
    for sample_data, sample_label in torch_train_loader:
        optimizer.zero_grad()
        output = net(sample_data)
        # output is (batch, 1); drop the last axis to match the 1-D labels.
        loss = criterion(output[:, 0], sample_label)
        loss.backward()
        optimizer.step()
        running_loss[k] += loss.item()/n_batches

In [None]:
# Evaluate the trained network on the plotting grid and bring the result back to NumPy.
Z = (net.forward(torch.from_numpy(np.c_[xx.ravel(), yy.ravel()].astype('float32')))
     .detach().numpy().reshape(xx.shape))

fig = plt.figure(figsize=(14, 5))

# Left panel: network output over the grid with the data on top
# (marker 'o' for class 0, 'x' for class 1).
ax = fig.add_subplot(1, 2, 1)
ax.contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=0.75)
for class_id, marker in enumerate(('o', 'x')):
    in_class = labels == class_id
    ax.scatter(data[in_class, 0], data[in_class, 1], color='k', marker=marker, alpha=0.5)

# Right panel: training loss per epoch.
ax = fig.add_subplot(1, 2, 2)
ax.plot(running_loss, label='Train loss', linewidth=2)
#ax.plot(test_loss, label='Test loss', linewidth=2)
ax.grid()
ax.legend()
fig.tight_layout();

## Opcional: Implementación usando tensorflow

In [None]:
import tensorflow as tf
import time
from os.path import join

# Build the TensorFlow 1.x graph of a one-hidden-layer binary classifier.
# Clear the default graph first so this cell starts from an empty graph.
tf.reset_default_graph()
# Inputs: batches of 2-feature samples and one target column; the batch size is left open (None).
tf_input = tf.placeholder(tf.float32, [None, 2], name='input')
tf_label = tf.placeholder(tf.float32, [None, 1], name='target')

Nh = 10  # number of hidden units
nepochs = 500  

# Hidden layer: tanh(input @ wh + bh), zero-initialised bias, weights drawn uniformly between -1 and 1.
with tf.variable_scope('Hidden_layer'):
    bh = tf.Variable(tf.zeros([Nh]), name="bias", dtype=tf.float32)
    wh = tf.Variable(tf.random_uniform([2, Nh], -1.0, 1.0), name="weight", dtype=tf.float32)
    z = tf.nn.tanh(tf.matmul(tf_input, wh) + bh)

# Output layer: a single linear unit. y holds logits; no sigmoid is applied here
# because the loss below takes logits directly.
with tf.variable_scope('Output_layer'):
    bo = tf.Variable(tf.zeros([1]), name="bias", dtype=tf.float32)
    wo = tf.Variable(tf.random_uniform([Nh, 1], -1.0, 1.0), name="weight", dtype=tf.float32)
    y = tf.add(tf.matmul(z, wo), bo)

# Loss (batch mean of the sigmoid cross-entropy), Adam training op with
# learning rate 1e-2, and the op that initialises all variables.
with tf.variable_scope('Optimizer'):
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf_label, logits=y)
    loss_op = tf.reduce_mean(cross_entropy)  
    optimizer = tf.train.AdamOptimizer(1e-2)
    train_op = optimizer.minimize(loss_op) 
    init = tf.global_variables_initializer()

# Summaries: the scalar loss plus histograms of the output-layer parameters.
with tf.name_scope('summaries'):
    tf.summary.scalar('loss', loss_op)
    tf.summary.histogram('output_weight', wo)
    tf.summary.histogram('output_bias', bo)

# Single op that evaluates every summary registered above.
merged = tf.summary.merge_all()
    


**Ejercicios:** 
- Visualice el grafo, las curvas de aprendizaje y los histogramas de parámetros usando la herramienta tensorboard
- Modifique el código que genera el grafo para agregar una segunda capa oculta
- Estudie la función de mayor abstracción tf.layers.dense y úsela para modificar el código que genera el grafo
- ¿Cómo modificaría el código de entrenamiento para usar mini-batches?

**Instrucciones tensorboard**
1. Ejecutar: tensorboard --logdir /tmp/tensorboard/ 
2. Apuntar el navegador a localhost:6006

In [None]:
# Each run logs to its own time-stamped directory so TensorBoard can compare runs.
log_dir = join("/tmp/tensorboard/", str(time.time()))
# The feed dictionaries do not change between epochs (full-batch training),
# so build them once. Labels are reshaped to the (N, 1) layout of tf_label.
train_feed = {tf_input: data_train, tf_label: np.reshape(labels_train, [-1, 1])}
test_feed = {tf_input: data_test, tf_label: np.reshape(labels_test, [-1, 1])}
with tf.Session() as sess:
    train_writer = tf.summary.FileWriter(join(log_dir, 'train'), sess.graph)
    test_writer = tf.summary.FileWriter(join(log_dir, 'test'), sess.graph)
    try:
        sess.run(init)
        train_loss = np.zeros(shape=(nepochs))
        test_loss = np.zeros(shape=(nepochs))
        for i in range(nepochs):
            # One full-batch optimisation step; also fetch the training loss and summaries.
            _, train_loss[i], summary = sess.run([train_op, loss_op, merged], feed_dict=train_feed)
            train_writer.add_summary(summary, i)
            # Evaluate on the held-out set without running the training op.
            pred_test, test_loss[i], summary = sess.run([y, loss_op, merged], feed_dict=test_feed)
            test_writer.add_summary(summary, i)

        # Network output (logits) over the plotting grid, for the figure drawn afterwards.
        Z = sess.run(y, feed_dict={tf_input: (np.c_[xx.ravel(), yy.ravel()]).astype('float32')})
        Z = Z.reshape(xx.shape)
    finally:
        # Close the writers so that pending summaries are flushed to disk
        # and the event files are released.
        train_writer.close()
        test_writer.close()

fig = plt.figure(figsize=(14, 5))

# Left panel: network output over the grid with the data on top
# (marker 'o' for class 0, 'x' for class 1).
ax = fig.add_subplot(1, 2, 1)
ax.contourf(xx, yy, Z, cmap=plt.cm.RdBu_r, alpha=0.75)
for class_id, marker in enumerate(('o', 'x')):
    in_class = labels == class_id
    ax.scatter(data[in_class, 0], data[in_class, 1], color='k', marker=marker, alpha=0.5)

# Right panel: training and test loss per epoch.
ax = fig.add_subplot(1, 2, 2)
for curve, curve_name in ((train_loss, 'Train loss'), (test_loss, 'Test loss')):
    ax.plot(curve, label=curve_name, linewidth=2)
ax.grid()
ax.legend()
fig.tight_layout();

- Tensorboard: análisis del grafo, curvas de aprendizaje, histograma de parámetros