<img src="res/itm_logo.jpg" width="300px">

## Inteligencia Artificial - IAI84
### Instituto Tecnológico Metropolitano
#### Pedro Atencio Ortiz - 2018

In [None]:
import numpy as np

<h1>1. Funciones de utilidad</h1>

In [None]:
def linear_activation(W, b, X):
    '''
    Compute the affine pre-activation of a layer: dot(W, X) + b.

    W -- weights of the layer
    b -- bias of the layer, added to the product dot(W, X)
    X -- input to the layer

    Returns the pre-activation z.
    '''
    return np.dot(W, X) + b

In [None]:
def sigmoid(z):
    '''
    Logistic function 1 / (1 + exp(-z)), applied element-wise to z.
    '''
    return 1. / (np.exp(-z) + 1.)

In [None]:
def d_sigmoid(z):
    '''
    Derivative of the sigmoid evaluated at z: s * (1 - s), with s = sigmoid(z).
    '''
    s = sigmoid(z)  # evaluate the sigmoid once and reuse it in both factors
    return s * (1. - s)

In [None]:
def loss(y, a):
    '''
    Logistic loss of prediction a against label y:
    -(y * log(a) + (1 - y) * log(1 - a)).
    '''
    positive_term = y * np.log(a)
    negative_term = (1-y) * np.log(1-a)
    return -(positive_term + negative_term)

In [None]:
def cost(logloss):
    '''
    Cost of the dataset: the mean of the per-sample losses in logloss.
    '''
    mean_loss = np.mean(logloss)
    return mean_loss

In [None]:
def print_network(layers):
    '''
    Print one line per layer of the architecture in dictionary 'layers'.

    Layers are read under keys 'l0', 'l1', ...: the first one is reported as
    the input layer, the last one as the output layer, and the ones in
    between as numbered hidden layers.
    '''
    last = len(layers) - 1
    for idx in range(last + 1):
        spec = layers['l' + str(idx)]
        if idx == 0:
            print('Input layer: ', spec)
        elif idx < last:
            print('Hidden layer: ', idx, ' ', spec)
        else:
            print('Output layer: ', spec)

In [None]:
# Example architecture: each key 'li' maps to (number of neurons, activation name)
layers = {
    'l0': (2, 'sigmoid'),
    'l1': (2, 'sigmoid'),
    'l2': (3, 'sigmoid'),
    'l3': (1, 'sigmoid'),
}
print_network(layers)

<h1>2. Inicialización de parámetros</h1>

Definido como:

<font size=3>
<center>$W^{[i]} \in R^{nl^{[i]} \times nl^{[i-1]}}$</center>
<center>$b^{[i]} \in R^{nl^{[i]} \times 1}$</center>
</font>

In [None]:
def init(layers):
    '''
    Initialize parameters W and b for every layer of the network described by
    dictionary 'layers'.

    Input dictionary has the form: 'li':(int:number of neurons, string:activation function),
    with 'l0' describing the input layer. Only the number of neurons is used
    here; the activation name is ignored by this function.

    For every layer i >= 1:
    W -- standard-normal random matrix of shape (n_i, n_{i-1})
    b -- zero column vector of shape (n_i, 1)

    Returns a dictionary 'li' -> {'W': W, 'b': b} for i = 1 .. len(layers)-1.
    '''
    num_layers = len(layers)

    parameters = {}

    for i in range(1, num_layers):
        n_current = layers['l' + str(i)][0]
        n_previous = layers['l' + str(i - 1)][0]

        # Random weights break the symmetry between neurons of a layer;
        # the biases can safely start at zero.
        W = np.random.randn(n_current, n_previous)
        b = np.zeros((n_current, 1))

        parameters['l' + str(i)] = {'W': W, 'b': b}

    return parameters

<h1>3. Forward propagation - propagación hacia adelante</h1>

Definida como:

<font size=3>
<center>$Z^{[i]} = W^{[i]} \cdot A^{[i-1]} + b^{[i]}$</center>
<center>$A^{[i]} = \sigma(Z^{[i]})$</center>
</font>

In [None]:
def feed_forward(X, parameters):
    '''
    Forward propagation given an input dataset X, and a neural network parameters in a dictionary.

    X -- input data, used as the activation of layer 0
    parameters -- dictionary 'li' -> {'W': W, 'b': b} for i = 1 .. number of layers

    Returns a dictionary 'li' -> {'Z': Z, 'A': A} for every layer, where
    Z = W . A_prev + b and A = sigmoid(Z). Entry 'l0' holds Z=None and A=X.
    '''
    forward_computation = {'l0': {'Z': None, 'A': X}}

    A = X
    num_layers = len(parameters)

    for i in range(1, num_layers + 1):
        layer = parameters['l' + str(i)]

        # The activation of the previous layer is the input of this one
        Z = linear_activation(layer['W'], layer['b'], A)
        A = sigmoid(Z)

        forward_computation['l' + str(i)] = {'Z': Z, 'A': A}

    return forward_computation

In [None]:
# Network architecture: each key 'li' maps to (number of neurons, activation name)
layers = {'l0':(2, 'sigmoid'), 'l1':(2, 'sigmoid'), 'l2':(3, 'sigmoid'), 'l3':(1, 'sigmoid')}
print_network(layers)

parameters = init(layers)
# print() call form: the bare `print x` statement is a SyntaxError on Python 3
print(parameters)

In [None]:
# XOR truth table: after the transpose each column of X is one sample
X = np.array([[0,0],[0,1],[1,0],[1,1]])
Y = np.array([[0, 1, 1, 0]])
X = X.T

forward_computation = feed_forward(X, parameters)
print(forward_computation)
# print() call form: the bare `print x` statement is a SyntaxError on Python 3
print(len(forward_computation))

<h1>4. Backward propagation</h1>

Definida como:

<font size=3>
<center>$dZ^{[i]} = A^{[i]} - Y$, si $i == l$</center>
<center>$dZ^{[i]} = (W^{[i+1]T} \cdot dZ^{[i+1]}) \odot \sigma^{'}(Z^{[i]})$, en caso contrario</center>
<br>
<center>$dW^{[i]} = (dZ^{[i]} \cdot A^{[i-1]T}) / m$</center>
<center>$db^{[i]} = \sum{(dZ^{[i]})} / m$</center>
</font>

In [None]:
def backward_propagation(parameters, forward_computation, Y):
    '''
    Computes derivatives for W and b for each layer in forward_computation.

    parameters -- dictionary 'li' -> {'W': W, 'b': b}
    forward_computation -- dictionary 'li' -> {'Z': Z, 'A': A}, with the
                           network input stored as the 'A' of 'l0'
    Y -- expected outputs, one column per sample

    Returns a dictionary 'li' -> {'dZ': dZ, 'dW': dW, 'db': db} for every
    layer i >= 1.
    '''
    # Unrolled, for a network with three parameter layers this computes:
    #   dZ3 = A3 - Y
    #   dW3 = np.dot(dZ3, A2.T) / m
    #   db3 = np.sum(dZ3, axis=1, keepdims=True) / m
    #
    #   dZ2 = np.multiply(np.dot(W3.T, dZ3), d_sigmoid(Z2))
    #   dW2 = np.dot(dZ2, A1.T) / m
    #   db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    #
    #   dZ1 = np.multiply(np.dot(W2.T, dZ2), d_sigmoid(Z1))
    #   dW1 = np.dot(dZ1, X.T) / m
    #   db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    backward_computation = {}

    l = len(parameters)
    # Number of samples = number of columns of the network input. len(Y)
    # would be 1 for a label row of shape (1, m).
    m = np.shape(forward_computation['l0']['A'])[1]

    for i in reversed(range(1, l + 1)):
        if i == l:
            # Output layer: the error is the difference with the labels
            dZ = forward_computation['l' + str(i)]['A'] - Y
        else:
            # Hidden layer: propagate the error of layer i+1 (already
            # computed, since the loop runs from the last layer backwards)
            W_next = parameters['l' + str(i + 1)]['W']
            dZ_next = backward_computation['l' + str(i + 1)]['dZ']
            Z = forward_computation['l' + str(i)]['Z']
            dZ = np.multiply(np.dot(W_next.T, dZ_next), d_sigmoid(Z))

        A_prev = forward_computation['l' + str(i - 1)]['A']
        dW = np.dot(dZ, A_prev.T) / m
        db = np.sum(dZ, axis=1, keepdims=True) / m

        backward_computation['l' + str(i)] = {'dZ': dZ, 'dW': dW, 'db': db}

    return backward_computation

In [None]:
backward_computation = backward_propagation(parameters, forward_computation, Y)
# print() call form: the bare `print x` statement is a SyntaxError on Python 3
print(backward_computation)

<h1>5. Actualización de parámetros</h1>

Definida como:

<font size=3>
<center>$W^{[i]} = W^{[i]} - \alpha dW^{[i]}$</center>
<center>$b^{[i]} = b^{[i]} - \alpha db^{[i]}$</center>
</font>

In [None]:
def update_params(parameters, backward_computation, learning_rate):
    '''
    Gradient-descent step: W = W - learning_rate * dW and
    b = b - learning_rate * db for every layer.

    parameters -- dictionary 'li' -> {'W': W, 'b': b}; its entries are
                  replaced in place and the same dictionary is returned
    backward_computation -- dictionary 'li' -> {..., 'dW': dW, 'db': db}
    learning_rate -- step size

    Returns the updated parameters dictionary.
    '''
    num_layers = len(parameters)

    for i in range(1, num_layers + 1):
        key = 'l' + str(i)
        layer = parameters[key]
        gradients = backward_computation[key]

        layer['W'] = layer['W'] - learning_rate * gradients['dW']
        layer['b'] = layer['b'] - learning_rate * gradients['db']

    return parameters

In [None]:
parameters = update_params(parameters, backward_computation, 0.5)
# print() call form: the bare `print x` statement is a SyntaxError on Python 3
print(parameters)

<hr>
## Algunas utilidades

In [None]:
'''
UTILIDADES
'''

import sklearn
from sklearn import datasets
import matplotlib.pyplot as plt

def generate_data(data_type, noise=0.2):
    """
    Generate a binary dataset with distribution data_type

    Arguments:
    data_type -- distribution of dataset {moons,circles,blobs}
    noise -- passed as `noise` to make_moons / make_circles and as
             `cluster_std` to make_blobs

    Returns:
    X -- features
    Y -- labels

    Raises:
    ValueError -- if data_type is not one of the supported names
    """
    # Fail with a clear message instead of reaching the final return with
    # X and Y unassigned.
    if data_type not in ('moons', 'circles', 'blobs'):
        raise ValueError(
            "data_type must be 'moons', 'circles' or 'blobs', got %r" % (data_type,))

    np.random.seed(0)  # fixed seed so the generated dataset is reproducible
    if data_type == 'moons':
        X, Y = datasets.make_moons(200, noise=noise)
    elif data_type == 'circles':
        X, Y = datasets.make_circles(200, noise=noise)
    else:
        # No sample count is passed here, so make_blobs uses its own default
        X, Y = datasets.make_blobs(centers=2, cluster_std=noise)
    return X, Y

def visualize_lr(parameters, X, Y):
    '''
    Plot the output of predict_multilayer over a dense 2-D grid and draw the
    samples of X on top of it, colored by their label in Y.

    parameters -- network parameters, passed through to predict_multilayer
    X -- samples stored as columns; rows 0 and 1 are the two plotted features
    Y -- labels; samples labeled 1 are drawn blue, all others red
    '''
    samples = X.T
    feature_0 = samples[:, 0]
    feature_1 = samples[:, 1]

    # Plot limits: data range plus a margin of 0.5 on every side
    x_lo, x_hi = feature_0.min() - .5, feature_0.max() + .5
    y_lo, y_hi = feature_1.min() - .5, feature_1.max() + .5

    # Grid of points with spacing `step` covering the plot area
    step = 0.01
    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, step),
                                 np.arange(y_lo, y_hi, step))

    # Predict every grid point (passed as columns) and restore the grid shape
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()].T
    predicted = predict_multilayer(parameters, grid_points)
    predicted = predicted.reshape(grid_x.shape)

    # Filled contour of the predictions with the samples on top
    plt.figure(figsize=(7,5))
    plt.contourf(grid_x, grid_y, predicted, cmap=plt.cm.Spectral)

    point_colors = ['blue' if label == 1 else 'red' for label in np.squeeze(Y)]
    plt.scatter(feature_0, feature_1, color=point_colors)

    plt.show()
    
def predict_multilayer(parameters, X):
    '''
    Run X through every layer in 'parameters' (linear activation followed by
    sigmoid) and return the final activation rounded with np.round.

    parameters -- dictionary 'li' -> {'W': W, 'b': b} for i = 1 .. number of layers
    X -- network input
    '''
    A = X

    for i in range(1, len(parameters) + 1):
        layer = parameters['l' + str(i)]
        Z = linear_activation(layer['W'], layer['b'], A)
        A = sigmoid(Z)

    return np.round(A)

<hr>
# Trabajemos
3. Realicemos descenso del gradiente sobre la red neuronal completa.

### - Dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Build the 'moons' dataset; at this point every row of X is one sample
X, Y = generate_data('moons')
nx, m = X.shape[1], X.shape[0]

# Samples labeled 1 are drawn blue, the rest red
color = []
for y in np.squeeze(Y):
    color.append('blue' if y == 1 else 'red')

plt.figure(figsize=(7,5))
plt.scatter(X[:,0], X[:,1], color=color)
plt.show()

# From here on samples are stored as columns
X = X.T

<hr>
### - Apliquemos descenso del gradiente a la red neuronal completa

In [None]:
# Metaparameters initialization
num_epochs = 20000
learning_rate = 0.1

# Dataset loading: XOR truth table; after the transpose each column of X is
# one sample
X = np.array([[0,0],[0,1],[1,0],[1,1]])
Y = np.array([[0, 1, 1, 0]])
X = X.T

# Parameters initialization
layers = {'l0':(2, 'sigmoid'), 'l1':(5, 'sigmoid'), 'l2':(5, 'sigmoid'), 'l3':(1, 'sigmoid')} #dictionary with layers parameters
parameters = init(layers)

l = len(layers)

error_array = np.zeros([num_epochs])  # cost per epoch, used to plot the error

# Gradient descent
for i in range(num_epochs):
    # Forward propagation
    forward_computation = feed_forward(X, parameters)

    # Backward propagation
    backward_computation = backward_propagation(parameters, forward_computation, Y)

    # Parameters update
    parameters = update_params(parameters, backward_computation, learning_rate)

    # Cost estimation, taken from the forward pass of this epoch (computed
    # before the update); the output layer is stored under key 'l' + str(l-1)
    J = cost(loss(Y, forward_computation['l'+str(l-1)]['A']))
    error_array[i] = J

    if(i%1000 == 0):
        print("costo -- iteracion ", i, ": ", J)

print("parametros actualizados: ", parameters)

# Plot of the error per epoch
plt.figure(figsize=(10,5))
plt.plot(np.linspace(0,num_epochs-1, num_epochs), error_array)
plt.xlabel("numero de epocas")
plt.ylabel("error: "+r'$J$')
plt.show()

### - Visualizacion del resultado

In [None]:
import matplotlib.pyplot as plt

def visualize_lr(parameters, X, Y):
    '''
    Draw the regions predicted by predict_multilayer on a dense 2-D grid,
    with the samples of X scattered on top and colored by their label in Y.

    parameters -- network parameters, passed through to predict_multilayer
    X -- samples stored as columns; rows 0 and 1 are the two plotted features
    Y -- labels; samples labeled 1 are drawn blue, all others red
    '''
    points = X.T
    first = points[:, 0]
    second = points[:, 1]

    # Plot limits: data range plus a margin of 0.5 on every side
    x_min, x_max = first.min() - .5, first.max() + .5
    y_min, y_max = second.min() - .5, second.max() + .5

    # Grid of points with spacing h covering the plot area
    h = 0.01
    xs = np.arange(x_min, x_max, h)
    ys = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(xs, ys)

    # Predict every grid point (passed as columns) and restore the grid shape
    flat_grid = np.c_[xx.ravel(), yy.ravel()].T
    regions = predict_multilayer(parameters, flat_grid).reshape(xx.shape)

    # Filled contour of the predictions with the samples on top
    plt.figure(figsize=(7,5))
    plt.contourf(xx, yy, regions, cmap=plt.cm.Spectral)

    colors = ['blue' if label == 1 else 'red' for label in np.squeeze(Y)]
    plt.scatter(first, second, color=colors)

    plt.show()

In [None]:
# Plot the network's predicted regions together with the samples in X, colored by Y
visualize_lr(parameters, X, Y)