<img src="res/itm_logo.jpg" width="300px">

## Inteligencia Artificial - IAI84
### Instituto Tecnológico Metropolitano
#### Pedro Atencio Ortiz - 2018

In [24]:
import numpy as np

In [6]:
def linear_activation(W, b, X):
    '''
    Affine (pre-activation) step of a layer: the product of W and X plus b.

    Arguments:
    W -- weights of the layer
    b -- bias of the layer, added to the product using NumPy broadcasting
    X -- input fed to the layer

    Returns:
    The value of np.dot(W, X) + b.
    '''
    weighted_input = np.dot(W, X)

    return weighted_input + b

In [7]:
def sigmoid(z):
    '''
    Element-wise logistic function 1 / (1 + exp(-z)).

    Arguments:
    z -- scalar or NumPy array of pre-activation values

    Returns:
    Values in the interval [0, 1], with the same shape as z.
    '''
    denominator = 1. + np.exp(-z)

    return 1. / denominator

In [8]:
def d_sigmoid(z):
    '''
    Derivative of the sigmoid evaluated at z: sigmoid(z) * (1 - sigmoid(z)).
    '''
    s = sigmoid(z)  # evaluated once and reused for both factors

    return s * (1. - s)

In [9]:
def loss(y, a, eps=1e-15):
    '''
    Logistic (binary cross-entropy) loss, computed element-wise.

    The prediction is clipped to [eps, 1 - eps] before the logarithms are
    taken. Without the clipping a saturated prediction (exactly 0 or 1)
    produces log(0), which yields inf, or nan when it is multiplied by 0.

    Arguments:
    y -- true labels (0 or 1), scalar or array
    a -- predicted probabilities, scalar or array broadcastable with y
    eps -- clipping margin that keeps the logarithms finite

    Returns:
    -(y * log(a) + (1 - y) * log(1 - a)) for every element.
    '''
    y = np.asarray(y)
    a = np.clip(np.asarray(a, dtype=float), eps, 1. - eps)

    return -(y * np.log(a) + (1 - y) * np.log(1 - a))

In [10]:
def cost(logloss):
    '''
    Collapse the per-sample losses into a single cost value: their mean.

    Arguments:
    logloss -- array-like with one loss value per sample

    Returns:
    The mean over all elements of logloss.
    '''
    mean_loss = np.mean(logloss)

    return mean_loss

In [35]:
def init(layers):
    '''
    Build the initial parameters W and b of every layer after the input layer.

    Arguments:
    layers -- dictionary 'l<i>': (number of neurons, activation function name),
              with keys 'l0' (input layer) up to 'l<n-1>'. Only the neuron
              counts are read here.

    Returns:
    Dictionary 'l<i>': {'W': ..., 'b': ...} for i = 1 .. n-1, where W is drawn
    from a standard normal distribution with shape (neurons in layer i,
    neurons in layer i-1) and b is a column of zeros with one row per neuron.
    '''
    parameters = {}

    for idx in range(1, len(layers)):
        name = 'l' + str(idx)
        n_out = layers[name][0]                  # neurons in this layer
        n_in = layers['l' + str(idx - 1)][0]     # neurons feeding this layer

        parameters[name] = {
            'W': np.random.randn(n_out, n_in),
            'b': np.zeros([n_out, 1]),
        }

    return parameters

In [20]:
def print_network(layers):
    '''
    Print one line per layer of the architecture described by 'layers'.

    Layer 'l0' is reported as the input layer, the highest-numbered layer as
    the output layer, and every layer in between as a numbered hidden layer.
    '''

    last = len(layers) - 1
    for idx in range(last + 1):
        spec = layers['l'+str(idx)]
        if idx == 0:
            print('Input layer: ', spec)
        elif idx == last:
            print('Output layer: ', spec)
        else:
            print('Hidden layer: ',idx,' ', spec)

In [63]:
def feed_forward(X, parameters):
    '''
    Propagate the dataset X through every layer stored in 'parameters'.

    Arguments:
    X -- input dataset, used as the activation of layer 'l0'
    parameters -- dictionary 'l<i>': {'W': ..., 'b': ...} for i = 1 .. n

    Returns:
    Dictionary 'l<i>': {'Z': ..., 'A': ...} for i = 0 .. n. Entry 'l0' holds
    Z = None and A = X; every other entry holds the linear activation Z and
    its sigmoid A (sigmoid is applied to every layer).
    '''

    activation = X
    forward_computation = {'l0': {'Z':None, 'A':X}}

    for idx in range(1, len(parameters) + 1):
        name = 'l'+str(idx)
        layer = parameters[name]

        Z = linear_activation(layer['W'], layer['b'], activation)
        activation = sigmoid(Z)  # becomes the input of the next layer

        forward_computation[name] = {'Z':Z, 'A':activation}

    return forward_computation

In [67]:
# Network architecture: layer name -> (number of neurons, activation function).
layers = {'l0':(2, 'sigmoid'), 'l1':(2, 'sigmoid'), 'l2':(3, 'sigmoid'), 'l3':(1, 'sigmoid')}
print_network(layers)

# W and b for every layer after the input layer.
parameters = init(layers)
# Function-call form of print: same output for a single argument, and valid
# on Python 3 as well (the bare `print x` statement is Python 2 only).
print(parameters)

('Input layer: ', (2, 'sigmoid'))
('Hidden layer: ', 1, ' ', (2, 'sigmoid'))
('Hidden layer: ', 2, ' ', (3, 'sigmoid'))
('Output layer: ', (1, 'sigmoid'))
{'l2': {'b': array([[0.],
       [0.],
       [0.]]), 'W': array([[ 1.49639429,  0.9796219 ],
       [-0.76059643, -0.22605261],
       [ 0.21300177,  0.12250899]])}, 'l3': {'b': array([[0.]]), 'W': array([[-1.07687278, -0.09153635,  0.61199995]])}, 'l1': {'b': array([[0.],
       [0.]]), 'W': array([[-0.33208066, -1.29511872],
       [ 0.06532401, -1.45294667]])}}


In [69]:
# The four binary input pairs, transposed so that every column is one sample.
X = np.array([[0,0],[0,1],[1,0],[1,1]]).T

# Run the forward pass and show Z and A for every layer.
forward_computation = feed_forward(X, parameters)
print(forward_computation)

{'l2': {'A': array([[0.7752171 , 0.62419494, 0.75600475, 0.60860346],
       [0.37911071, 0.44859062, 0.39306565, 0.45758633],
       [0.54184077, 0.51724669, 0.53798484, 0.51485906]]), 'Z': array([[ 1.2380081 ,  0.50739218,  1.13089892,  0.44144573],
       [-0.49332452, -0.20636679, -0.43444399, -0.17006337],
       [ 0.16775538,  0.06901415,  0.15223266,  0.05945373]])}, 'l3': {'A': array([[0.36867482, 0.40210884, 0.37265255, 0.40560108]]), 'Z': array([[-0.53790608, -0.39668594, -0.52085404, -0.38218057]])}, 'l0': {'A': array([[0, 0, 1, 1],
       [0, 1, 0, 1]]), 'Z': None}, 'l1': {'A': array([[0.5       , 0.21498768, 0.41773445, 0.16421438],
       [0.5       , 0.18954848, 0.5163252 , 0.19978756]]), 'Z': array([[ 0.        , -1.29511872, -0.33208066, -1.62719938],
       [ 0.        , -1.45294667,  0.06532401, -1.38762266]])}}


In [None]:
def backward_propagation(forward_computation, parameters=None, Y=None):
    '''
    Computes derivatives for W and b for each layer in forward_computation.

    The gradients follow the same formulas as the manual three-layer training
    loop in this notebook, generalised to any number of layers. Every layer is
    assumed to use a sigmoid activation (as feed_forward does) and the cost is
    assumed to be the mean logistic loss, so that:
      - output layer:  dZ = A - Y
      - hidden layer:  dZ = (W_next.T . dZ_next) * A * (1 - A)
    where A * (1 - A) is the sigmoid derivative written in terms of the stored
    activation.

    Arguments:
    forward_computation -- dictionary 'l<i>': {'Z': ..., 'A': ...} as returned
                           by feed_forward, with samples stored as columns
    parameters -- dictionary 'l<i>': {'W': ..., 'b': ...} used in the forward pass
    Y -- true labels, broadcastable against the output activation

    Returns:
    Dictionary 'l<i>': {'dW': ..., 'db': ...} for every layer in 'parameters'.

    Raises:
    ValueError -- if 'parameters' or 'Y' is not supplied; the gradients cannot
                  be computed from forward_computation alone.
    '''
    if parameters is None or Y is None:
        raise ValueError("backward_propagation requires both 'parameters' and 'Y'")

    num_layers = len(parameters)
    m = forward_computation['l0']['A'].shape[1]  # number of samples (columns)

    gradients = {}

    # Error at the output layer for sigmoid + logistic loss.
    dZ = forward_computation['l'+str(num_layers)]['A'] - np.asarray(Y)

    for i in range(num_layers, 0, -1):
        name = 'l'+str(i)
        A_prev = forward_computation['l'+str(i-1)]['A']

        gradients[name] = {
            'dW': np.dot(dZ, A_prev.T) / m,
            'db': np.sum(dZ, axis=1, keepdims=True) / m,
        }

        if i > 1:
            # Push the error back through this layer's weights and through the
            # sigmoid of the previous layer.
            W = parameters[name]['W']
            dZ = np.dot(W.T, dZ) * A_prev * (1. - A_prev)

    return gradients

<hr>
## Algunas utilidades

In [None]:
'''
UTILIDADES
'''

import sklearn
from sklearn import datasets
import matplotlib.pyplot as plt

def generate_data(data_type, noise=0.2):
    """
    Generate a binary dataset with distribution data_type

    The global NumPy random generator is reseeded with 0 before the data is
    drawn, so repeated calls with the same arguments start from the same seed.

    Arguments:
    data_type -- distribution of dataset {moons,circles,blobs}
    noise -- passed as `noise` to make_moons / make_circles and as
             `cluster_std` to make_blobs

    Returns:
    X -- features
    Y -- labels

    Raises:
    ValueError -- if data_type is not one of the supported distributions

    Note: moons and circles are requested with 200 samples; make_blobs is
    called without a sample count, so its size is the library default.
    """
    supported = ('moons', 'circles', 'blobs')
    # Validate before touching the global random state, so a bad name fails
    # cleanly instead of reaching `return X, Y` with X and Y unassigned.
    if data_type not in supported:
        raise ValueError(
            "Unknown data_type {!r}; expected one of {}".format(data_type, supported))

    np.random.seed(0)
    if data_type == 'moons':
        X, Y = datasets.make_moons(200, noise=noise)
    elif data_type == 'circles':
        X, Y = datasets.make_circles(200, noise=noise)
    else:  # 'blobs'
        X, Y = datasets.make_blobs(centers=2, cluster_std=noise)
    return X, Y

def visualize_lr(parameters, X, Y):
    """
    Plot the classifier's predictions over a 2-D grid together with the samples.

    Arguments:
    parameters -- network parameters, forwarded unchanged to predict_multilayer
    X -- dataset with one sample per column; it is transposed here and its
         first two features are used as plot coordinates
    Y -- labels; samples equal to 1 are drawn in blue, all others in red

    NOTE(review): predict_multilayer is defined outside this block; its output
    must be reshapeable to the shape of the grid.
    """
    samples = X.T
    first, second = samples[:, 0], samples[:, 1]

    # Grid covering the data range plus a margin of 0.5 on every side,
    # sampled every 0.01 units.
    pad = .5
    step = 0.01
    x_axis = np.arange(first.min() - pad, first.max() + pad, step)
    y_axis = np.arange(second.min() - pad, second.max() + pad, step)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # Predict on every grid point (one point per column) and restore the grid shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()].T
    Z = predict_multilayer(parameters, grid_points).reshape(xx.shape)

    # Filled contour of the predictions with the samples drawn on top.
    plt.figure(figsize=(7,5))
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)

    color = []
    for y in np.squeeze(Y):
        color.append('blue' if y == 1 else 'red')
    plt.scatter(first, second, color=color)

    plt.show()

<hr>
# Trabajemos
3. Realicemos descenso del gradiente sobre la red neuronal completa.

### - Dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Two-moons dataset; at this point X holds one sample per row.
X, Y = generate_data('moons')
nx, m = X.shape[::-1]  # nx: number of features, m: number of samples

# Scatter plot of the raw data: label 1 in blue, any other label in red.
plt.figure(figsize=(7,5))
color = ['blue' if y == 1 else 'red' for y in np.squeeze(Y)]
plt.scatter(X[:,0], X[:,1], color=color)

plt.show()

# From here on the samples are stored as columns.
X = X.T

<hr>
### - Apliquemos descenso del gradiente a cada regresor logístico por separado

In [None]:
# --- Metaparameters ---
num_epochs = 30000
learning_rate = 0.9

# --- Parameters ---
# NOTE(review): initialize_parameters is not defined in this part of the
# notebook; it is expected to return a dict with keys W1, b1, W2, b2, W3, b3.
parameters = initialize_parameters(10)
W1, b1 = parameters["W1"], parameters["b1"]
W2, b2 = parameters["W2"], parameters["b2"]
W3, b3 = parameters["W3"], parameters["b3"]

print ("parametros iniciales: ", parameters)

# Cost of every epoch, kept for the plot at the end.
error_array = np.zeros([num_epochs])

# --- Gradient descent: num_epochs iterations over the whole dataset ---
for i in range(num_epochs):
    # Forward propagation through the three sigmoid layers.
    Z1 = linear_activation(W1,b1,X)
    A1 = sigmoid(Z1)

    Z2 = linear_activation(W2,b2,A1)
    A2 = sigmoid(Z2)

    Z3 = linear_activation(W3,b3,A2)
    A3 = sigmoid(Z3)

    # Backward propagation, from the output layer down to the first layer.
    dZ3 = A3 - Y
    dW3 = np.dot(dZ3, A2.T) / m
    db3 = np.sum(dZ3, axis=1, keepdims=True) / m

    dZ2 = np.dot(W3.T, dZ3) * d_sigmoid(Z2)
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    dZ1 = np.dot(W2.T, dZ2) * d_sigmoid(Z1)
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    # Parameter update. The in-place `-=` modifies the same objects that
    # were read from `parameters` above; the dictionary is never reassigned.
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3

    # Cost of this epoch, measured on the activations computed before the update.
    J = cost(loss(Y,A3))
    error_array[i] = J

    if(i % 1000 == 0):
        print("costo -- iteracion ", i, ": ", J)

print("parametros actualizados: ", parameters)

# --- Testing ---
# NOTE(review): predict_multilayer is not defined in this part of the notebook.
print("Predicciones del clasificador: ", predict_multilayer(parameters,X))

# --- Cost per epoch ---
plt.figure(figsize=(10,5))
plt.plot(np.linspace(0,num_epochs-1, num_epochs), error_array)
plt.xlabel("numero de epocas")
plt.ylabel("error: "+r'$J$')
plt.show()

### - Visualizacion del resultado

In [None]:
import matplotlib.pyplot as plt

def visualize_lr(parameters, X, Y):
    """
    Draw the prediction surface of the network and overlay the dataset.

    Arguments:
    parameters -- network parameters, handed as-is to predict_multilayer
    X -- dataset with samples as columns (transposed internally); features
         0 and 1 give the horizontal and vertical coordinates
    Y -- labels; 1 is plotted in blue, anything else in red

    NOTE(review): predict_multilayer comes from outside this block; its
    result must be reshapeable to the grid's shape.
    """
    points = X.T
    h = 0.01       # distance between neighbouring grid points
    margin = .5    # padding added around the data range

    # Bounds of the plotting area.
    x_min = points[:, 0].min() - margin
    x_max = points[:, 0].max() + margin
    y_min = points[:, 1].min() - margin
    y_max = points[:, 1].max() + margin

    # Regular grid over the area and the prediction at each of its points.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    flat_grid = np.c_[xx.ravel(), yy.ravel()]
    Z = predict_multilayer(parameters, flat_grid.T)
    Z = Z.reshape(xx.shape)

    # Contour of the predictions, then the training examples on top.
    plt.figure(figsize=(7,5))
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)

    labels = np.squeeze(Y)
    color = ['blue' if label == 1 else 'red' for label in labels]
    plt.scatter(points[:,0], points[:,1], color=color)

    plt.show()

In [None]:
# Plot the predictions of the current parameters over the dataset.
visualize_lr(parameters=parameters, X=X, Y=Y)