<img src="res/itm_logo.jpg" width="300px">

## Inteligencia Artificial - IAI84
### Instituto Tecnológico Metropolitano
#### Pedro Atencio Ortiz - 2018


En este notebook se aborda el tema de aprendizaje de máquina para clasificación binaria utilizando Regresión Logística:
1. Propagación hacia adelante (forward propagation)
2. Función de pérdida
3. Función de costo
4. Descenso del gradiente
5. Predicción

<hr>
# 1. Propagación hacia adelante (forward propagation)

In [None]:
import numpy as np

In [None]:
def sigmoid(z):
    """
    Apply the logistic function 1 / (1 + e^(-z)) element-wise.

    Arguments:
    z -- scalar or numpy array of linear activations

    Returns:
    a -- sigmoid of z, with the same shape as z
    """
    denominator = 1. + np.exp(-z)
    return 1. / denominator

In [None]:
# Fixed seed so the printed activations are reproducible. The seed variable
# is now actually passed to the generator, so editing it has an effect.
seed = 2
np.random.seed(seed)
# Three standard-normal draws arranged as a (1, 3) row vector.
z = np.random.randn(1, 3)
print(sigmoid(z))

In [None]:
def linear_activation(W, b, X):
    """
    Compute the linear part of the model: W^T . X + b.

    Arguments:
    W -- weights; its transpose is multiplied with X
    b -- bias, added to every entry of the product
    X -- input features

    Returns:
    z -- result of np.dot(W.T, X) + b
    """
    weighted_inputs = np.dot(np.transpose(W), X)
    return weighted_inputs + b

In [None]:
# Toy inputs: transposing the 2x3 literal gives X with shape (3, 2).
# The labels and weights below imply one column per example and one row per feature.
X = np.array([[1,2,3],[4,5,6]]).T
print("X: ",X)

# One label per column of X, stored as a (1, 2) row vector.
Y = np.array([[0, 1]])
print("Y: ", Y)

# Column vector of weights, shape (3, 1): one weight per row of X.
W = np.array([[0.4], [-0.5], [0.01]])
print("W: ", W)

# Scalar bias, added to every linear activation.
b = 0.3
print("b: ", b)

# Forward propagation: linear activation followed by the sigmoid.
A = sigmoid(linear_activation(W, b, X))

print("forward propagation: ", A)

<hr>
# 2. Función de pérdida

In [None]:
def loss(y, a, eps=1e-15):
    """
    Binary cross-entropy (log loss), computed element-wise.

    Arguments:
    y -- true labels (0 or 1), scalar or numpy array
    a -- predicted probabilities, scalar or array broadcastable with y
    eps -- clipping margin: a is limited to [eps, 1 - eps] before taking
           logarithms. Predictions strictly inside that interval are
           not modified.

    Returns:
    -(y * log(a) + (1 - y) * log(1 - a)) for each element
    """
    # A saturated prediction (a exactly 0 or 1) would evaluate log(0) and
    # produce inf, or nan through 0 * log(0). Clipping keeps the loss finite.
    a = np.clip(a, eps, 1. - eps)
    return -(y * np.log(a) + (1 - y) * np.log(1 - a))

In [None]:
seed = 2 # fixed seed so the printed result can be verified
np.random.seed(seed)
# The three draws below consume the seeded generator in this order;
# reordering them would change every value.
W = np.random.randn(2,1)
b = np.random.rand()
X = np.random.randn(2, 3)

Y = np.array([[1,1,0]]) # original labels, one per column of X
A = sigmoid(linear_activation(W,b,X)) # forward activation

# loss() is element-wise, so this prints one loss value per example.
print("Perdida dato a dato: ", loss(Y, A))

<hr>
# 3. Función de costo

In [None]:
def cost(logloss):
    """
    Collapse per-example losses into a single cost value.

    Arguments:
    logloss -- array of per-example loss values

    Returns:
    Mean of all the entries in logloss
    """
    average_loss = np.mean(logloss)
    return average_loss

In [None]:
# Per-example log-loss values, hard-coded as a (1, 3) array.
logloss = np.array([[0.22068428,  0.24198147,  1.27491702]])
# The cost is the mean of the per-example losses.
print("costo: ", cost(logloss))

<hr>
# 4. Descenso del gradiente (Gradient Descent) 

In [None]:
# Fixed seed so the random features below are reproducible.
seed = 2
np.random.seed(seed)

# Random features with shape (3, 2); Y holds one label per column of X.
X = np.random.rand(3,2)
Y = np.array([[0, 1]])

# Number of examples = number of columns of X.
m = X.shape[1]

# Hand-picked starting parameters: one weight per row of X, plus a scalar bias.
W = np.array([[0.1], [-0.1], [0.01]])
b = 0.1

print("m: ", m)
print("W inicial: ",W)
print("b inicial: ",b)

In [None]:
learning_rate = 0.05
num_iterations = 1000  # gradient-descent steps to run
report_every = 100     # print the cost once every this many steps

for i in range(num_iterations):
    # Forward pass: predicted probabilities for every example.
    Z = linear_activation(W, b, X)
    A = sigmoid(Z)

    # Cost of the current parameters, measured before this step's update.
    J = cost(loss(Y, A))

    # Gradients of the cost with respect to W and b.
    dz = A - Y
    dW = np.dot(X, dz.T) / m
    db = np.sum(dz) / m

    # Move the parameters against the gradient.
    W -= learning_rate * dW
    b -= learning_rate * db

    if i % report_every == 0:
        print("costo: ", J)

print("W actualizado: ", W)
print("b actualizado: ", b)
print("costo total: ", J)

# 5. Predicción

La predicción consiste en aplicar forward propagation utilizando los W y b optimizados mediante descenso del gradiente.

In [None]:
def predict(W,b,X):
    """
    Predict hard class labels with a logistic regression model.

    Arguments:
    W -- weights of the model
    b -- bias of the model
    X -- input features

    Returns:
    The sigmoid activations of the model, rounded with np.round
    """
    probabilities = sigmoid(linear_activation(W, b, X))
    return np.round(probabilities)

In [None]:
# predict() already returns rounded 0/1 labels, so the extra np.round that
# used to wrap Y_hat was redundant and has been dropped.
Y_hat = predict(W, b, X)
print("predicciones: ", Y_hat)
print("clases originales: ", Y)

<hr>
# Regresión Logística sobre un dataset

In [None]:
'''
    Utility functions
'''

import numpy as np
import sklearn
from sklearn import datasets
import matplotlib.pyplot as plt

def generate_data(data_type, noise=0.2):
    """
    Generate a binary dataset with distribution data_type

    Arguments:
    data_type -- distribution of dataset {moons,circles,blobs}
    noise -- passed as `noise` to make_moons / make_circles and as
             `cluster_std` to make_blobs

    Returns:
    X -- features
    Y -- labels

    Raises:
    ValueError -- if data_type is not one of the supported names
    """
    supported = ('moons', 'circles', 'blobs')
    if data_type not in supported:
        # Previously an unknown name fell through to `return X, Y` and
        # failed with an UnboundLocalError.
        raise ValueError(
            "Unknown data_type %r; expected one of %s" % (data_type, ", ".join(supported))
        )

    # Re-seed NumPy's global generator on every call. The sklearn generators
    # are called without random_state and are expected to draw from it.
    np.random.seed(0)
    if data_type == 'moons':
        X, Y = datasets.make_moons(200, noise=noise)
    elif data_type == 'circles':
        X, Y = datasets.make_circles(200, noise=noise)
    else:
        # 'blobs': n_samples is not passed, so sklearn's default applies.
        X, Y = datasets.make_blobs(centers=2, cluster_std=noise)
    return X, Y

def visualize_lr(W, b, X, y):
    """
    Plots a classification boundary for a logistic regression model
    defined by W and b, using X (inputs) and y (outputs)

    Arguments:
    W -- weights of lr model
    b -- bias of lr model
    X -- features, one column per example; the first two rows are used
         as the plot's horizontal and vertical coordinates
    y -- labels, one entry per example (flattened before plotting)

    Returns:
    None -- the figure is displayed with plt.show()
    """
    # Work with one row per example for plotting.
    X = X.T
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the class for every grid point; the grid is transposed so
    # that each column is one point, matching the layout predict() is given.
    Z = predict(W, b, np.c_[xx.ravel(), yy.ravel()].T)
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.figure(figsize=(7, 5))
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    # Flatten the labels: a 2-D `c` with 3 or 4 columns can be interpreted
    # by matplotlib as RGB/RGBA rows rather than values to colour-map.
    plt.scatter(X[:, 0], X[:, 1], c=np.ravel(y), cmap=plt.cm.Spectral)
    plt.show()

In [None]:
# Draw the two-blob dataset, then store the labels as a single-row 2-D array
# so they line up with the row-vector activations used by the model.
X, Y = generate_data('blobs', noise=1.5)
Y = Y.reshape(1, -1)
print(X.shape)
print(Y.shape)

In [None]:
# Colour each sample by its label: red for class 1, green otherwise.
color= ['red' if y == 1 else 'green' for y in np.squeeze(Y)]

plt.figure(figsize=(7,5))
# Here the two columns of X are used as the point coordinates
# (presumably X is still (n_samples, 2) as returned by generate_data).
plt.scatter(X[:,0], X[:,1], color=color)

plt.show()

# Switch to one column per example, the layout the cells below rely on
# (m = X.shape[1]).
# NOTE(review): this rebinds the global X, so re-running this cell on its
# own transposes X back and breaks the cells that follow.
X = X.T

In [None]:
# 1. Initialise the parameters W and b
# Number of columns of X (one column per example in the layout used here).
m = X.shape[1]

# One random weight per row of X, drawn from NumPy's global generator,
# so the values depend on the generator's current state.
W = np.random.randn(X.shape[0],1)
b = 0

print("m: ", m)
print("W inicial: ",W)
print("b inicial: ",b)

In [None]:
# First, look at the classification produced by the untrained model,
# i.e. with the randomly initialised W and the initial b.
visualize_lr(W, b, X, Y)

In [None]:
# 2. Logistic regression via gradient descent

learning_rate = 0.05
n_iterations = 10000  # number of gradient-descent steps (the old comment said 1000)

for i in range(n_iterations):
    # Forward pass: predicted probabilities for every example.
    Z = linear_activation(W, b, X)
    A = sigmoid(Z)

    # Gradients of the cost with respect to W and b.
    dz = A - Y
    dW = np.dot(X, dz.T) / m
    db = np.sum(dz) / m

    # Cost before this step's update, computed with the notebook's shared
    # helpers instead of a duplicated inline formula. The mean over the
    # (1, m) loss array equals the previous sum divided by m.
    J = cost(loss(Y, A))

    W -= learning_rate * dW
    b -= learning_rate * db

    if i % 1000 == 0:
        print("costo: ", J)

print("W actualizado: ", W)
print("b actualizado: ", b)
# Report the configured step count rather than relying on the loop
# variable still being bound after the loop.
print("costo final (error), despues de ", n_iterations, " iteraciones: ", J)

In [None]:
# Rounded 0/1 predictions of the model on the training inputs, using the current W and b.
print(predict(W,b,X))

In [None]:
# Decision regions with the current W and b, for comparison with the plot
# drawn before training.
visualize_lr(W, b, X, Y)