In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# Perceptrón Multicapa

## Capas

In [2]:
class Layer:
    """Base building block of the network.

    On its own it behaves as an identity layer: the forward pass returns its
    input, the backward pass returns the incoming gradient, and ``update``
    does nothing.
    """

    def __init__(self):
        # Two independent, initially empty lists: trainable parameters and
        # the gradients that correspond to them.
        self.params, self.grads = [], []

    def __call__(self, x):
        """Forward pass: hand the input back untouched."""
        output = x
        return output

    def backward(self, grad):
        """Backward pass: hand the incoming gradient back untouched."""
        upstream = grad
        return upstream

    def update(self, params):
        """Receive the new parameter values computed by the optimizer.

        The base layer has no parameters, so the argument is ignored and
        ``None`` is returned.
        """
        return None

In [3]:
class Linear(Layer):
    """Fully connected layer computing ``np.dot(x, W) + b``.

    The weight matrix has shape ``(dim_in, dim_out)`` and is drawn from a
    zero-mean normal distribution with standard deviation
    ``sqrt(2 / (dim_in + dim_out))``; the bias starts at zero.

    The attribute names keep the spelling ``weigths`` / ``grad_weigths`` used
    throughout this class so that any code reading them keeps working.
    """

    def __init__(self, dim_in, dim_out):
        """Create the layer.

        Args:
            dim_in: number of input features.
            dim_out: number of output features.
        """
        self.weigths = np.random.normal(loc=0.0,
                                        scale=np.sqrt(2 / (dim_in + dim_out)),
                                        size=(dim_in, dim_out))
        self.bias = np.zeros(dim_out)
        # Expose parameters right away and start with no gradients, so the
        # attributes exist even before the first forward/backward pass.
        self.params = [self.weigths, self.bias]
        self.grads = []

    def __call__(self, x):
        """Forward pass.

        Stores ``x`` for the backward pass and returns
        ``np.dot(x, weigths) + bias``.
        """
        self.x = x
        self.params = [self.weigths, self.bias]
        return np.dot(x, self.weigths) + self.bias

    def backward(self, grad_output):
        """Backward pass.

        Args:
            grad_output: gradient of the loss with respect to this layer's
                output; its first axis must match the first axis of the input
                stored by the last forward pass.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        # Gradient handed to the layer that produced our input.
        grad_input = np.dot(grad_output, self.weigths.T)

        # Gradients used to update this layer's own parameters. The bias
        # gradient is the sum of the incoming gradient over the first axis.
        self.grad_weigths = np.dot(self.x.T, grad_output)
        self.grad_bias = grad_output.sum(axis=0)
        self.grads = [self.grad_weigths, self.grad_bias]
        return grad_input

    def update(self, params):
        """Replace weights and bias with the values given by the optimizer.

        Args:
            params: ``[weights, bias]``. An empty sequence (what an optimizer
                produces when no gradients have been computed yet) is treated
                as "nothing to update".
        """
        if len(params) == 0:
            return
        self.weigths, self.bias = params
        # Keep the exposed parameter list in sync with the new arrays.
        self.params = [self.weigths, self.bias]

In [4]:
class ReLU(Layer):
    """Rectified linear activation applied element-wise."""

    def __call__(self, x):
        """Remember the input for the backward pass and return ``max(0, x)``."""
        self.x = x
        activated = np.maximum(0, x)
        return activated

    def backward(self, grad_output):
        """Let the gradient through only where the stored input was positive."""
        positive_mask = self.x > 0
        return grad_output * positive_mask

In [5]:
class Sigmoid(Layer):
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    @staticmethod
    def _activation(x):
        """Evaluate the logistic function on ``x``.

        Declared as a static method because it needs no instance state; both
        the forward and the backward pass call it through ``self``.
        """
        return 1 / (1 + np.exp(-x))

    def __call__(self, x):
        """Forward pass: store the input and return its sigmoid."""
        self.x = x
        return self._activation(x)

    def backward(self, grad_output):
        """Backward pass.

        Multiplies the incoming gradient by the sigmoid derivative
        ``s * (1 - s)``, evaluated at the input stored by the forward pass.
        """
        sigmoid = self._activation(self.x)
        local_grad = sigmoid * (1 - sigmoid)
        return grad_output * local_grad

In [11]:
def softmax(x):
    """Softmax along the last axis.

    Args:
        x: array of scores; normalization is done over the last axis.

    Returns:
        Array of the same shape whose entries along the last axis are
        non-negative and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)

## Perceptrón

In [6]:
class MLP:
    """Multi-layer perceptron: an ordered sequence of callable layers."""

    def __init__(self, layers):
        """Store the layers.

        Args:
            layers: sequence of callables, applied in order by ``__call__``.
        """
        self.layers = layers

    def __call__(self, x):
        """Compute the model output by applying each layer in sequence.

        Args:
            x: input passed to the first layer.

        Returns:
            The output of the last layer (``x`` itself if there are no layers).
        """
        for layer in self.layers:
            # Call the current layer, not the whole collection.
            x = layer(x)
        return x

## Optimización 
Algoritmo de optimización

In [7]:
class SGD():
    """Stochastic gradient descent.

    Moves every parameter of every layer one step against its gradient:
    ``param - lr * grad``.
    """

    def __init__(self, net, lr):
        """Create the optimizer.

        Args:
            net: model exposing a ``layers`` iterable; each layer provides
                ``params``, ``grads`` and ``update(new_params)``.
            lr: learning rate (step size).
        """
        self.net = net
        self.lr = lr

    def update(self):
        """Apply one gradient-descent step to every layer of the network."""
        for layer in self.net.layers:
            # Parameters and gradients are paired positionally.
            layer.update([
                params - self.lr * grads
                for params, grads in zip(layer.params, layer.grads)
            ])

## Funciones de Pérdida

In [8]:
class Loss():
    """Base class for loss functions attached to a network.

    Subclasses are expected to provide ``grad_loss()``; this class only
    drives backpropagation through the network's layers.
    """

    def __init__(self, net):
        self.net = net

    def backward(self):
        """Backpropagate the loss gradient through the network.

        Starts from ``grad_loss()`` (the derivative of the loss with respect
        to the network output) and passes it through every layer's
        ``backward`` from the last layer to the first.
        """
        upstream = self.grad_loss()

        # Each layer turns the gradient w.r.t. its output into the gradient
        # w.r.t. its input, which the preceding layer then consumes.
        for stage in reversed(self.net.layers):
            upstream = stage.backward(upstream)

In [9]:
class MSE(Loss):
    """Mean squared error between the network output and the target."""

    def __call__(self, output, target):
        """Store the batch and return the mean of the squared residuals.

        ``target`` is reshaped to ``output.shape`` before it is stored.
        """
        aligned_target = target.reshape(output.shape)
        self.output = output
        self.target = aligned_target
        residual = self.grad_loss()
        squared_error = np.mean(residual ** 2)
        return squared_error.mean()

    def grad_loss(self):
        """Return the residual ``output - target``.

        This is the plain residual; it is not multiplied by the ``2 / N``
        factor of the exact derivative of the mean.
        """
        residual = self.output - self.target
        return residual

In [10]:
class BCE(Loss):
    """Binary cross entropy between predicted probabilities and 0/1 targets."""

    def __call__(self, output, target):
        """Store the batch and return the mean binary cross entropy.

        Args:
            output: predicted probabilities.
            target: labels; reshaped to ``output.shape`` before use.

        Returns:
            ``-mean(t * log(p) + (1 - t) * log(1 - p))``.
        """
        self.output, self.target = output, target.reshape(output.shape)
        # Work in float64 and keep probabilities strictly inside (0, 1) so
        # that log() never receives 0 when a prediction saturates.
        eps = 1e-12
        probs = np.clip(np.asarray(self.output, dtype=float), eps, 1 - eps)
        # Both terms are ADDED inside the mean; the single leading minus
        # turns the log-likelihood into a loss.
        log_likelihood = (self.target * np.log(probs)
                          + (1 - self.target) * np.log(1 - probs))
        return -np.mean(log_likelihood)

    def grad_loss(self):
        """Return ``output - target``.

        NOTE(review): ``output - target`` is the gradient of the per-sample
        BCE with respect to pre-sigmoid scores, not with respect to the
        probabilities themselves - confirm how the network's final layer is
        wired before relying on it.
        """
        return self.output - self.target

In [12]:
class CrossEntropy(Loss):
    """Softmax cross entropy computed directly from unnormalized scores."""

    def __call__(self, output, target):
        """Store the batch and return the mean cross entropy.

        Args:
            output: scores, indexed as ``output[sample, class]``.
            target: index of the correct class for each sample (used as an
                integer index into the class axis).

        Returns:
            Mean over samples of ``logsumexp(scores) - score_of_true_class``.
        """
        self.output, self.target = output, target
        rows = np.arange(len(output))
        true_class_scores = output[rows, target]
        # Log-sum-exp with the row maximum factored out, so exp() cannot
        # overflow when scores are large.
        peak = np.max(output, axis=-1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(output - peak), axis=-1)) + peak[..., 0]
        loss = log_norm - true_class_scores
        return loss.mean()

    def grad_loss(self):
        """Gradient of the mean loss with respect to the scores.

        Returns ``(softmax(output) - one_hot(target)) / batch_size``.
        """
        answers = np.zeros_like(self.output)
        answers[np.arange(len(self.output)), self.target] = 1
        return (-answers + softmax(self.output)) / self.output.shape[0]