In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import json, matplotlib
#s = json.load( open("styles/bmh_matplotlibrc.json") )
#matplotlib.rcParams.update(s)
from IPython.core.pylabtools import figsize
# Default size handed to IPython's figsize helper for the notebook's figures.
figsize(11, 5)

# Hex colour palette kept at notebook level.
colores = [
    "#348ABD",
    "#A60628",
    "#06A628",
]

In [2]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [3]:
import numpy as np

# Read the comma-separated data file into a NumPy array (one row per line).
f = np.loadtxt(fname='poker-hand-testing.data', delimiter=',')

# Seed NumPy's global random generator.
np.random.seed(10)
        
class Red(object):
    """Feed-forward network with one hidden layer, trained by batch gradient descent.

    Architecture (fixed by the weight shapes created in ``__init__``):
    10 inputs -> 5 logistic hidden units -> 10 softmax outputs. Every
    activation matrix stores one sample per column, and the first column of
    each weight matrix multiplies the constant bias unit.

    Attributes written by the methods:
        Theta_0, Theta_1: weight matrices of shape (5, 11) and (10, 6).
        regLambda: L2 regularisation strength.
        A0, Z1, A1, Z2, A2: activations of the most recent ``feedForward``.
        error: regularised cross-entropy of the most recent ``backPropagate``.
        Grad_0, Grad_1: gradients of ``error`` w.r.t. Theta_0 and Theta_1.
    """

    def __init__(self, regLambda=0.001):
        """Draw the initial weights uniformly from [0, 1).

        Args:
            regLambda: L2 regularisation strength used by the cost and its
                gradient (bias weights are not penalised).
        """
        self.Theta_0 = np.random.random((5, 11))
        self.Theta_1 = np.random.random((10, 6))
        self.regLambda = regLambda

    @staticmethod
    def _softmaxColumns(Z):
        """Column-wise softmax: one probability vector per sample.

        The per-column maximum is subtracted first so ``np.exp`` cannot
        overflow; the result is mathematically unchanged.
        """
        exps = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        return exps / np.sum(exps, axis=0, keepdims=True)

    def _forward(self, X, Theta_0, Theta_1):
        """Run a forward pass with explicit weights without touching instance state.

        Returns:
            Tuple ``(A0, Z1, A1, Z2, A2)``.

        Raises:
            ValueError: if ``X`` has no samples.
        """
        # np.asarray drops np.matrix semantics (where ``*`` is a matrix
        # product), so everything downstream is plain element-wise ndarray math.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")

        A0 = np.vstack((np.ones((1, X.shape[0])), X.T))
        Z1 = np.dot(Theta_0, A0)
        A1 = np.vstack((np.ones((1, Z1.shape[1])), logistica(Z1)))
        Z2 = np.dot(Theta_1, A1)
        # Softmax is applied to the raw scores: squashing them through the
        # logistic first would cap every class probability well below 1 and
        # would not match the (A2 - Y) output delta used in backPropagate.
        A2 = self._softmaxColumns(Z2)
        return A0, Z1, A1, Z2, A2

    def _oneHot(self, Y, m):
        """Return the targets as a (classes, m) one-hot matrix.

        ``Y`` may be a vector/column of ``m`` integer class indices, or an
        already one-hot encoded array of shape (m, classes).

        Raises:
            ValueError: if the number of targets differs from ``m`` or a label
                falls outside ``[0, classes)``.
        """
        clases = self.Theta_1.shape[0]
        Y = np.asarray(Y)
        if Y.ndim == 2 and Y.shape == (m, clases):
            return Y.astype(float).T

        etiquetas = Y.reshape(-1)
        if etiquetas.size != m:
            raise ValueError(
                "Y must hold %d labels or have shape (%d, %d)" % (m, m, clases))
        etiquetas = etiquetas.astype(int)
        if etiquetas.min() < 0 or etiquetas.max() >= clases:
            raise ValueError("labels must lie in [0, %d)" % clases)

        codificada = np.zeros((clases, m))
        codificada[etiquetas, np.arange(m)] = 1.0
        return codificada

    def _cost(self, A2, Yhot, Theta_0, Theta_1):
        """Mean categorical cross-entropy plus the L2 penalty on non-bias weights."""
        m = Yhot.shape[1]
        # Clip so that a probability that underflowed to 0 cannot produce log(0).
        datos = -np.sum(Yhot * np.log(np.clip(A2, 1e-12, None))) / m
        penalizacion = (self.regLambda / (2.0 * m)) * (
            np.sum(Theta_0[:, 1:] ** 2) + np.sum(Theta_1[:, 1:] ** 2))
        return datos + penalizacion

    def feedForward(self, X, vector=None):
        """Compute the network outputs for the input data.

        Args:
            X: array-like of shape (samples, 10).
            vector: optional flat weight vector (layout of ``vectorWeights``)
                to use instead of the stored weights.

        The activations are stored in ``A0``, ``Z1``, ``A1``, ``Z2`` and
        ``A2``; nothing is returned.
        """
        if vector is None:
            Theta_0, Theta_1 = self.Theta_0, self.Theta_1
        else:
            Theta_0, Theta_1 = self.reconstructMatrices(vector)

        self.A0, self.Z1, self.A1, self.Z2, self.A2 = self._forward(X, Theta_0, Theta_1)

    def backPropagate(self, X, Y):
        """Compute the cost and its gradient for the current weights.

        Stores the scalar cost in ``error`` and the gradients in ``Grad_0``
        and ``Grad_1`` (same shapes as ``Theta_0`` and ``Theta_1``).
        """
        self.feedForward(X)

        # Number of samples: activations hold one sample per column.
        m = self.A0.shape[1]
        Yhot = self._oneHot(Y, m)
        self.error = self._cost(self.A2, Yhot, self.Theta_0, self.Theta_1)

        # Softmax + cross-entropy gives the output delta (A2 - Y).
        Delta_2 = self.A2 - Yhot
        # Each layer's gradient pairs its delta with the activations that FED
        # the layer (A1 for Theta_1, A0 for Theta_0).
        self.Grad_1 = np.dot(Delta_2, self.A1.T) / m

        # The bias column is skipped when propagating back: the bias unit has
        # no incoming weights.
        Delta_1 = np.dot(self.Theta_1[:, 1:].T, Delta_2) * derivadaLogistica(self.Z1)
        self.Grad_0 = np.dot(Delta_1, self.A0.T) / m

        # Gradient of the L2 penalty (bias weights are not regularised).
        escala = self.regLambda / m
        self.Grad_1[:, 1:] += escala * self.Theta_1[:, 1:]
        self.Grad_0[:, 1:] += escala * self.Theta_0[:, 1:]

    def calcError(self, X, Y, vector):
        """Return the cost that the weights in ``vector`` would produce.

        Uses the same cost as ``backPropagate`` so the numerical gradient is
        comparable with the analytic one. Instance state is not modified.
        """
        Theta_0, Theta_1 = self.reconstructMatrices(vector)
        A0, _, _, _, A2 = self._forward(X, Theta_0, Theta_1)
        Yhot = self._oneHot(Y, A0.shape[1])
        return self._cost(A2, Yhot, Theta_0, Theta_1)

    def vectorWeights(self):
        """Pack every parameter of both weight matrices into one column vector.

        Returns:
            A new array of shape (Theta_0.size + Theta_1.size, 1), Theta_0
            first, each matrix flattened row by row.
        """
        vector = np.vstack((self.Theta_0.reshape((self.Theta_0.size, 1)),
                            self.Theta_1.reshape((self.Theta_1.size, 1))))
        return vector

    def reconstructMatrices(self, vector):
        """Rebuild two matrices shaped like the weight matrices from a flat vector.

        Raises:
            ValueError: if ``vector`` does not hold exactly one value per weight.
        """
        plano = np.asarray(vector).reshape(-1)
        corte = self.Theta_0.size
        if plano.size != corte + self.Theta_1.size:
            raise ValueError(
                "expected %d weights, got %d" % (corte + self.Theta_1.size, plano.size))
        M0 = plano[:corte].reshape(self.Theta_0.shape)
        M1 = plano[corte:].reshape(self.Theta_1.shape)
        return M0, M1

    def approxGradient(self, X, Y):
        """Approximate the gradient around the current weights.

        Each weight is perturbed in turn by +/- epsilon and the central
        difference of the cost is taken.

        Returns:
            Tuple of two arrays shaped like ``Theta_0`` and ``Theta_1``.
        """
        vector = self.vectorWeights()
        approx = np.zeros(vector.shape)
        perturb = np.zeros(vector.shape)
        epsilon = 0.0001

        for i in range(len(vector)):
            perturb[i] = epsilon
            loss1 = self.calcError(X, Y, vector - perturb)
            loss2 = self.calcError(X, Y, vector + perturb)
            perturb[i] = 0
            approx[i] = (loss2 - loss1) / (2 * epsilon)
        return self.reconstructMatrices(approx)

    def gradientDescent(self, X, Y, alpha, ciclos=10, checkGradient=False):
        """Fit the weights to the data in X and the expected results in Y.

        Args:
            X: inputs, shape (samples, 10).
            Y: class indices (one per sample) or one-hot targets.
            alpha: learning rate.
            ciclos: number of full-batch update steps.
            checkGradient: when true, print the analytic gradient next to its
                numerical approximation on every step (slow).

        Returns:
            Array with the cost measured before each update. When
            ``ciclos > 1`` the curve is also plotted.
        """
        errores = np.zeros(ciclos)
        for i in range(ciclos):
            self.backPropagate(X, Y)
            Grad_1 = self.Grad_1
            Grad_0 = self.Grad_0

            if checkGradient:
                ApproxT0, ApproxT1 = self.approxGradient(X, Y)
                print("Grad 0 = ", Grad_0, "\nApprox = ", ApproxT0, "\nDiff = ", Grad_0 - ApproxT0,
                      "\nGrad 1 = ", Grad_1, "\nApprox = ", ApproxT1, "\nDiff = ", Grad_1 - ApproxT1)

            # Element-wise step: every weight moves along its own partial
            # derivative (not by one shared scalar).
            self.Theta_1 -= alpha * Grad_1
            self.Theta_0 -= alpha * Grad_0
            errores[i] = self.error
        if ciclos > 1:
            plt.plot(np.arange(ciclos), errores)
        return errores

    def printOutput(self):
        """Print the inputs (bias column removed) next to the outputs of the last forward pass."""
        print(np.hstack((self.A0.T[:, 1:], self.A2.T)))

def softmax(X):
    """Softmax along axis 0.

    For a 2-D input each column is normalised on its own, so every column
    sums to 1; a 1-D input is treated as a single vector.

    Args:
        X: array-like of scores.

    Returns:
        Array of the same shape as ``X`` with values in (0, 1].
    """
    valores = np.asanyarray(X)
    # A 0-d input has no axis 0 to reduce over.
    eje = 0 if valores.ndim else None
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing to inf (which would yield nan).
    exps = np.exp(valores - np.max(valores, axis=eje))
    return exps / np.sum(exps, axis=eje)

def logistica(z):
    """Logistic (sigmoid) function, applied element-wise: 1 / (1 + e^(-z))."""
    denominador = 1 + np.exp(-z)
    return 1 / denominador

def derivadaLogistica(z):
    """Derivative of the logistic function at ``z``: s(z) * (1 - s(z)), element-wise."""
    sigmoide = logistica(z)
    complemento = 1 - sigmoide
    # np.multiply keeps the product element-wise even for np.matrix inputs,
    # where ``*`` would be a matrix product.
    return np.multiply(sigmoide, complemento)

def stable_softmax(X):
    """Overflow-safe softmax along axis 0.

    For a 2-D input each column is shifted by its own maximum and normalised
    on its own, so every column sums to 1; a 1-D input is treated as a single
    vector.

    Args:
        X: array-like of scores.

    Returns:
        Array of the same shape as ``X`` with values in (0, 1].
    """
    valores = np.asanyarray(X)
    # A 0-d input has no axis 0 to reduce over.
    eje = 0 if valores.ndim else None
    # The shift uses the per-column maximum so that no column can underflow
    # entirely just because another column holds much larger scores.
    exps = np.exp(valores - np.max(valores, axis=eje))
    return exps / np.sum(exps, axis=eje)


# Wrap the loaded table in an np.matrix.
data = np.matrix(f)

# Re-seed NumPy's global generator right before the weights are drawn.
np.random.seed(0)

# First 75000 rows: columns 0-9 become the inputs, column 10 onward the targets.
X, Y = data[:75000, :10], data[:75000, 10:]

# Build the network and run one forward pass over the selected rows.
red = Red()
red.feedForward(X)

In [4]:
@interact_manual(ciclos=(1000, 20000))
def trainRed(ciclos):
    """Run ``ciclos`` gradient-descent steps on the notebook-level network.

    The decorator is given the range (1000, 20000) for ``ciclos``; the
    learning rate is fixed at 0.3.
    """
    tasa_aprendizaje = 0.3
    red.gradientDescent(X, Y, tasa_aprendizaje, ciclos)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KEludFNsaWRlcih2YWx1ZT0xMDUwMCwgZGVzY3JpcHRpb249dSdjaWNsb3MnLCBtYXg9MjAwMDAsIG1pbj0xMDAwKSwgQnV0dG9uKGRlc2NyaXB0aW9uPXXigKY=
