# XOR

In [3]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import json, matplotlib
#s = json.load( open("styles/bmh_matplotlibrc.json") )
#matplotlib.rcParams.update(s)
from IPython.core.pylabtools import figsize
figsize(11, 5)
colores = ["#348ABD", "#A60628","#06A628"]

In [4]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [5]:
import numpy as np

## Función de activación

In [6]:
# Devuelve la función logística evaluada
# componente por componente
def logistica(z):
    return 1 / (1 + np.exp(-z))

In [7]:
## Función que, dado un arreglo de valores z
## calcula el valor de la derivada para cada entrada.

def derivadaLogistica(z):
    g = logistica(z)
    return g * (1 - g)

## Red neuronal
La red implementa encadenamiento hacia adelante (para evaluar) y hacia atrás (para entrenarse).

In [8]:
np.random.seed(10)

In [59]:
class XOR:
    def __init__(self):
        self.Theta_0 = np.random.random((2,3))
        self.Theta_1 = np.random.random((1,3))
    
    def feedForward(self, X, vector = None):
        """ Calcula las salidas, dados los datos de entrada. """
        if vector is None:
            Theta_0 = self.Theta_0
            Theta_1 = self.Theta_1
        else:
            Theta_0, Theta_1 = self.reconstructMatrices(vector)
        
        self.A0 = np.vstack((np.ones((1, X.shape[0])), X.T))
        self.Z1 = np.dot(Theta_0, self.A0)
        self.A1 = np.vstack((np.ones((1, self.Z1.shape[1])), logistica(self.Z1)))
        self.Z2 = np.dot(Theta_1, self.A1)
        self.A2 = logistica(self.Z2)
        
    def backPropagate(self, X, Y):
        """ Calcula el error y su gradiente,
        dados los pesos actuales de la red y los resultados
        esperados.
        """
        self.feedForward(X)
        
        m = X.shape[0]
        #Delta_2 = (Y.T - self.A2)
        Delta_2 = Y.T * np.log(self.A2) + (1 - Y.T) * np.log(1 - self.A2)
        #self.error = np.sum(Delta_2 ** 2) / (2 * m) # Suma renglones (clases) y columnas (ejemplares)
        self.error = -1/m * np.sum(Delta_2)
            
        Delta_2 = (Y.T - self.A2)
        self.Grad_1 = - np.dot(Delta_2, self.A2.T) / m
        
        Delta_1 = np.dot(self.Theta_1[:,1:].T, Delta_2) * derivadaLogistica(self.Z1)
        self.Grad_0 = - np.dot(Delta_1, self.A1.T) / m
        
    def calcError(self, X, Y, vector):
        """
        Calcula el error que se cometería utilizando los pesos en 'vector'.
        """
        self.feedForward(X, vector)
        m = X.shape[0]
        #Delta_2 = (Y.T - self.A2)
        Delta_2 = Y.T * np.log(self.A2) + (1 - Y.T) * np.log(1 - self.A2)
        #error = np.sum(Delta_2.T ** 2) / (2 * m)
        error = -1/m * np.sum(Delta_2)
        return error
    
    def vectorWeights(self):
        """
        Acomoda a todos los parámetros en las matrices de pesos, en un solo vector.
        """
        vector = np.vstack((self.Theta_0.reshape((self.Theta_0.size, 1)),
                          self.Theta_1.reshape((self.Theta_1.size, 1))))
        #print(self.Theta_0, self.Theta_1, vector)
        return vector
    
    def reconstructMatrices(self, vector):
        """
        Dado un vector, rearma matrices del tamaño de las matrices de pesos.
        """
        M0 = vector[0:self.Theta_0.size].reshape(self.Theta_0.shape)
        M1 = vector[self.Theta_0.size:].reshape(self.Theta_1.shape)
        return M0, M1
        
    def approxGradient(self, X, Y):
        """
        Aproxima el valor del gradiente alrededor de los pesos actuales,
        perturbando cada valor, uno por uno.
        """
        vector = self.vectorWeights().copy()
        approx = np.zeros(vector.shape)
        perturb = np.zeros(vector.shape)
        epsilon = 0.0001
        
        for i in range(len(vector)):
            perturb[i] = epsilon
            loss1 = self.calcError(X, Y, vector - perturb)
            loss2 = self.calcError(X, Y, vector + perturb)
            perturb[i] = 0
            approx[i] = (loss2 - loss1) / (2 * epsilon)
        return self.reconstructMatrices(approx)
        
    def gradientDescent(self, X, Y, alpha, ciclos=10, checkGradient = False):
        """ Evalúa y ajusta los pesos de la red,
        de acuerdo a los datos en X y los resultados
        esperados, en Y.
        """
        errores = np.zeros(ciclos)
        for i in range(ciclos):
            self.backPropagate(X, Y)
            Grad_1 = self.Grad_1
            Grad_0 = self.Grad_0
            if checkGradient:
                ApproxT0, ApproxT1 = self.approxGradient(X, Y)
                print("Grad 0 = ", Grad_0, "\nApprox = ", ApproxT0, "\nDiff = ", Grad_0 - ApproxT0,
                     "\nGrad 1 = ", Grad_1, "\nApprox = ", ApproxT1, "\nDiff = ", Grad_1 - ApproxT1)
            self.Theta_1 -= alpha * Grad_1
            print(self.Theta_1.shape)
            print(Grad_1.shape)
            self.Theta_0 -= alpha * Grad_0
            errores[i] = self.error
        if ciclos > 1:
            plt.plot(np.arange(ciclos), errores)
        
    def printOutput(self):
        print(np.hstack((self.A0.T[:,1:], self.A2.T)))

In [60]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])
xor = XOR()
xor.feedForward(X)
xor.printOutput()

[[0.         0.         0.7012568 ]
 [0.         1.         0.70502782]
 [1.         0.         0.72161646]
 [1.         1.         0.72433194]]


In [61]:
print(xor.vectorWeights())


[[0.58419583]
 [0.70884983]
 [0.14853345]
 [0.42845074]
 [0.69389007]
 [0.10461974]
 [0.43960524]
 [0.16620215]
 [0.50697863]]


In [62]:
xor.gradientDescent(X, Y, 0.3, 1, checkGradient = True)

('Grad 0 = ', array([[0.00671123, 0.00471641, 0.00448747],
       [0.02207586, 0.01563276, 0.0148788 ]]), '\nApprox = ', array([[0.02684494, 0.01080645, 0.00787631],
       [0.08830346, 0.03823133, 0.0295281 ]]), '\nDiff = ', array([[-0.0201337 , -0.00609005, -0.00338883],
       [-0.06622759, -0.02259857, -0.0146493 ]]), '\nGrad 1 = ', array([[0.15189203]]), '\nApprox = ', array([[0.85223302, 0.61833365, 0.5888194 ]]), '\nDiff = ', array([[-0.70034099, -0.46644162, -0.43692738]]))
(1, 3)
(1, 1)


In [54]:
@interact_manual(ciclos = (50, 2000))
def trainXOR(ciclos):
    xor.gradientDescent(X, Y, 0.3, ciclos)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KEludFNsaWRlcih2YWx1ZT0xMDI1LCBkZXNjcmlwdGlvbj11J2NpY2xvcycsIG1heD0yMDAwLCBtaW49NTApLCBCdXR0b24oZGVzY3JpcHRpb249dSdSdW7igKY=


In [26]:
xor.feedForward(X)
xor.printOutput()
print("Theta_0 = ", xor.Theta_0, "\nTheta_1", xor.Theta_1)

[[0.         0.         0.49692713]
 [0.         1.         0.50143153]
 [1.         0.         0.49910208]
 [1.         1.         0.50308408]]
('Theta_0 = ', array([[0.9041104 , 0.67725557, 0.50360587],
       [0.18799615, 0.48319105, 0.76940289]]), '\nTheta_1', array([[-0.04557463, -0.05339381,  0.13035917]]))


In [14]:
from IPython.core.display import HTML
def css_styling():
    styles = open("styles/custom.css", "r").read() #or edit path to custom.css
    return HTML(styles)
css_styling()

IOError: [Errno 2] No such file or directory: 'styles/custom.css'