In [2]:
import numpy as np
import matplotlib.pyplot as plt
import math
import copy
import random 
from sklearn.model_selection import train_test_split

In [3]:
rng = np.random.RandomState(54321)
np.random.seed(54321)

x_data = rng.uniform(-10,10,200) + rng.randn(200)
y_data = x_data*x_data

x_data=x_data[:, np.newaxis]
y_data=y_data[:, np.newaxis]


data = np.concatenate((x_data, y_data), axis=1)
train, test = train_test_split(data, test_size=0.1)

In [4]:

class Network(object):

    def __init__(self, sizes):
        self.num_layers = len(sizes) #numero de capas
        self.sizes = sizes #arreglo con numero de neuronas por capa
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]] #inicializamos b's aleatoriamente
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]# inicializamos w's aleatoriamente

    def feedforward(self, a):
        """Regresa vector de activaciones "a"."""
        layer = 0
        for b, w in zip(self.biases, self.weights):
            layer += 1
            z = np.dot(w, a)+b
            if (layer + 1 == self.num_layers):
                a = lin(z)
            else:
                a = sigmoid(z)
        return a
        

    def SGD(self, training_data, epochs, mini_batch_size, alpha):
        """Entrena la red neuronal usando decenso por gradiente estocastico con mini-batch
            training_data es una lista con pares "(x,y)" representando el conjunto de entrenamiento."""
        n = len(training_data)
        for j in range(epochs):
            np.random.shuffle(training_data) 
            mini_batches = [training_data[k:k+mini_batch_size] for k in range(0, n, mini_batch_size)] # crea lista de mini_batches
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, alpha)
            print ("Epoch {0} completada".format(j+1))

    def update_mini_batch(self, mini_batch, alpha):
        """Actualiza los biases(b's) y los pesos (w´s) de la red neuronal, con
           gradiente estocastigo usando backpropagation en cada minibatch.
        Minibatch es una lista con pares (x, y), and alpha es la taza de aprendizaje."""  
        nabla_b = [np.zeros(b.shape) for b in self.biases] #Vector de ceros para actualizar b's
        nabla_w = [np.zeros(w.shape) for w in self.weights] #Vector de ceros para actualizar w's
        for x, y in mini_batch: #Actualizamos b's y w's.
            delta_nabla_b, delta_nabla_w = self.backprop(x, y) #llamamos backpropagation
            #Actualizamos b's y w's.
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(alpha/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(alpha/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Regresa una tupla(nabla_b, nabla_w) representando el 
        gradiente para la funcion de costos. nabla_b y
        nabla_w son capa por capa listas de arreglos, similares a 
        self.biases y self.weights."""
        nabla_b = [np.zeros(b.shape) for b in self.biases] #lista con arreglos de ceros
        nabla_w = [np.zeros(w.shape) for w in self.weights] #lita con arreglos de ceros
        # feedforward
        activation = x
        activations = [x] # lista que almacena las activaciones de cada capa
        zs = [] # list que almacena todos los vectores z vector de cada apa
        layer = 0
        for b, w in zip(self.biases, self.weights):
            layer = layer + 1
            z = np.dot(w, activation)+b
            zs.append(z)
            if (layer + 1 == self.num_layers):
                activation = lin(z)
            else:
                activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y)
             # multiplicado por la derivada de nuestra funcion en la ultima capa que en este caso es uno 
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        #Recorremos las capas de atras hacia adelante. Donde l = 1 quiere decir la ultima capa.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        """Regresa la derivada parcial del Error cuadtratico medio."""
        return (output_activations-y)

#### Funciones que utilizamos
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

def lin(z):
    """La funcion identidad."""
    return z

def lin_prime(z):
    """Derivada de la Funcion identidad."""
    return 1


In [5]:
net = Network([1,6,1])
net.SGD(train,10, 10,3)

Epoch 1 completada
Epoch 2 completada
Epoch 3 completada
Epoch 4 completada
Epoch 5 completada
Epoch 6 completada
Epoch 7 completada
Epoch 8 completada
Epoch 9 completada
Epoch 10 completada


In [17]:
x = np.zeros(len(test))
for i in range(0,len(test)):
    x[i] = (net.feedforward(test[i,0]))
x

array([-2.69977836e+146, -7.05329105e+146, -2.69977836e+146,
       -7.05329105e+146, -2.69977836e+146, -7.05329105e+146,
       -7.05329105e+146, -2.69977836e+146, -7.05329105e+146,
       -7.05329105e+146, -7.05329105e+146, -7.05329105e+146,
       -7.05329105e+146, -2.69977836e+146, -2.69977836e+146,
       -2.69977836e+146, -7.05329105e+146, -7.05329105e+146,
       -2.69977836e+146, -2.69977836e+146])