In [1]:
import numpy as np
import tensorflow as tf

In [18]:
# Helper that prepends the bias term as the first row of a matrix.
# x has shape (n, k)
# Returns a matrix of shape (n+1, k)
def addBias(x):
    """Stack a row of ones on top of the 2-D array ``x``.

    Args:
        x: array of shape (n, k); its second dimension sets the row width.

    Returns:
        Array of shape (n+1, k) whose first row is all ones.
    """
    n_cols = x.shape[1]
    bias_row = np.ones(n_cols)
    return np.vstack((bias_row, x))

In [5]:
# XOR inputs: one row per example, one column per feature
Xtrain = np.array([
    [0., 0.],
    [0., 1.],
    [1., 0.],
    [1., 1.],
])
Xtrain

array([[0., 0.],
       [0., 1.],
       [1., 0.],
       [1., 1.]])

In [3]:
# XOR targets as a column vector (one row per example)
Ytrain = np.array([[0.], [1.], [1.], [0.]])
Ytrain

array([[0.],
       [1.],
       [1.],
       [0.]])

## Modelo en TensorFlow

In [91]:
def entrenarModelo(x, y, k1=3, k2=2, epochs=100, learning_rate=0.05, status_rate=100, seed=None):
    """Train a network with two ReLU hidden layers and a linear output using MSE.

    The whole data set is fed on every epoch (full-batch gradient descent).

    Args:
        x: array of shape (m, n) with one example per row; it is transposed
            before being fed to the graph.
        y: array of shape (m, 1) with the targets; it is transposed as well.
        k1: number of units in the first hidden layer.
        k2: number of units in the second hidden layer.
        epochs: number of gradient-descent steps.
        learning_rate: step size passed to the optimizer.
        status_rate: print the cost every ``status_rate`` epochs; a falsy value
            (0 or None) disables printing.
        seed: optional graph-level random seed for the weight initialisation;
            None keeps the original non-deterministic behaviour.

    Returns:
        Tuple ``(W1, W2, W3, W1_0, W2_0, W3_0)`` of NumPy arrays with shapes
        (k1, n), (k2, k1), (1, k2), (k1, 1), (k2, 1) and (1, 1).
    """
    # Number of features
    n = x.shape[1]

    # Graph definition
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        if seed is not None:
            # Graph-level seed so the truncated-normal initial weights are repeatable
            tf.set_random_seed(seed)

        # Data placeholders (features/targets along rows, examples along columns)
        X = tf.placeholder(tf.float32, shape=(n, None), name="X")
        Y = tf.placeholder(tf.float32, shape=(1, None), name="Y")
        lr = tf.placeholder(tf.float32)

        # Weights
        Theta1 = tf.Variable(tf.truncated_normal(shape=(k1, n)), name="W1")
        Theta2 = tf.Variable(tf.truncated_normal(shape=(k2, k1)), name="W2")
        # The output layer consumes the k2 activations of the second hidden
        # layer, so its width must follow k2 (it used to be hard-coded to 2).
        Theta3 = tf.Variable(tf.truncated_normal(shape=(1, k2)), name="W3")
        # Biases
        Theta1_0 = tf.Variable(tf.zeros(shape=(k1, 1)), name="W1_0")
        Theta2_0 = tf.Variable(tf.zeros(shape=(k2, 1)), name="W2_0")
        Theta3_0 = tf.Variable(tf.zeros(shape=(1, 1)), name="W3_0")

        # Forward propagation
        with tf.name_scope("fwd_prop"):
            # First hidden layer
            a_2 = tf.nn.relu(tf.add(tf.matmul(Theta1, X), Theta1_0), name="a_2")
            # Second hidden layer
            a_3 = tf.nn.relu(tf.add(tf.matmul(Theta2, a_2), Theta2_0), name="a_3")
            # Output layer (linear)
            a_4 = tf.add(tf.matmul(Theta3, a_3), Theta3_0, name="a_4")

        # Cost function
        with tf.name_scope("cost_fn"):
            cost = tf.losses.mean_squared_error(labels=Y, predictions=a_4)

        # Gradient descent optimizer
        with tf.name_scope("GradientDes.Optimizer"):
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(cost)

        # Global variables initializer
        init = tf.global_variables_initializer()

    # Graph execution
    with tf.Session(graph=g) as sess:
        # Initialise the graph variables
        sess.run(init)

        data_feed = {X: x.T, Y: y.T}
        train_feed = {X: x.T, Y: y.T, lr: learning_rate}

        # One full-batch update per epoch
        for epoch in range(1, epochs + 1):
            sess.run(optimizer, feed_dict=train_feed)
            # Report the cost periodically; it does not depend on the learning rate
            if status_rate and epoch % status_rate == 0:
                c = sess.run(cost, feed_dict=data_feed)
                print("Epoch = %d,\tCosto: %0.4f" % (epoch, c))

        # Fetch the final weight matrices and biases
        W1, W2, W3, W1_0, W2_0, W3_0 = sess.run([Theta1, Theta2, Theta3, Theta1_0, Theta2_0, Theta3_0])
        return W1, W2, W3, W1_0, W2_0, W3_0

In [92]:
# Train the ReLU/MSE network on the XOR data and keep the learned parameters
W1, W2, W3, W1_0, W2_0, W3_0 = entrenarModelo(
    Xtrain,
    Ytrain,
    learning_rate=0.01,
    epochs=1000,
    status_rate=100,
)

Epoch = 100,	Costo: 0.2543
Epoch = 200,	Costo: 0.2501
Epoch = 300,	Costo: 0.2500
Epoch = 400,	Costo: 0.2500
Epoch = 500,	Costo: 0.2500
Epoch = 600,	Costo: 0.2500
Epoch = 700,	Costo: 0.2500
Epoch = 800,	Costo: 0.2500
Epoch = 900,	Costo: 0.2500
Epoch = 1000,	Costo: 0.2500


In [103]:
# First hidden layer: weights, then biases
print(W1, "\n", W1_0, sep=" ")

[[-0.357879   -0.4056135 ]
 [ 0.03135432 -1.9029738 ]
 [ 0.21004152 -1.2075989 ]] 
 [[ 0.        ]
 [-0.04152244]
 [-0.02947292]]


In [104]:
# Second hidden layer: weights, then biases
print(W2, "\n", W2_0, sep=" ")

[[-0.05889202  1.1899521   0.3884971 ]
 [ 0.16063769 -0.5221472  -1.5411413 ]] 
 [[-0.07431808]
 [ 0.        ]]


In [105]:
# Output layer: weights, then bias
print(W3, "\n", W3_0, sep=" ")

[[-1.0535052  -0.25866246]] 
 [[0.49999925]]


In [113]:
# ReLU activation function
def ReLU(x):
    """Return the element-wise maximum of ``x`` and 0."""
    floor = 0.
    return np.maximum(floor, x)

def forwardProp(X, W1, W2, W3, W1_0, W2_0, W3_0):
    """Run the two-hidden-layer ReLU network forward with a linear output.

    Args:
        X: input matrix, multiplied on the left by ``W1``.
        W1, W2, W3: weight matrices of the first hidden, second hidden and
            output layers.
        W1_0, W2_0, W3_0: biases added after each matrix product.

    Returns:
        The output-layer pre-activation (no activation is applied to it).
    """
    # First hidden layer
    hidden_1 = ReLU(np.matmul(W1, X) + W1_0)
    # Second hidden layer
    hidden_2 = ReLU(np.matmul(W2, hidden_1) + W2_0)
    # Output layer: identity activation
    return np.matmul(W3, hidden_2) + W3_0

In [114]:
# Network predictions for the four XOR inputs (examples along columns)
forwardProp(
    Xtrain.T,
    W1, W2, W3,
    W1_0, W2_0, W3_0,
)

array([[0.49999925, 0.49999925, 0.49999925, 0.49999925]])

In [115]:
# Retrain from a fresh random initialisation with the same hyperparameters
W1, W2, W3, W1_0, W2_0, W3_0 = entrenarModelo(
    Xtrain,
    Ytrain,
    learning_rate=0.01,
    epochs=1000,
    status_rate=100,
)

Epoch = 100,	Costo: 0.2423
Epoch = 200,	Costo: 0.2355
Epoch = 300,	Costo: 0.2311
Epoch = 400,	Costo: 0.2254
Epoch = 500,	Costo: 0.2172
Epoch = 600,	Costo: 0.2101
Epoch = 700,	Costo: 0.2013
Epoch = 800,	Costo: 0.1913
Epoch = 900,	Costo: 0.1797
Epoch = 1000,	Costo: 0.1666


In [116]:
# Predictions of the retrained network for the four XOR inputs
forwardProp(
    Xtrain.T,
    W1, W2, W3,
    W1_0, W2_0, W3_0,
)

array([[0.47460448, 0.70204456, 0.49718089, 0.31533585]])

In [118]:
# Retrain again, this time for twice as many epochs
W1, W2, W3, W1_0, W2_0, W3_0 = entrenarModelo(
    Xtrain,
    Ytrain,
    learning_rate=0.01,
    epochs=2000,
    status_rate=100,
)

Epoch = 100,	Costo: 0.2188
Epoch = 200,	Costo: 0.1895
Epoch = 300,	Costo: 0.1768
Epoch = 400,	Costo: 0.1713
Epoch = 500,	Costo: 0.1686
Epoch = 600,	Costo: 0.1674
Epoch = 700,	Costo: 0.1669
Epoch = 800,	Costo: 0.1667
Epoch = 900,	Costo: 0.1667
Epoch = 1000,	Costo: 0.1667
Epoch = 1100,	Costo: 0.1667
Epoch = 1200,	Costo: 0.1667
Epoch = 1300,	Costo: 0.1667
Epoch = 1400,	Costo: 0.1667
Epoch = 1500,	Costo: 0.1667
Epoch = 1600,	Costo: 0.1667
Epoch = 1700,	Costo: 0.1667
Epoch = 1800,	Costo: 0.1667
Epoch = 1900,	Costo: 0.1667
Epoch = 2000,	Costo: 0.1667


In [119]:
# Predictions after the 2000-epoch run
forwardProp(
    Xtrain.T,
    W1, W2, W3,
    W1_0, W2_0, W3_0,
)

array([[0.3333353 , 0.99999658, 0.3333353 , 0.3333353 ]])

## Prueba con activación sigmoide y función de costo de entropía cruzada

In [122]:
def entrenarModeloSigmoid(x, y, k1=3, k2=2, epochs=100, learning_rate=0.05, status_rate=100, seed=None):
    """Train a network with two ReLU hidden layers and a sigmoid output using cross-entropy.

    The cost is the mean sigmoid cross-entropy computed on the output logits.
    The whole data set is fed on every epoch (full-batch gradient descent).

    Args:
        x: array of shape (m, n) with one example per row; it is transposed
            before being fed to the graph.
        y: array of shape (m, 1) with the targets; it is transposed as well.
        k1: number of units in the first hidden layer.
        k2: number of units in the second hidden layer.
        epochs: number of gradient-descent steps.
        learning_rate: step size passed to the optimizer.
        status_rate: print the cost every ``status_rate`` epochs; a falsy value
            (0 or None) disables printing.
        seed: optional graph-level random seed for the weight initialisation;
            None keeps the original non-deterministic behaviour.

    Returns:
        Tuple ``(W1, W2, W3, W1_0, W2_0, W3_0)`` of NumPy arrays with shapes
        (k1, n), (k2, k1), (1, k2), (k1, 1), (k2, 1) and (1, 1).
    """
    # Number of features
    n = x.shape[1]

    # Graph definition
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        if seed is not None:
            # Graph-level seed so the truncated-normal initial weights are repeatable
            tf.set_random_seed(seed)

        # Data placeholders (features/targets along rows, examples along columns)
        X = tf.placeholder(tf.float32, shape=(n, None), name="X")
        Y = tf.placeholder(tf.float32, shape=(1, None), name="Y")
        lr = tf.placeholder(tf.float32)

        # Weights
        Theta1 = tf.Variable(tf.truncated_normal(shape=(k1, n)), name="W1")
        Theta2 = tf.Variable(tf.truncated_normal(shape=(k2, k1)), name="W2")
        # The output layer consumes the k2 activations of the second hidden
        # layer, so its width must follow k2 (it used to be hard-coded to 2).
        Theta3 = tf.Variable(tf.truncated_normal(shape=(1, k2)), name="W3")
        # Biases
        Theta1_0 = tf.Variable(tf.zeros(shape=(k1, 1)), name="W1_0")
        Theta2_0 = tf.Variable(tf.zeros(shape=(k2, 1)), name="W2_0")
        Theta3_0 = tf.Variable(tf.zeros(shape=(1, 1)), name="W3_0")

        # Forward propagation
        with tf.name_scope("fwd_prop"):
            # First hidden layer
            a_2 = tf.nn.relu(tf.add(tf.matmul(Theta1, X), Theta1_0), name="a_2")
            # Second hidden layer
            a_3 = tf.nn.relu(tf.add(tf.matmul(Theta2, a_2), Theta2_0), name="a_3")
            # Output layer: logits, plus a named sigmoid op that the cost does not use
            z_4 = tf.add(tf.matmul(Theta3, a_3), Theta3_0, name="z_4")
            a_4 = tf.nn.sigmoid(z_4, name="a_4")

        # Cost function: cross-entropy computed from the logits
        with tf.name_scope("cost_fn"):
            cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=z_4))

        # Gradient descent optimizer
        with tf.name_scope("GradientDes.Optimizer"):
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(cost)

        # Global variables initializer
        init = tf.global_variables_initializer()

    # Graph execution
    with tf.Session(graph=g) as sess:
        # Initialise the graph variables
        sess.run(init)

        data_feed = {X: x.T, Y: y.T}
        train_feed = {X: x.T, Y: y.T, lr: learning_rate}

        # One full-batch update per epoch
        for epoch in range(1, epochs + 1):
            sess.run(optimizer, feed_dict=train_feed)
            # Report the cost periodically; it does not depend on the learning rate
            if status_rate and epoch % status_rate == 0:
                c = sess.run(cost, feed_dict=data_feed)
                print("Epoch = %d,\tCosto: %0.4f" % (epoch, c))

        # Fetch the final weight matrices and biases
        W1, W2, W3, W1_0, W2_0, W3_0 = sess.run([Theta1, Theta2, Theta3, Theta1_0, Theta2_0, Theta3_0])
        return W1, W2, W3, W1_0, W2_0, W3_0

In [138]:
def forwardPropSigmoid(X, W1, W2, W3, W1_0, W2_0, W3_0):
    """Run the two-hidden-layer ReLU network forward with a sigmoid output.

    Args:
        X: input matrix, multiplied on the left by ``W1``.
        W1, W2, W3: weight matrices of the first hidden, second hidden and
            output layers.
        W1_0, W2_0, W3_0: biases added after each matrix product.

    Returns:
        The logistic sigmoid of the output-layer pre-activation.
    """
    # First hidden layer
    hidden_1 = ReLU(np.matmul(W1, X) + W1_0)
    # Second hidden layer
    hidden_2 = ReLU(np.matmul(W2, hidden_1) + W2_0)
    # Output layer: logits squashed by the sigmoid
    logits = np.matmul(W3, hidden_2) + W3_0
    return 1/(1+np.exp(-logits))

In [141]:
# Train the sigmoid/cross-entropy network on the XOR data
W1, W2, W3, W1_0, W2_0, W3_0 = entrenarModeloSigmoid(
    Xtrain,
    Ytrain,
    learning_rate=0.001,
    epochs=7500,
    status_rate=500,
)

Epoch = 500,	Costo: 0.6301
Epoch = 1000,	Costo: 0.6171
Epoch = 1500,	Costo: 0.6050
Epoch = 2000,	Costo: 0.5919
Epoch = 2500,	Costo: 0.5771
Epoch = 3000,	Costo: 0.5606
Epoch = 3500,	Costo: 0.5421
Epoch = 4000,	Costo: 0.5218
Epoch = 4500,	Costo: 0.4998
Epoch = 5000,	Costo: 0.4760
Epoch = 5500,	Costo: 0.4509
Epoch = 6000,	Costo: 0.4246
Epoch = 6500,	Costo: 0.3988
Epoch = 7000,	Costo: 0.3742
Epoch = 7500,	Costo: 0.3506


In [142]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions.
# The builtin float replaces np.float, which newer NumPy releases no longer provide;
# the resulting dtype is float64 either way.
(forwardPropSigmoid(Xtrain.T, W1, W2, W3, W1_0, W2_0, W3_0) > 0.5).astype(float)

array([[0., 1., 1., 0.]])

In [143]:
# Raw sigmoid outputs for the four XOR inputs
forwardPropSigmoid(
    Xtrain.T,
    W1, W2, W3,
    W1_0, W2_0, W3_0,
)

array([[0.38548437, 0.84322165, 0.77268604, 0.38555346]])