In [1]:
import numpy as np

# Back-propagation (Práctica)

Entrenar un aproximador para la función XOR usando 2 capas intermedias.
* Usar 2 neuronas en la capa anterior a la salida (segunda capa oculta=
* Usar al menos 2( pueden ser más) en la primera capa oculta.
* Usar activación ReLU en las capas intermedias y no activación en la salida

Usar Numpy.

Realizar 5 experimentos, en cada experimento (corrida de entrenamiento):
* Inicializar los parámetros aleatoriamente con distribución normal centrada en 0 y std = 0.1
* Retornar la representación intermedia de la segunda capa oculta.

Graficar las 5 representaciones intermedias, comparar, comentar y/o concluir.


In [1477]:
def relu(X):
    return np.maximum(0,X) 


def sigmoid(X):
    return 1/(1+np.exp(-X))

def derivative_relu(array):
    return np.array([n > 0 for n in array], dtype = np.int)

def softplus(X):
    return np.log(1 + np.exp(X))

In [3167]:
class NeuralNetXOR:
    def __init__(self, X, y_real):
        self.X = X
        self.y_real = y_real
        self.w_1 = np.random.normal(0, 0.1, (5,3))
        self.w_2 = np.random.normal(0, 0.1, (2,6))
        self.w_3 = np.random.normal(0, 0.1, (1,3))
        
    def forward_propagation(self):
        X_input = np.append(np.ones((self.X.shape[0], 1)), self.X, axis = -1)
        l1 = np.matmul(X_input, self.w_1.T)
        l1_a = relu(l1)
      
        l1_a_w_bias = np.append(np.ones((l1_a.shape[0], 1)),l1_a, axis = -1)
        
        l2 = np.matmul(l1_a_w_bias, self.w_2.T)
        l2_a = relu(l2)
       
        l2_a_w_bias = np.append(np.ones((l2_a.shape[0], 1)), l2_a, axis = -1 )
        
        output = np.matmul(l2_a_w_bias, self.w_3.T)
        output_a = sigmoid(output)
        
        return l1, l1_a, l2, l2_a, output, output_a
    
    def error(self):
        _,_,_,_,_,output = self.forward_propagation()
        
        cross_entropy = -1 * np.mean(y_real * (np.log(output)) + (1 - y_real) * (np.log(1 - output)))
        
        
        return cross_entropy
    
    def back_prop(self):
        error = self.error()
        l1, l1_a, l2, l2_a, output, output_a = self.forward_propagation()
        d_error_d_output = (output_a - y_real)
        d_error_d_w_3 = np.matmul(l2_a.T,d_error_d_output)
        d_error_d_l2 =  np.matmul(d_error_d_output, self.w_3[:,1:]) * derivative_relu(l2)
        d_error_d_w_2 = np.matmul(l1_a.T,d_error_d_l2)
        d_error_d_l1 = np.matmul(d_error_d_l2,self.w_2[:,1:]) * derivative_relu(l1)
        d_error_d_w_1 = np.matmul(self.X.T,d_error_d_l1)
        d_error_b_3 = np.sum(d_error_d_output, axis = 0, keepdims =True)
        d_error_b_2 = np.sum(d_error_d_l2, axis = 0, keepdims = True)
        d_error_b_1 = np.sum(d_error_d_l1, axis = 0, keepdims = True)        
        
        return d_error_d_w_3, d_error_d_w_2, d_error_d_w_1, d_error_b_3, d_error_b_2, d_error_b_1
    
    def fit(self, lr, epochs):
        print("initial error: " + str(self.error()))
        for i in range(0, epochs):
            d_w_3, d_w_2, d_w_1, d_b_3, d_b_2, d_b_1 = self.back_prop()
            self.w_3[:,:1] = self.w_3[:,:1][0] - (lr * d_b_3[0])
            self.w_3[:,1:] = self.w_3[:,1:][0] - (lr * d_w_3[0])
            self.w_2[:,:1] = (self.w_2[:,:1].reshape(1,-1) - (lr * d_b_2)).reshape(-1,1)
            self.w_2[:,1:] = self.w_2[:,1:] - lr * d_w_2
            self.w_1[:,:1] = (self.w_1[:,:1].reshape(1,-1) - (lr * d_b_2)).reshape(-1,1)
            self.w_1[:,1:] = self.w_1[:,1:] - lr * d_w_3
            if i%1 == 0:
                print("Epoch " + str(i) + ": " + str(self.error()))
            
    def predict(self):
        _,_,_,_,_,output = self.forward_propagation()
        
        predictions = np.array([n > 0.5 for n in output], dtype = np.int)
        
        return predictions


In [3155]:
class NeuralNetXOR:
    def __init__(self, X, y_real):
        self.X = X
        self.y_real = y_real
        self.w_1 = np.random.normal(0, 1, (2,3))
        self.w_2 = np.random.normal(0, 1, (2,3))
        self.w_3 = np.random.normal(0, 1, (1,3))
        
    def forward_propagation(self):
        X_input = np.append(np.ones((self.X.shape[0], 1)), self.X, axis = -1)
        l1 = np.matmul(X_input, self.w_1.T)
        l1_a = softplus(l1)
      
        l1_a_w_bias = np.append(np.ones((l1_a.shape[0], 1)),l1_a, axis = -1)
        l2 = np.matmul(l1_a_w_bias, self.w_2.T)
        l2_a = softplus(l2)
       
        l2_a_w_bias = np.append(np.ones((l2_a.shape[0], 1)), l2_a, axis = -1 )
        
        output = np.matmul(l2_a_w_bias, self.w_3.T)
        output_a = sigmoid(output)
        
        return l1, l1_a, l2, l2_a, output, output_a
    
    def error(self):
        _,_,_,_,_,output = self.forward_propagation()
        
        #cross_entropy = -1 * 1/8(self.y_real * (np.log(output)) + (1 - self.y_real) * (np.log(1 - output)))
        
        MSE = np.squeeze(1/8*(np.sum(np.square(self.y_real - self.predict()))))
        
        return MSE
    
    def back_prop(self):
        error = self.error()
        l1, l1_a, l2, l2_a, output, output_a = self.forward_propagation()
        d_error_d_output =  (-1/4 *(y_real - output_a))*(output_a * (1 - output_a))
        d_error_d_w_3 = np.matmul(l2_a.T,d_error_d_output)
        d_error_d_l2 =  np.matmul(d_error_d_output, self.w_3[:,1:]) * sigmoid(l2)
        d_error_d_w_2 = np.matmul(l1_a.T,d_error_d_l2)
        d_error_d_l1 = np.matmul(d_error_d_l2,self.w_2[:,1:]) * sigmoid(l1)
        d_error_d_w_1 = np.matmul(self.X.T,d_error_d_l1)
        d_error_b_3 = np.sum(d_error_d_output, axis = 0, keepdims =True)
        d_error_b_2 = np.sum(d_error_d_l2, axis = 0, keepdims = True)
        d_error_b_1 = np.sum(d_error_d_l1, axis = 0, keepdims = True)        
        
        return d_error_d_w_3, d_error_d_w_2, d_error_d_w_1, d_error_b_3, d_error_b_2, d_error_b_1
    
    def fit(self, lr, epochs):
        print("initial error: " + str(self.error()))
        for i in range(0, epochs):
            d_w_3, d_w_2, d_w_1, d_b_3, d_b_2, d_b_1 = self.back_prop()
            self.w_3[:,:1] = self.w_3[:,:1][0] - (lr * d_b_3[0])
            self.w_3[:,1:3] = self.w_3[:,1:][0] - (lr * d_w_3[0])
            self.w_2[:,:1] = (self.w_2[:,:1].reshape(1,-1) - (lr * d_b_2)).reshape(-1,1)
            self.w_2[:,1:3] = self.w_2[:,1:] - lr * d_w_2
            self.w_1[:,:1] = (self.w_1[:,:1].reshape(1,-1) - (lr * d_b_2)).reshape(-1,1)
            self.w_1[:,1:3] = self.w_1[:,1:] - lr * d_w_3
            if i%10000 == 0:
                print("Epoch " + str(i) + ": " + str(self.error()))
            
    def predict(self):
        _,_,_,_,_,output = self.forward_propagation()
        
        predictions = np.array([n > 0.5 for n in output], dtype = np.int)
        
        return predictions


In [3156]:
X_inputs = np.array([[0,0], [0,1], [1,0], [1,1]])

In [3157]:
y_real = np.array([[0], [1], [1], [0]])

In [3168]:
nn = NeuralNetXOR(X_inputs, y_real)

In [3169]:
nn.w_1

array([[-0.10552497,  0.05421978, -0.16167337],
       [-0.03901832,  0.1581707 ,  0.07542578],
       [-0.13402289, -0.11917437,  0.03019825],
       [-0.11033598, -0.11135892,  0.00417707],
       [-0.04753658, -0.06248371, -0.019472  ]])

In [3170]:
nn.forward_propagation()

(array([[-0.10552497, -0.03901832, -0.13402289, -0.11033598, -0.04753658],
        [-0.26719834,  0.03640746, -0.10382464, -0.10615891, -0.06700858],
        [-0.05130519,  0.11915238, -0.25319726, -0.22169489, -0.11002029],
        [-0.21297857,  0.19457815, -0.22299901, -0.21751783, -0.12949229]]),
 array([[0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.03640746, 0.        , 0.        , 0.        ],
        [0.        , 0.11915238, 0.        , 0.        , 0.        ],
        [0.        , 0.19457815, 0.        , 0.        , 0.        ]]),
 array([[ 0.00245438, -0.0061506 ],
        [ 0.00033222, -0.00751978],
        [-0.00449089, -0.01063156],
        [-0.00888739, -0.0134681 ]]),
 array([[0.00245438, 0.        ],
        [0.00033222, 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]]),
 array([[0.07351011],
        [0.07359045],
        [0.07360303],
        [0.07360303]]),
 array([[0.51836926],
        [0.51838931]

In [3176]:
nn.predict()

array([[0],
       [0],
       [1],
       [1]])

In [3177]:
nn.back_prop()

(array([[0.00106098],
        [0.        ]]),
 array([[0.       , 0.       ],
        [0.0007287, 0.       ],
        [0.       , 0.       ],
        [0.       , 0.       ],
        [0.       , 0.       ]]),
 array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        , -0.00116667,  0.        ,  0.        ,  0.        ]]),
 array([[-1.45825407e-06]]),
 array([[1.14443336e-06, 0.00000000e+00]]),
 array([[ 0.        , -0.00116667,  0.        ,  0.        ,  0.        ]]))

In [3174]:
nn.fit(1,1000)

initial error: 0.6931368437084947


ValueError: operands could not be broadcast together with shapes (2,5) (5,2) 