# Learning the XOR Function

  This example comes from Chapter 3 of the book <u>Learning Deep Learning</u> by Magnus Ekman. The purpose of this example is to see backprogation in practice.

In [1]:
# Import statement
import numpy as np

In [3]:
# This makes the random number generation repeatable
np.random.seed(3) 

LEARNING_RATE = 0.1
index_list = [0, 1, 2, 3] 

# Define training examples
x_train = [np.array([1.0, -1.0, -1.0]), np.array([1.0, -1.0, -1.0]), 
           np.array([1.0, -1.0, -1.0]), np.array([1.0, -1.0, -1.0])]
y_train = [0.0, 1.0, 1.0, 0.0] # Ground truth is commonly called y_train

In [4]:
def neuron_w(input_count):
    weights = np.zeros(input_count+1)
    for i in range(1, (input_count+1)):
        weights[i] = np.random.uniform(-1.0, 1.0)
    return weights

# Initial Weights
# The input weights are randomized to break the symmetry of bias weights being [0, 0, 0]
n_w = [neuron_w(2), neuron_w(2), neuron_w(2)]
n_y = [0, 0, 0]
n_error = [0, 0, 0]

In [8]:
def show_learning():
    print('Current weights:')
    for i, w in enumerate(n_w):
        print('neuron ', i, ': w0 =', '%5.2f' % w[0],
              ', w1 =', '%5.2f' % w[1], ', w2 =',
              '%5.2f' % w [2])
    print('-----------------')

def forward_pass(x):
    global n_y
    n_y[0] = np.tanh(np.dot(n_w[0], x)) # Neuron 0
    n_y[1] = np.tanh(np.dot(n_w[1], x)) # Neuron 1
    n2_inputs = np.array([1.0, n_y[0], n_y[1]]) # 1.0 is bias
    z2 = np.dot(n_w[2], n2_inputs)
    n_y[2] = 1.0 / (1.0 + np.exp(-z2))

def backward_pass(y_truth):
    global n_error
    error_prime = -(y_truth - n_y[2]) # Derivative of loss function
    derivative = n_y[2] * (1.0 - n_y[2]) # Logistic derivative
    n_error[2] = error_prime * derivative
    derivative = 1.0 - n_y[0]**2 # tanh derivative
    n_error[0] = n_w[2][1] * n_error[2] * derivative
    derivative = 1.0 - n_y[1]**2 # tanh derivative
    n_error[1] = n_w[2][2] * n_error[2] * derivative

def adjust_weights(x):
    global n_w
    n_w[0] -= (x * LEARNING_RATE * n_error[0])
    n_w[1] -= (x * LEARNING_RATE * n_error[1])
    n2_inputs = np.array([1.0, n_y[0], n_y[1]]) # 1.0 is bias
    n_w[2] -= (n2_inputs * LEARNING_RATE * n_error[2])

In [9]:
# Network training loop
all_correct = False
while not all_correct: # Train until converged
    all_correct = True
    np.random.shuffle(index_list) 
    for i in index_list:
        forward_pass(x_train[i])
        backward_pass(y_train[i])
        adjust_weights(x_train[i])
        show_learning()
    for i in range(len(x_train)):
        forward_pass(x_train[i])
        print('x1 =', '%4.1f' % x_train[i][1], ', x2 =',
              '%4.1f' % x_train[i][2], ', y = ',
              '%.4f' % n_y[2])
        if(((y_train[i] >= 0.5) and (n_y[2] < 0.5)) or ((y_train[i] >= 0.5) and (n_y[2] < 0.5))):
            all_correct = False


Current weights:
neuron  0 : w0 =  0.01 , w1 =  0.09 , w2 =  0.41
neuron  1 : w0 =  0.01 , w1 = -0.43 , w2 =  0.01
neuron  2 : w0 =  0.01 , w1 =  0.78 , w2 =  0.80
-----------------
Current weights:
neuron  0 : w0 =  0.02 , w1 =  0.09 , w2 =  0.40
neuron  1 : w0 =  0.02 , w1 = -0.44 , w2 =  0.00
neuron  2 : w0 =  0.03 , w1 =  0.77 , w2 =  0.80
-----------------
Current weights:
neuron  0 : w0 =  0.01 , w1 =  0.09 , w2 =  0.41
neuron  1 : w0 =  0.01 , w1 = -0.43 , w2 =  0.01
neuron  2 : w0 =  0.01 , w1 =  0.78 , w2 =  0.80
-----------------
Current weights:
neuron  0 : w0 =  0.00 , w1 =  0.10 , w2 =  0.42
neuron  1 : w0 =  0.00 , w1 = -0.42 , w2 =  0.02
neuron  2 : w0 =  0.00 , w1 =  0.79 , w2 =  0.79
-----------------
x1 = -1.0 , x2 = -1.0 , y =  0.4818
x1 = -1.0 , x2 = -1.0 , y =  0.4818
x1 = -1.0 , x2 = -1.0 , y =  0.4818
x1 = -1.0 , x2 = -1.0 , y =  0.4818
Current weights:
neuron  0 : w0 = -0.01 , w1 =  0.11 , w2 =  0.42
neuron  1 : w0 = -0.01 , w1 = -0.41 , w2 =  0.03
neuron  2 : w