# Learning XOR

We are trying to make a simple feedforward network to learn the XOR gate behaviour.

## Getting the data

In [91]:
import pandas as pd

# Truth table of the XOR gate: one row per input combination, with the two
# inputs in x_1 / x_2 and the expected gate output in y.
XOR = pd.DataFrame({
    "x_1": [0, 0, 1, 1],
    "x_2": [0, 1, 0, 1],
    "y":   [0, 1, 1, 0],
})

XOR

Unnamed: 0,x_1,x_2,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [97]:
import numpy as np
import tensorflow as tf  # needed by the placeholders below; without it this cell fails on a fresh kernel

num_features = 2
num_iter = 100000
display_step = num_iter // 10    # a tenth of the iterations; used as the progress-print interval
learning_rate = 0.01

num_input = 2          # units in the input layer: the two gate inputs x_1, x_2
num_hidden1 = 2        # units in the first hidden layer
num_output = 1         # units in the output layer: a single value for y

# Same truth table as the XOR frame defined earlier, hard-coded as float32
# arrays so they can be fed straight into the placeholders.
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], np.float32)  # 4x2, inputs
y = np.array([[0], [1], [1], [0]], np.float32)              # 4x1, XOR of each input row

# training data and labels
X = tf.placeholder('float', [None, num_input])     # training data
Y = tf.placeholder('float', [None, num_output])    # labels

MLP approximation function

In [98]:
def multi_layer_perceptron_xor(x, weights, biases):
    """Build a one-hidden-layer perceptron and return its output tensor.

    Args:
        x: input batch, multiplied on the right by ``weights['w_h1']``.
        weights: mapping with entries ``'w_h1'`` (input -> hidden) and
            ``'w_out'`` (hidden -> output).
        biases: mapping with entries ``'b_h1'`` and ``'b_out'``, each added
            after the corresponding matrix product.

    Returns:
        The affine output of the last layer. No activation is applied to
        it, so callers receive raw logits rather than probabilities.
    """
    # Hidden layer: affine transform followed by a sigmoid non-linearity.
    hidden_pre = tf.matmul(x, weights['w_h1'])
    hidden_act = tf.nn.sigmoid(tf.add(hidden_pre, biases['b_h1']))

    # Output layer: affine transform only.
    logits = tf.matmul(hidden_act, weights['w_out'])
    return tf.add(logits, biases['b_out'])

In [99]:
import tensorflow as tf
# Explicit op-level seeds make the random initial weights repeatable, so a
# rerun of the notebook starts training from the same point.
# NOTE(review): a 2-unit hidden layer may stall in a poor local minimum for
# some initialisations -- try another seed if the loss plateaus.
weight_seed = 42

# Trainable parameters, keyed by the names multi_layer_perceptron_xor looks up.
weights = {
    'w_h1' : tf.Variable(tf.random_normal([num_input, num_hidden1], seed=weight_seed)),
    'w_out': tf.Variable(tf.random_normal([num_hidden1, num_output], seed=weight_seed + 1)),
}

# Biases start at zero; only the weights are randomly initialised.
biases = {
    'b_h1' : tf.Variable(tf.zeros([num_hidden1])),
    'b_out': tf.Variable(tf.zeros([num_output])),
}

Create model

In [100]:
# Wire the placeholder X through the network; `model` is the output-layer
# tensor returned by multi_layer_perceptron_xor (no final activation applied).
model = multi_layer_perceptron_xor(X, weights, biases)

Train the model.

In [101]:
# Cost function and optimisation:
# - sigmoid cross entropy -- single output
# - softmax cross entropy -- multiple outputs, normalized
# `model` is passed as logits, so the sigmoid is applied inside the loss op.
loss_func = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=model, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss_func)

init = tf.global_variables_initializer()

# The context manager closes the session even if a training step raises,
# instead of relying on reaching an explicit close() at the end of the cell.
with tf.Session() as sess:
    sess.run(init)

    for k in range(num_iter):
        # One gradient step on the full 4-row batch; tmp_cost is the loss
        # fetched in the same run call as the update.
        tmp_cost, _ = sess.run([loss_func, optimizer], feed_dict={X: x, Y: y})
        if k % display_step == 0:
            # The printed 'output' values are the raw outputs of `model`
            # (no sigmoid applied), not probabilities.
            print('output: ', sess.run(model, feed_dict={X:x}))
            print('loss= ' + "{:.5f}".format(tmp_cost))

    # Hidden-layer parameters (they separate the input space); fetched while
    # the session is still open.
    W = np.squeeze(sess.run(weights['w_h1']))   # 2x2
    b = np.squeeze(sess.run(biases['b_h1']))    # 2,

output:  [[0.92657596]
 [1.0481822 ]
 [1.271105  ]
 [1.3435489 ]]
loss= 3.40361
output:  [[-2.1764343 ]
 [ 0.51524925]
 [ 0.8902612 ]
 [ 0.44862747]]
loss= 1.86233
output:  [[-2.7589946]
 [ 2.940815 ]
 [ 2.9366026]
 [-2.5233374]]
loss= 0.24176
output:  [[-3.5402842]
 [ 3.791689 ]
 [ 3.7888622]
 [-3.414403 ]]
loss= 0.10564
output:  [[-3.9874058]
 [ 4.259204 ]
 [ 4.257001 ]
 [-3.8916216]]
loss= 0.06669
output:  [[-4.29852  ]
 [ 4.5809402]
 [ 4.57909  ]
 [-4.2174873]]
loss= 0.04853
output:  [[-4.5368414]
 [ 4.825783 ]
 [ 4.8241653]
 [-4.4647474]]
loss= 0.03808
output:  [[-4.7297163]
 [ 5.0233507]
 [ 5.021901 ]
 [-4.6639595]]
loss= 0.03131
output:  [[-4.891535 ]
 [ 5.1890335]
 [ 5.187788 ]
 [-4.830205 ]]
loss= 0.02657
output:  [[-5.0314074]
 [ 5.330593 ]
 [ 5.32938  ]
 [-4.97371  ]]
loss= 0.02307
