In [1]:
import tensorflow as tf
import numpy as np
# Fix the graph-level random seed so the tf.random_normal weight/bias
# initialisers created later in this notebook are reproducible between runs.
tf.set_random_seed(777)

In [2]:
# set data
# XOR truth table: one row per sample, two binary inputs -> one binary label.
# Both arrays are float32 to match the float32 placeholders they are fed into.
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

In [3]:
# Graph inputs. The leading None leaves the batch dimension unspecified, so any
# number of samples can be fed at once.
X = tf.placeholder(dtype=tf.float32, shape=[None, 2])  # features: 2 per sample
Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])  # labels: 1 per sample

In [4]:
# Hidden layer: 2 inputs -> 2 sigmoid units.
# Weights and biases start from samples of a normal distribution.
W1 = tf.Variable(
    initial_value=tf.random_normal(shape=[2, 2]),
    name='weight1',
)
b1 = tf.Variable(
    initial_value=tf.random_normal(shape=[2]),
    name='bias1',
)
# l1 = sigmoid(X @ W1 + b1); b1 is broadcast over the batch dimension.
l1 = tf.sigmoid(
    tf.matmul(X, W1) + b1
)

In [5]:
# Output layer: 2 hidden activations -> 1 sigmoid unit.
# Weights and bias start from samples of a normal distribution.
W2 = tf.Variable(
    initial_value=tf.random_normal(shape=[2, 1]),
    name='weight2',
)
b2 = tf.Variable(
    initial_value=tf.random_normal(shape=[1]),
    name='bias2',
)
# Y_pred = sigmoid(l1 @ W2 + b2), the network's output in (0, 1).
Y_pred = tf.sigmoid(
    tf.matmul(l1, W2) + b2
)

In [6]:
# cost/loss function
# Binary cross-entropy averaged over every element of the batch:
#   cost = -mean(Y * log(Y_pred) + (1 - Y) * log(1 - Y_pred))
# Y_pred is clipped to [1e-7, 1 - 1e-7] before the logarithms. Without the clip,
# a sigmoid output that saturates to exactly 0 or 1 in float32 produces
# 0 * log(0) = 0 * -inf = NaN and the reported cost becomes NaN.
# This tensor is only used for reporting; the training updates use the
# hand-written gradients defined below, not this op.
_Y_pred_clipped = tf.clip_by_value(Y_pred, 1e-7, 1.0 - 1e-7)
cost = -tf.reduce_mean(
    Y * tf.log(_Y_pred_clipped) + (1 - Y) * tf.log(1 - _Y_pred_clipped)
)

## Backpropagation

In [7]:
# Network
#          p1     a1           l1     p2     a2           l2 (y_pred)
# X -> (*) -> (+) -> (sigmoid) -> (*) -> (+) -> (sigmoid) -> (loss)
#       ^      ^                   ^      ^
#       |      |                   |      |
#       W1     b1                  W2     b2
#
# Legend: p = matmul output, a = p + bias (pre-activation), l = sigmoid(a).
# Every d_<name> tensor below holds dLoss/d<name> for each sample in the batch.

# Loss derivative
# For binary cross-entropy: dL/dY_pred = (Y_pred - Y) / (Y_pred * (1 - Y_pred)).
d_Y_pred = (Y_pred - Y) / (
    Y_pred * (1.0 - Y_pred) + 1e-7  # small epsilon keeps the denominator non-zero
)

In [8]:
# Layer 2 (output layer) backward pass
# Derivative of the sigmoid written in terms of its own output.
d_sigma2 = Y_pred * (1.0 - Y_pred)
# Chain rule through the sigmoid: dL/da2 = dL/dY_pred * sigmoid'(a2).
d_a2 = d_Y_pred * d_sigma2
# a2 = p2 + b2, so the gradient passes unchanged to both addends.
d_b2 = d_a2
d_p2 = d_a2
# p2 = l1 @ W2  =>  dL/dW2 = l1^T @ dL/dp2 (the matmul sums over the batch).
d_W2 = tf.matmul(
    tf.transpose(l1),
    d_p2,
)

In [9]:
# Mean
# d_W2 is already summed over the batch by the matmul, so divide by the batch
# size (taken from the runtime shape of l1) to get the per-sample average.
d_W2_mean = d_W2 / tf.cast(tf.shape(l1)[0], tf.float32)
# d_b2 still has one row per sample; average over the batch axis.
d_b2_mean = tf.reduce_mean(d_b2, axis=0)

In [10]:
# Layer 1 (hidden layer) backward pass
# p2 = l1 @ W2  =>  dL/dl1 = dL/dp2 @ W2^T.
d_l1 = tf.matmul(
    d_p2,
    tf.transpose(W2),
)
# Derivative of the sigmoid written in terms of its own output.
d_sigma1 = l1 * (1.0 - l1)
# Chain rule through the sigmoid: dL/da1 = dL/dl1 * sigmoid'(a1).
d_a1 = d_l1 * d_sigma1
# a1 = p1 + b1, so the gradient passes unchanged to both addends.
d_p1 = d_a1
d_b1 = d_a1
# p1 = X @ W1  =>  dL/dW1 = X^T @ dL/dp1 (the matmul sums over the batch).
# d_p1 is the same tensor as d_a1.
d_W1 = tf.matmul(
    tf.transpose(X),
    d_p1,
)

In [11]:
# Mean
# d_b1 has one row per sample; average over the batch axis.
d_b1_mean = tf.reduce_mean(d_b1, axis=0)
# d_W1 is already summed over the batch by the matmul, so divide by the batch
# size (taken from the runtime shape of X) to get the per-sample average.
d_W1_mean = d_W1 / tf.cast(tf.shape(X)[0], tf.float32)

In [12]:
# Weight update
# Plain gradient descent: param <- param - learning_rate * mean_gradient.
learning_rate = 0.1

# Each trainable variable paired with its batch-averaged gradient.
_params_and_grads = [
    (W2, d_W2_mean),
    (b2, d_b2_mean),
    (W1, d_W1_mean),
    (b1, d_b1_mean),
]

# All four assigns are fetched in a single sess.run, and TensorFlow does not
# order independent ops within one run. The layer-1 gradient reads W2
# (d_l1 = d_p2 @ W2^T), so without an explicit ordering W2 could be overwritten
# before that gradient is computed. The control dependency forces every gradient
# to be fully evaluated before any variable is modified.
with tf.control_dependencies([grad for _, grad in _params_and_grads]):
    # `step` stays a list of assign ops so it can still be passed to sess.run.
    step = [
        tf.assign(param, param - learning_rate * grad)
        for param, grad in _params_and_grads
    ]

In [13]:
# Accuracy computation
# A sample is predicted as class 1.0 when the sigmoid output exceeds 0.5,
# otherwise as class 0.0.
predicted = tf.cast(Y_pred > 0.5, tf.float32)
# Fraction of samples whose thresholded prediction equals the label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(predicted, Y), tf.float32)
)

In [14]:
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    # The same full batch is fed on every run, so build the feed dict once
    # instead of re-creating it inside the loop.
    feed = {X: x_data, Y: y_data}

    # Batch size as seen by the graph (only X is needed to evaluate its shape).
    print("shape", sess.run(tf.shape(X)[0], feed_dict={X: x_data}))

    for i in range(10001):
        # Apply one gradient-descent update. Only the assign ops are fetched;
        # the cost is evaluated below, and only on the iterations that log it.
        sess.run(step, feed_dict=feed)
        if i % 1000 == 0:
            # One run fetches everything that is logged (post-update values).
            cost_val, d_W1_val, W1_val, W2_val = sess.run(
                [cost, d_W1, W1, W2], feed_dict=feed)
            print(i, [cost_val, d_W1_val], [W1_val, W2_val])

    # Accuracy report
    # NOTE: the value printed after "Correct: " is the thresholded prediction
    # tensor (`predicted`), not a per-sample correctness flag.
    h, c, a = sess.run([Y_pred, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

shape 4
0 [0.75390214, array([[ 0.147971  , -0.05841584],
       [ 0.09894869, -0.06082734]], dtype=float32)] [array([[ 0.79886746,  0.68011874],
       [-1.21986341, -0.30361032]], dtype=float32), array([[ 1.37522972],
       [-0.78823853]], dtype=float32)]
1000 [0.67122912, array([[-0.05335814, -0.00569674],
       [ 0.04508685,  0.00137657]], dtype=float32)] [array([[ 1.15745568,  0.70470017],
       [-1.85447919, -0.15281098]], dtype=float32), array([[ 1.38862789],
       [-0.87442583]], dtype=float32)]
2000 [0.53393614, array([[-0.07984745, -0.06233264],
       [ 0.06753156,  0.05506042]], dtype=float32)] [array([[ 3.05309558,  1.38850737],
       [-3.45586419, -0.65311229]], dtype=float32), array([[ 3.28347206],
       [-1.56185806]], dtype=float32)]
3000 [0.1978671, array([[-0.04035231, -0.07028952],
       [ 0.03509892,  0.0806966 ]], dtype=float32)] [array([[ 4.6234231 ,  3.50294638],
       [-4.75216198, -3.03614283]], dtype=float32), array([[ 5.56490231],
       [-4.34472084