In [66]:
"""
   Copyright 2018 (c) Jinxin Xie

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""
import copy, numpy as np

#np.random.seed(0)


# compute sigmoid nonlinearity
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + e**(-x))`` to ``x``.

    ``x`` is handed directly to ``np.exp``, so a scalar yields a scalar and
    an array is transformed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    """Return ``output * (1 - output)``.

    When ``output`` is a value already produced by ``sigmoid``, this product
    is the slope of the sigmoid at that point, so the derivative is obtained
    without re-evaluating the exponential.
    """
    complement = 1 - output
    return complement * output


# training dataset generation
# int2binary maps every integer in [0, 2**binary_dim) to its fixed-width
# binary encoding: a uint8 array of 0/1 values, most significant bit first.
int2binary = {}
binary_dim = 8

largest_number = pow(2, binary_dim)
# Extract the bits with shifts and masks rather than np.unpackbits on a uint8
# column: unpackbits always produces exactly 8 bit columns per byte, so it
# only agrees with binary_dim when binary_dim == 8. The shift form yields
# exactly binary_dim columns for any width.
bit_shifts = np.arange(binary_dim - 1, -1, -1)
binary = ((np.arange(largest_number)[:, np.newaxis] >> bit_shifts) & 1).astype(np.uint8)
for i in range(largest_number):
    int2binary[i] = binary[i]

# network hyperparameters
alpha = 0.1       # step size applied to the accumulated weight updates
input_dim = 2     # inputs per timestep
hidden_dim = 32   # width of the recurrent hidden layer
output_dim = 1    # outputs per timestep

# floor of the square root of largest_number; any two non-negative integers
# below this bound have a product below largest_number
biggest = int(largest_number ** 0.5)

# initialize neural network weights with uniform random values in [-1, 1)
synapse_0 = np.random.random((input_dim, hidden_dim)) * 2 - 1    # input  -> hidden
synapse_1 = np.random.random((hidden_dim, output_dim)) * 2 - 1   # hidden -> output
synapse_h = np.random.random((hidden_dim, hidden_dim)) * 2 - 1   # hidden -> hidden

# zero-filled accumulators with the same shapes as the weight matrices
synapse_0_update, synapse_1_update, synapse_h_update = (
    np.zeros_like(weights) for weights in (synapse_0, synapse_1, synapse_h)
)

# training logic
# Every iteration draws one random multiplication problem, feeds its bits
# through the recurrent network one timestep at a time, backpropagates the
# error through the timesteps and applies one update to the three weight
# matrices.
for j in range(100000):

    # generate a simple multiplication problem (a * b = c)
    a_int = np.random.randint(biggest)  # int version
    a = int2binary[a_int]  # binary encoding

    b_int = np.random.randint(biggest)  # int version
    b = int2binary[b_int]  # binary encoding

    # true answer
    c_int = a_int * b_int
    c = int2binary[c_int]

    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)

    # summed absolute output error over all timesteps of this problem
    # (becomes a one-element array once the first error is added)
    overallError = 0

    # per-timestep output deltas and hidden states, kept for the backward pass
    layer_2_deltas = list()
    layer_1_values = list()
    # hidden state seen by the first timestep
    layer_1_values.append(np.zeros(hidden_dim))

    # forward pass: walk the encodings from their last element to their first
    for position in range(binary_dim):
        bit_index = binary_dim - position - 1

        # generate input and output
        X = np.array([[a[bit_index], b[bit_index]]])
        y = np.array([[c[bit_index]]]).T

        # hidden layer (input + prev_hidden)
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))

        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))

        # did we miss?... if so by how much?
        layer_2_error = y - layer_2
        layer_2_deltas.append(layer_2_error * sigmoid_output_to_derivative(layer_2))
        overallError += np.abs(layer_2_error[0])

        # decode estimate so we can print it out
        d[bit_index] = np.round(layer_2[0][0])

        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(layer_1.copy())

    # backward pass: nothing flows back into the last timestep from a later one
    future_layer_1_delta = np.zeros(hidden_dim)

    for position in range(binary_dim):
        # Timesteps are revisited newest-first. The forward pass read index
        # binary_dim - 1 - t at timestep t, so the timestep revisited here
        # read index `position`.
        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]
        # error at hidden layer
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) +
                         layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
        # accumulate the updates; the weights themselves change only after the
        # whole sequence has been processed (atleast_2d also covers the 1-D
        # initial hidden state)
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # apply the accumulated updates scaled by the learning rate
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    # reset the accumulators for the next problem
    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # print out progress
    if j % 1000 == 0:
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        # Decode the predicted bits into an integer. Accumulate Python ints so
        # the result does not depend on NumPy's uint8 promotion rules.
        out = 0
        for index, x in enumerate(reversed(d)):
            out += int(x) * pow(2, index)
        print(str(a_int) + " * " + str(b_int) + " = " + str(out))
        print("------------")
      


Error:[ 3.01417168]
Pred:[0 0 0 0 0 0 0 0]
True:[0 0 0 1 0 0 1 0]
9 + 2 = 0
------------
Error:[ 3.95158414]
Pred:[0 0 1 1 1 1 0 0]
True:[1 0 1 1 0 1 1 0]
14 + 13 = 60
------------
Error:[ 4.52190442]
Pred:[0 0 0 0 1 1 1 1]
True:[1 1 1 0 0 0 0 1]
15 + 15 = 15
------------
Error:[ 4.07054455]
Pred:[0 0 0 0 0 0 1 1]
True:[1 1 1 0 0 0 0 1]
15 + 15 = 3
------------
Error:[ 2.20043608]
Pred:[0 0 1 1 1 1 0 0]
True:[0 0 1 1 1 1 0 0]
12 + 5 = 60
------------
Error:[ 1.48423858]
Pred:[0 0 0 1 1 1 1 1]
True:[0 0 0 0 1 1 1 1]
3 + 5 = 31
------------
Error:[ 2.08712365]
Pred:[0 1 1 1 1 0 0 0]
True:[0 1 1 0 1 0 0 0]
13 + 8 = 120
------------
Error:[ 1.5010675]
Pred:[0 0 1 1 1 0 0 0]
True:[0 0 1 0 1 0 0 0]
5 + 8 = 56
------------
Error:[ 3.49081526]
Pred:[0 1 0 0 1 1 0 0]
True:[1 0 0 0 0 1 0 0]
11 + 12 = 76
------------
Error:[ 2.35626166]
Pred:[0 0 0 0 1 0 1 0]
True:[0 0 0 1 0 1 1 0]
2 + 11 = 10
------------
Error:[ 2.48987964]
Pred:[0 0 0 0 0 0 1 0]
True:[0 1 0 0 0 1 1 0]
5 + 14 = 2
------------
E

Error:[ 0.00028789]
Pred:[0 0 0 0 0 0 0 0]
True:[0 0 0 0 0 0 0 0]
0 + 0 = 0
------------
Error:[ 0.34147318]
Pred:[1 0 1 1 0 1 0 0]
True:[1 0 1 1 0 1 0 0]
12 + 15 = 180
------------
Error:[ 0.07740641]
Pred:[0 0 0 1 1 1 1 0]
True:[0 0 0 1 1 1 1 0]
3 + 10 = 30
------------
Error:[ 0.87040779]
Pred:[0 0 0 1 1 0 0 0]
True:[0 0 1 1 1 0 0 0]
8 + 7 = 24
------------
Error:[ 0.02799928]
Pred:[0 0 0 0 0 0 1 0]
True:[0 0 0 0 0 0 1 0]
2 + 1 = 2
------------
Error:[ 0.00024707]
Pred:[0 0 0 0 0 0 0 0]
True:[0 0 0 0 0 0 0 0]
0 + 8 = 0
------------
Error:[ 0.92043709]
Pred:[1 1 0 1 0 1 0 0]
True:[1 1 0 0 0 1 0 0]
14 + 14 = 212
------------
Error:[ 0.07041323]
Pred:[0 0 1 1 1 1 0 0]
True:[0 0 1 1 1 1 0 0]
15 + 4 = 60
------------
Error:[ 0.09893256]
Pred:[0 0 0 0 0 1 1 1]
True:[0 0 0 0 0 1 1 1]
7 + 1 = 7
------------


array([[ 2.01598634,  1.62118149,  3.56140567],
       [ 5.83281336,  5.95625446,  1.83518847],
       [ 2.79926956,  0.81150962,  6.88545512]])