In [37]:
import numpy as np
import copy

In [2]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
    large negative inputs underflow quietly to 0.0 instead of overflowing
    inside ``np.exp`` (the naive form emits a RuntimeWarning there).

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Activation(s) in [0, 1], same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

In [3]:
def derivativeSigmoid(x):
    """Return x * (1 - x), element-wise.

    This equals the slope of the logistic function when ``x`` is already the
    sigmoid *output* s = sigmoid(z), since ds/dz = s * (1 - s). It must not be
    called with the raw pre-activation value.
    """
    complement = 1 - x
    return complement * x

In [4]:
# Number of bits used for each operand and for the sum.
binary_dim = 8
# Count of distinct values representable with binary_dim bits (2 ** 8 == 256).
largest_number = 2 ** binary_dim

# Lookup table: integer -> array of its bits. Starts empty; populated once the
# bit table has been built.
int2binary = dict()

In [30]:
# Bit table: row i is the binary_dim-bit representation of i, most-significant
# bit first, stored as uint8 zeros and ones.
# Built with shifts and a mask so the number of columns follows binary_dim;
# unpacking a uint8 array would always yield exactly 8 columns (and could only
# represent values up to 255), regardless of binary_dim.
bit_shifts = np.arange(binary_dim - 1, -1, -1)
binary = ((np.arange(largest_number)[:, np.newaxis] >> bit_shifts) & 1).astype(np.uint8)

In [31]:
# Show the bit table (NumPy abbreviates the repr for large arrays).
binary

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [1, 1, 1, ..., 1, 0, 1],
       [1, 1, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)

In [32]:
# Sanity check: one row per integer, one column per bit.
binary.shape

(256, 8)

In [33]:
# Register every integer in [0, largest_number) with its row of bits.
int2binary.update({number: binary[number] for number in range(largest_number)})

In [22]:
# int2binary

In [34]:
# Layer sizes: two input values per time step (one bit from each operand),
# 16 hidden units, one output value per time step.
input_dim, hidden_dim, output_dim = 2, 16, 1

# Learning rate: scale applied to the accumulated weight updates.
alpha = 0.1

In [48]:
# Seed the global NumPy RNG before drawing anything, so that running the
# notebook from a fresh kernel reproduces the same initial weights and the same
# stream of random numbers for every later np.random call.
random_seed = 42
np.random.seed(random_seed)

# weights for hidden layer (input -> hidden), drawn uniformly from [0, 1)
weight_hidden = np.random.random((input_dim, hidden_dim))

# weights for output layer (hidden -> output), drawn uniformly from [0, 1)
weight_output = np.random.random((hidden_dim, output_dim))

# weights that connect the hidden layer in the previous time step to the hidden
# layer in the current time step; the same matrix also connects the hidden layer
# in the current time step to the hidden layer in the next time step
weight_time = np.random.random((hidden_dim, hidden_dim))

In [49]:
# Zero-filled accumulators, one per weight matrix and of matching shape/dtype,
# that collect the updates before they are applied to the weights.
weight_hidden_update, weight_output_update, weight_time_update = (
    np.zeros_like(weights)
    for weights in (weight_hidden, weight_output, weight_time)
)

In [50]:
# Train the recurrent network on random binary-addition problems.
# Each iteration: draw one (a, b) pair, run a forward pass over the bit
# positions, backpropagate through the stored time steps, then apply one
# weight update. Weights and update accumulators are modified in place.
epochs = 100000
for epoch in range(epochs):
    # Draw both operands below largest_number/2 so that their sum stays below
    # largest_number and therefore has an entry in int2binary.
    a_int = np.random.randint(largest_number/2)
    a = int2binary[a_int]
    b_int = np.random.randint(largest_number/2)
    b = int2binary[b_int]

    # target: bit pattern of the true sum
    c_int = a_int + b_int
    c = int2binary[c_int]

    # d collects the network's rounded per-bit predictions for this sample
    d = np.zeros_like(c)
    overallError = 0

    # Per-time-step history consumed by the backward pass. The hidden-state
    # history starts with an all-zero state that acts as the "previous" hidden
    # state of the first time step.
    layer_1_values = list()
    layer_2_deltas = list()
    layer_1_values.append(np.zeros(hidden_dim))

    # Forward pass: walk the bit arrays from the last index to the first
    # (index binary_dim - 1 is processed at position 0).
    for position in range(binary_dim):
        # X has shape (1, 2): one bit from each operand; y has shape (1, 1)
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
        y = np.array([[c[binary_dim - position - 1]]]).T

        # hidden layer: current input plus the previous hidden state
        layer_1 = sigmoid(np.dot(X,weight_hidden) + np.dot(layer_1_values[-1], weight_time))

        # output layer
        layer_2 = sigmoid(np.dot(layer_1, weight_output))

        # Error is target minus prediction; the delta scales it by
        # derivativeSigmoid applied to the output activation itself.
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error) * derivativeSigmoid(layer_2))

        # running sum of absolute output errors over the bit positions
        # (layer_2_error[0] is a length-1 array, so overallError becomes one too)
        overallError += np.abs(layer_2_error[0])

        # record the prediction rounded to 0 or 1 at the same bit index
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # store a copy of the hidden layer so we can use it in the next time step
        layer_1_values.append(copy.deepcopy(layer_1))

    # There is no time step after the last one, so the hidden delta coming
    # "from the future" starts at zero.
    future_layer_1_delta = np.zeros(hidden_dim)

    # Backward pass: negative indexing walks the stored history from the last
    # time step to the first. Index `position` of a and b is the input that was
    # fed at that time step during the forward pass.
    for position in range(binary_dim):
        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position-1]

        # error at hidden layer: contribution from the following time step
        # (through weight_time) plus contribution from this step's output
        # (through weight_output), scaled by derivativeSigmoid(layer_1)
        layer_1_delta = (future_layer_1_delta.dot(weight_time.T) + layer_2_delta.dot(weight_output.T)) * derivativeSigmoid(layer_1)

        # accumulate the updates; they are applied only after the full pass
        weight_output_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        weight_time_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        weight_hidden_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # Apply the accumulated updates scaled by the learning rate. They are added
    # because the error above is defined as target minus prediction.
    weight_hidden += weight_hidden_update * alpha
    weight_output += weight_output_update * alpha
    weight_time += weight_time_update * alpha

    # reset the accumulators in place for the next sample
    weight_hidden_update *= 0
    weight_output_update *= 0
    weight_time_update *= 0

    # progress report every 10000 epochs
    if(epoch%10000 == 0):
        print("Error",overallError)
        print("Pred",d)
        print("True",c)
        # decode the predicted bits into an integer: the last element of d
        # gets weight 2**0, the one before it 2**1, and so on
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x*pow(2, index)
        print("{} + {} = {}".format(a_int,b_int,out))
        print("---------------------------")
    

Error [6.99651326]
Pred [1 1 1 1 1 1 1 1]
True [1 0 0 0 0 0 0 0]
125 + 3 = 255
---------------------------
Error [3.78270064]
Pred [1 1 1 1 1 1 1 0]
True [1 1 1 1 0 1 0 0]
119 + 125 = 254
---------------------------
Error [2.34075236]
Pred [0 0 1 1 1 1 1 0]
True [0 0 1 0 1 1 1 0]
33 + 13 = 62
---------------------------
Error [0.55480596]
Pred [1 0 1 1 1 1 0 1]
True [1 0 1 1 1 1 0 1]
104 + 85 = 189
---------------------------
Error [0.30976701]
Pred [1 1 0 1 0 1 1 1]
True [1 1 0 1 0 1 1 1]
94 + 121 = 215
---------------------------
Error [0.17628169]
Pred [0 0 0 1 0 1 1 0]
True [0 0 0 1 0 1 1 0]
18 + 4 = 22
---------------------------
Error [0.05795546]
Pred [0 1 0 0 1 1 0 0]
True [0 1 0 0 1 1 0 0]
74 + 2 = 76
---------------------------
Error [0.11840709]
Pred [1 0 1 1 1 0 0 0]
True [1 0 1 1 1 0 0 0]
87 + 97 = 184
---------------------------
Error [0.07950283]
Pred [1 1 0 1 1 0 1 0]
True [1 1 0 1 1 0 1 0]
119 + 99 = 218
---------------------------
Error [0.06205393]
Pred [1 1 0 0 1 1 