In [1]:
import numpy as np
import copy

In [2]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` is passed straight to ``np.exp``, so it may be a scalar or a
    NumPy array; for arrays the function is applied element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [3]:
def derivativeSigmoid(x):
    """Return ``x * (1 - x)``.

    This equals the derivative of the sigmoid when ``x`` is already a
    sigmoid *output* (not the pre-activation), which is how the training
    loop in this notebook calls it.
    """
    complement = 1 - x
    return x * complement

In [4]:
# Lookup table from an integer to its bit pattern (filled in a later cell).
int2binary = dict()
# Number of bits used to represent every operand and every sum.
binary_dim = 8
# Count of distinct values representable with binary_dim bits.
largest_num = 2 ** binary_dim

In [5]:
# Sanity check: with binary_dim = 8 this should display 2**8 = 256.
largest_num

256

In [10]:
# Table of bit patterns: row i holds the 8 bits of the integer i, most
# significant bit first. The values are stored as a uint8 column vector so
# that np.unpackbits expands each one into its own row of bits.
binary = np.unpackbits(
    np.array(range(largest_num), dtype=np.uint8)[:, np.newaxis],
    axis=1,
)

In [11]:
# Display the bit table (NumPy abbreviates the middle rows and columns).
binary

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [1, 1, 1, ..., 1, 0, 1],
       [1, 1, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)

In [12]:
# Expect one row per integer and one column per bit: (largest_num, binary_dim).
binary.shape

(256, 8)

In [13]:
# Spot-check one row: 10 should read 0 0 0 0 1 0 1 0.
binary[10]

array([0, 0, 0, 0, 1, 0, 1, 0], dtype=uint8)

In [19]:
# Spot-check another row: 9 should read 0 0 0 0 1 0 0 1.
binary[9]

array([0, 0, 0, 0, 1, 0, 0, 1], dtype=uint8)

In [16]:
# Register every integer below largest_num together with its row of bits.
for i, bits in enumerate(binary[:largest_num]):
    int2binary[i] = bits

In [18]:
# int2binary

In [20]:
# Step size applied to the accumulated weight updates after each sample.
alpha = 0.1
# input_dim:  values fed in per time step (one bit from each operand)
# hidden_dim: size of the recurrent hidden layer
# output_dim: values predicted per time step (one bit of the sum)
input_dim, hidden_dim, output_dim = 2, 16, 1

In [31]:
# Seed NumPy's global generator so that the initial weights -- and the
# training samples later drawn from the same generator -- are identical on
# every Restart & Run All.
np.random.seed(0)

# Draw the initial weights uniformly from [-1, 1). np.random.random on its own
# only yields values in [0, 1); with 0/1 inputs and sigmoid hidden states that
# makes every pre-activation non-negative at the start of training.
wh = 2 * np.random.random((input_dim, hidden_dim)) - 1      # input -> hidden
wout = 2 * np.random.random((hidden_dim, output_dim)) - 1   # hidden -> output
wtime = 2 * np.random.random((hidden_dim, hidden_dim)) - 1  # previous hidden -> hidden

In [32]:
# Zero-filled accumulators, one per weight matrix and with matching shapes,
# that collect the weight updates over all time steps of a sample.
wh_update, wout_update, wtime_update = (
    np.zeros_like(weights) for weights in (wh, wout, wtime)
)

In [33]:
# Number of training iterations; each iteration trains on one random sum.
epochs = 10 ** 5

In [34]:
# Train the network on randomly generated addition problems, one problem per
# epoch. The operands are fed one bit per time step, starting with the last
# column of each bit row (the least-significant bit).
for epoch in range(epochs):
    # Both operands are drawn from [0, largest_num // 2), so their sum is
    # always a key of int2binary. Floor division keeps the bound an integer
    # rather than handing np.random.randint a float.
    a_int = np.random.randint(largest_num // 2)
    a = int2binary[a_int]
    b_int = np.random.randint(largest_num // 2)
    b = int2binary[b_int]

    # Target: the true sum and its bit pattern.
    c_int = a_int + b_int
    c = int2binary[c_int]

    # d receives the rounded per-bit predictions for this problem.
    d = np.zeros_like(c)
    overallError = 0

    # layer_1_values holds the hidden state before the first step (all zeros)
    # followed by the hidden state after every step; layer_2_deltas holds the
    # output delta of every step. Both are consumed by the backward pass.
    layer_1_values = list()
    layer_2_deltas = list()
    layer_1_values.append(np.zeros(hidden_dim))

    # ---- forward pass ----
    for position in range(binary_dim):
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
        y = np.array([[c[binary_dim - position - 1]]]).T

        # hidden layer: current input bits plus the previous hidden state
        layer_1 = sigmoid(np.dot(X, wh) + np.dot(layer_1_values[-1], wtime))

        # output layer
        layer_2 = sigmoid(np.dot(layer_1, wout))

        # output error and its delta, stored for the backward pass
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error) * derivativeSigmoid(layer_2[0][0]))

        overallError += np.abs(layer_2_error[0])

        # predicted bit for this position
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # remember the hidden state for the next step and for the backward pass
        layer_1_values.append(copy.deepcopy(layer_1))

    future_layer_1_delta = np.zeros(hidden_dim)

    # ---- backward pass: backpropagation through time (BPTT) ----
    # Walks the time steps from last to first. Index `position` from the end
    # of the stored lists is the step that was fed a[position], b[position].
    for position in range(binary_dim):
        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]

        # error at hidden layer: contribution flowing back from the following
        # time step plus the contribution from this step's output
        error_hidden = (future_layer_1_delta.dot(wtime.T) + layer_2_delta.dot(wout.T))
        layer_1_delta = error_hidden * derivativeSigmoid(layer_1)

        # accumulate the updates; they are applied once per sample below
        wout_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        wtime_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        wh_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # The deltas are built from (y - layer_2), so the updates are added.
    wh += wh_update * alpha
    wout += wout_update * alpha
    wtime += wtime_update * alpha

    # Reset the accumulators in place for the next sample.
    wh_update *= 0
    wout_update *= 0
    wtime_update *= 0

    # Progress report every 1000 epochs.
    if epoch % 1000 == 0:
        print("Error : {}".format(overallError))
        print("Pred : {}".format(d))
        print("Actual : {}".format(c))
        # Decode the predicted bits into a decimal number. int(x) turns each
        # NumPy scalar into a Python int so the sum is plain integer
        # arithmetic, independent of NumPy's scalar promotion rules.
        out = 0
        for index, x in enumerate(reversed(d)):
            out += int(x) * pow(2, index)
        print("{} + {} = {}".format(a_int, b_int, c_int))
        print("{} + {} = {}".format(a_int, b_int, out))
        print("=========================================")

Error : [3.00609776]
Pred : [1 1 1 1 1 1 1 1]
Actual : [0 1 1 0 1 0 1 1]
16 + 91 = 107
16 + 91 = 255
Error : [4.04141986]
Pred : [1 1 1 1 1 1 1 1]
Actual : [0 1 0 0 1 0 0 0]
45 + 27 = 72
45 + 27 = 255
Error : [3.86373059]
Pred : [0 0 0 0 0 0 0 0]
Actual : [1 0 1 0 0 1 0 0]
113 + 51 = 164
113 + 51 = 0
Error : [3.7128309]
Pred : [1 1 1 1 1 1 1 1]
Actual : [0 1 1 1 0 1 1 0]
111 + 7 = 118
111 + 7 = 255
Error : [3.54946242]
Pred : [0 0 0 0 0 0 0 0]
Actual : [1 0 0 0 0 0 1 0]
37 + 93 = 130
37 + 93 = 0
Error : [3.83387304]
Pred : [0 0 0 0 0 0 0 0]
Actual : [1 0 0 0 1 0 1 0]
41 + 97 = 138
41 + 97 = 0
Error : [4.06720926]
Pred : [0 0 0 0 0 0 0 1]
Actual : [1 0 1 1 0 0 0 0]
95 + 81 = 176
95 + 81 = 1
Error : [3.95368412]
Pred : [1 1 1 1 1 1 1 1]
Actual : [1 0 0 1 0 1 1 0]
56 + 94 = 150
56 + 94 = 255
Error : [3.58742278]
Pred : [0 0 0 0 0 0 0 0]
Actual : [0 0 0 1 1 0 0 0]
15 + 9 = 24
15 + 9 = 0
Error : [3.80723098]
Pred : [0 0 0 0 0 0 0 1]
Actual : [0 1 0 0 1 0 1 1]
0 + 75 = 75
0 + 75 = 1
Error : 

Error : [0.10380691]
Pred : [1 0 0 1 0 1 1 1]
Actual : [1 0 0 1 0 1 1 1]
119 + 32 = 151
119 + 32 = 151
Error : [0.2255145]
Pred : [0 1 0 0 0 1 1 0]
Actual : [0 1 0 0 0 1 1 0]
10 + 60 = 70
10 + 60 = 70
Error : [0.19126086]
Pred : [1 0 1 1 0 1 0 1]
Actual : [1 0 1 1 0 1 0 1]
88 + 93 = 181
88 + 93 = 181
Error : [0.19987727]
Pred : [0 1 1 0 1 0 0 0]
Actual : [0 1 1 0 1 0 0 0]
21 + 83 = 104
21 + 83 = 104
Error : [0.12320708]
Pred : [0 0 1 1 1 1 0 0]
Actual : [0 0 1 1 1 1 0 0]
12 + 48 = 60
12 + 48 = 60
Error : [0.14962947]
Pred : [1 0 0 1 0 1 0 1]
Actual : [1 0 0 1 0 1 0 1]
67 + 82 = 149
67 + 82 = 149
Error : [0.18044513]
Pred : [0 1 1 1 0 1 0 1]
Actual : [0 1 1 1 0 1 0 1]
59 + 58 = 117
59 + 58 = 117
Error : [0.1654377]
Pred : [1 1 0 0 1 0 0 0]
Actual : [1 1 0 0 1 0 0 0]
118 + 82 = 200
118 + 82 = 200
Error : [0.14824907]
Pred : [0 1 1 1 0 1 0 1]
Actual : [0 1 1 1 0 1 0 1]
31 + 86 = 117
31 + 86 = 117
Error : [0.18816184]
Pred : [0 1 1 0 0 0 0 0]
Actual : [0 1 1 0 0 0 0 0]
45 + 51 = 96
45 + 51