In [1]:
import copy, numpy as np
np.random.seed(0)

# sigmoid activation
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of *x*.

    The expression is evaluated with NumPy, so *x* may be a scalar or an
    array; arrays are transformed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# sigmoid derivative
def sigmoid_dr(output):
    """Return output * (1 - output).

    *output* is expected to already be a sigmoid activation (the value
    returned by ``sigmoid``), not the pre-activation input; under that
    assumption the product is the sigmoid's derivative at that point.
    """
    complement = 1 - output
    return output * complement

# Lookup table from an integer to its binary encoding.
binary_dim = 8  # number of bits, i.e. the sequence length
largest_number = 2**binary_dim

# Each row of the column vector holds one uint8 value; unpackbits expands
# every row into its bits, so row k of `binary` is the bit pattern of k
# (most significant bit first, numpy's default bit order).
binary = np.unpackbits(
    np.array([range(largest_number)], dtype=np.uint8).T, axis=1)

# int2binary[k] is row k of `binary`.
int2binary = {value: binary[value] for value in range(largest_number)}

# Hyperparameters
alpha = 0.1       # factor applied to the accumulated updates each iteration
input_dim = 2     # number of input features fed to the network per step
hidden_dim = 16   # size of the hidden (recurrent) layer
output_dim = 1    # number of output units


# Network weights, drawn uniformly from [-1, 1).
# The draw order (wax, way, waa) is kept as-is so that a seeded run yields
# the same initial weights.
wax = np.random.random((input_dim, hidden_dim)) * 2 - 1    # input  -> hidden
way = np.random.random((hidden_dim, output_dim)) * 2 - 1   # hidden -> output
waa = np.random.random((hidden_dim, hidden_dim)) * 2 - 1   # hidden -> hidden

# Zero-filled accumulators, one per weight matrix, with matching shapes.
wax_update, way_update, waa_update = (
    np.zeros_like(weights) for weights in (wax, way, waa))

# Training logic: each iteration samples one addition problem, runs the
# recurrent network over its bits, back-propagates through all timesteps and
# applies the accumulated weight updates.
for j in range(10000):
    # Both addends are drawn below largest_number // 2, so their sum stays
    # below largest_number and is a valid key of int2binary. Integer division
    # keeps the bound an int instead of relying on randint coercing a float.
    a_int = np.random.randint(largest_number // 2)
    a = int2binary[a_int]
    b_int = np.random.randint(largest_number // 2)
    b = int2binary[b_int]
    c_int = a_int + b_int   # the true answer
    c = int2binary[c_int]
    d = np.zeros_like(c)    # the network's predicted bits, filled in below

    TotalLoss = 0

    drivatives = []         # output-layer deltas, one per timestep
    # Hidden activations; the first entry is the all-zero initial state, so
    # entry t + 1 is the hidden state produced at timestep t.
    activation_values = [np.zeros(hidden_dim)]

    # Forward pass: walk the encoding from the last index (least significant
    # bit) to the first, the order in which carries propagate.
    for position in range(binary_dim):
        bit = binary_dim - position - 1

        # input is the pair of addend bits, target is the matching sum bit
        X = np.array([[a[bit], b[bit]]])
        y = np.array([[c[bit]]]).T

        # a_t = g(x_t . wax + a_(t-1) . waa)
        at = sigmoid(np.dot(X, wax) + np.dot(activation_values[-1], waa))

        # y_t = g(a_t . way)
        ypred = sigmoid(np.dot(at, way))

        # Output error and its delta: (y - ypred) * g'(ypred). The sign
        # matches the "+=" weight updates applied after the backward pass.
        loss = y - ypred
        drivatives.append(loss * sigmoid_dr(ypred))
        TotalLoss += np.abs(loss[0])

        # decode the estimate so it can be printed
        d[bit] = np.round(ypred[0][0])

        # store the hidden state for the next timestep and the backward pass
        activation_values.append(copy.deepcopy(at))

    # Delta of the hidden layer at the following timestep; zero for the last.
    final_Activation_drs = np.zeros(hidden_dim)

    # Backward pass: timesteps are visited last to first. The forward pass
    # consumed bit index binary_dim - 1 first, so its last timestep used
    # index 0; hence a[position] / b[position] line up with the negative
    # indices used on the stored activations and deltas.
    for position in range(binary_dim):

        X = np.array([[a[position], b[position]]])
        at = activation_values[-position - 1]
        prev_at = activation_values[-position - 2]

        # error at output layer
        drivative2 = drivatives[-position - 1]
        # error at hidden layer: contribution from the following timestep
        # (through waa) plus contribution from this timestep's output
        # (through way), scaled by the sigmoid derivative
        drivative1 = (final_Activation_drs.dot(waa.T)
                      + drivative2.dot(way.T)) * sigmoid_dr(at)

        # accumulate the updates; they are applied once per iteration below
        way_update += np.atleast_2d(at).T.dot(drivative2)
        waa_update += np.atleast_2d(prev_at).T.dot(drivative1)
        wax_update += X.T.dot(drivative1)

        final_Activation_drs = drivative1

    # apply the accumulated updates scaled by alpha
    wax += wax_update * alpha
    way += way_update * alpha
    waa += waa_update * alpha

    # reset the accumulators in place for the next iteration
    wax_update *= 0
    way_update *= 0
    waa_update *= 0

    # print out progress
    if j % 1000 == 0:
        print(f"Error:{TotalLoss}")
        print(f"Pred:  {d}")
        print(f"True:  {c}")
        # Decode the predicted bits (most significant first) into a plain
        # Python int; converting each bit with int() keeps the arithmetic
        # out of fixed-width uint8.
        out = sum(int(bit_value) << index
                  for index, bit_value in enumerate(reversed(d)))
        # Interpolate the decoded value; previously the literal text
        # "str(out)" was printed instead of the number.
        print(f"{a_int}  +  {b_int}  = {out}")
        print("------------")

Error:[3.45638663]
Pred:  [0 0 0 0 0 0 0 1]
True:  [0 1 0 0 0 1 0 1]
9  +  60  = str(out)
------------
Error:[3.63389116]
Pred:  [1 1 1 1 1 1 1 1]
True:  [0 0 1 1 1 1 1 1]
28  +  35  = str(out)
------------
Error:[3.91366595]
Pred:  [0 1 0 0 1 0 0 0]
True:  [1 0 1 0 0 0 0 0]
116  +  44  = str(out)
------------
Error:[3.72191702]
Pred:  [1 1 0 1 1 1 1 1]
True:  [0 1 0 0 1 1 0 1]
4  +  73  = str(out)
------------
Error:[3.5852713]
Pred:  [0 0 0 0 1 0 0 0]
True:  [0 1 0 1 0 0 1 0]
71  +  11  = str(out)
------------
Error:[2.53352328]
Pred:  [1 0 1 0 0 0 1 0]
True:  [1 1 0 0 0 0 1 0]
81  +  113  = str(out)
------------
Error:[0.57691441]
Pred:  [0 1 0 1 0 0 0 1]
True:  [0 1 0 1 0 0 0 1]
81  +  0  = str(out)
------------
Error:[1.42589952]
Pred:  [1 0 0 0 0 0 0 1]
True:  [1 0 0 0 0 0 0 1]
4  +  125  = str(out)
------------
Error:[0.47477457]
Pred:  [0 0 1 1 1 0 0 0]
True:  [0 0 1 1 1 0 0 0]
39  +  17  = str(out)
------------
Error:[0.21595037]
Pred:  [0 0 0 0 1 1 1 0]
True:  [0 0 0 0 1 1 1 