In [1]:
import copy 
import numpy as np
from IPython.display import display

In [2]:
np.random.seed(42)

#### Create the sigmoid function

In [3]:
def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

In [4]:
def sigmoid_output_to_derivative(output):
    return output*(1-output)

#### Fake dataset 

In [5]:
int2binary = {}
binary_dim = 8

In [6]:
largest_number = pow(2,binary_dim)

In [7]:
largest_number

256

In [8]:
binary = np.unpackbits(np.array([range(largest_number)],dtype=np.uint8).T,axis=1)

In [9]:
binary

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 1, 0],
       ..., 
       [1, 1, 1, ..., 1, 0, 1],
       [1, 1, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 1, 1, 1]], dtype=uint8)

In [10]:
for i in range(largest_number):
    int2binary[i] = binary[i]

#### Parameters

In [11]:
alpha = 0.1
input_dim = 2
hidden_dim = 16
output_dim = 1

#### "Synapses"

In [12]:
synapse_0 = 2*np.random.random((input_dim,hidden_dim)) - 1
synapse_1 = 2*np.random.random((hidden_dim,output_dim)) - 1
synapse_h = 2*np.random.random((hidden_dim,hidden_dim)) - 1

In [12]:
display(synapse_0[:2])
display(synapse_1[:2])
display(synapse_h[:2])

array([[-0.25091976,  0.90142861,  0.46398788,  0.19731697, -0.68796272,
        -0.68801096, -0.88383278,  0.73235229,  0.20223002,  0.41614516,
        -0.95883101,  0.9398197 ,  0.66488528, -0.57532178, -0.63635007,
        -0.63319098],
       [-0.39151551,  0.04951286, -0.13610996, -0.41754172,  0.22370579,
        -0.72101228, -0.4157107 , -0.26727631, -0.08786003,  0.57035192,
        -0.60065244,  0.02846888,  0.18482914, -0.90709917,  0.2150897 ,
        -0.65895175]])

array([[-0.86989681],
       [ 0.89777107]])

array([[ 0.09342056, -0.63029109,  0.93916926,  0.55026565,  0.87899788,
         0.7896547 ,  0.19579996,  0.84374847, -0.823015  , -0.60803428,
        -0.90954542, -0.34933934, -0.22264542, -0.45730194,  0.65747502,
        -0.28649335],
       [-0.43813098,  0.08539217, -0.71815155,  0.60439396, -0.85089871,
         0.97377387,  0.54448954, -0.60256864, -0.98895577,  0.63092286,
         0.41371469,  0.45801434,  0.54254069, -0.8519107 , -0.28306854,
        -0.76826188]])

#### Update the weights with an array of the same shape, but all zeros

In [13]:
synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

In [14]:
display(synapse_0_update[:2])
display(synapse_1_update[:2])
display(synapse_h_update[:2])

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.]])

array([[ 0.],
       [ 0.]])

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.]])

In [18]:
for j in range(112000):
    a_int = np.random.randint(largest_number/2) 
    # print(a_int)
    a = int2binary[a_int]
    # print(a)
    b_int = np.random.randint(largest_number/2) 
    b = int2binary[b_int]
    c_int = a_int + b_int
    c = int2binary[c_int]
    # print(c)
    d = np.zeros_like(c)
    # print(d)
    overallError = 0
    
    layer_2_deltas = list()
    layer_1_values = list()
    layer_1_values.append(np.zeros(hidden_dim))
    
    # now go from one position to the next of the binary encoding
    for position in range(binary_dim):
        X = np.array([[a[binary_dim - position - 1],b[binary_dim - position - 1]]])
        # print(X)
        y = np.array([[c[binary_dim - position - 1]]]).T
        # print(y)
        
        layer_1 = sigmoid(np.dot(X,synapse_0) + np.dot(layer_1_values[-1],synapse_h))
        layer_2 = sigmoid(np.dot(layer_1,synapse_1))
        
        # did i mess up? probably. how much did I mess up?
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error)*sigmoid_output_to_derivative(layer_2))
        overallError += np.abs(layer_2_error[0])
        
        # print(overallError)
        # decode the estimate, so that we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])
        
        #now append all those to the list for the next layer
        layer_1_values.append(copy.deepcopy(layer_1))
        # print(layer_1_values) -- this is where printing takes awhile. I saved as layer_1_values in folder
    # this is the np array size. we save that for the next layer    
    future_layer_1_delta = np.zeros(hidden_dim)
    # print(future_layer_1_delta)

    for position in range(binary_dim):
        X = np.array([[a[position],b[position]]])
        # print(X)
        layer_1 = layer_1_values[-position-1]
        # print(layer_1)
        prev_layer_1 = layer_1_values[-position-2]
        
        # error of the output layer
        layer_2_delta = layer_2_deltas[-position-1]
        
        # error of the hidden layer
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) 
                         + layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
        # print(layer_1_delta)   
        # update all the weights, then we can do it again yayyyyy
        
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)        
        synapse_0_update += X.T.dot(layer_1_delta)
        # print(synapse_0_update)
        future_layer_1_delta = layer_1_delta
        # print(future_layer_1_delta)
        
        synapse_0 += synapse_0_update * alpha
        synapse_1 += synapse_1_update * alpha
        synapse_h += synapse_h_update * alpha   

        synapse_0_update *= 0
        synapse_1_update *= 0
        synapse_h_update *= 0       
        
        
        # print out the display as this mofo learns
        if(j % 1000 == 0):
            print "Error:" + str(overallError)
            print "Pred:" + str(d)
            print "True:" + str(c)
            out = 0
            
            for index,x in enumerate(reversed(d)):
                out += x*pow(2,index)
                
                print str(a_int) + " + " + str(b_int) + " = " + str(out)
                
                print "------------"


Error:[ 0.05766743]
Pred:[0 1 1 1 1 1 0 1]
True:[0 1 1 1 1 1 0 1]
15 + 110 = 1
------------
15 + 110 = 1
------------
15 + 110 = 5
------------
15 + 110 = 13
------------
15 + 110 = 29
------------
15 + 110 = 61
------------
15 + 110 = 125
------------
15 + 110 = 125
------------
Error:[ 0.05766743]
Pred:[0 1 1 1 1 1 0 1]
True:[0 1 1 1 1 1 0 1]
15 + 110 = 1
------------
15 + 110 = 1
------------
15 + 110 = 5
------------
15 + 110 = 13
------------
15 + 110 = 29
------------
15 + 110 = 61
------------
15 + 110 = 125
------------
15 + 110 = 125
------------
Error:[ 0.05766743]
Pred:[0 1 1 1 1 1 0 1]
True:[0 1 1 1 1 1 0 1]
15 + 110 = 1
------------
15 + 110 = 1
------------
15 + 110 = 5
------------
15 + 110 = 13
------------
15 + 110 = 29
------------
15 + 110 = 61
------------
15 + 110 = 125
------------
15 + 110 = 125
------------
Error:[ 0.05766743]
Pred:[0 1 1 1 1 1 0 1]
True:[0 1 1 1 1 1 0 1]
15 + 110 = 1
------------
15 + 110 = 1
------------
15 + 110 = 5
------------
15 + 110 = 13