# Sum of two binary numbers using single cell RNN

Martin Kersner, <m.kersner@gmail.com>

2017/07/04

This notebook is part of presentation about Recurrent Neural Networks at [Seoul Artificial Intelligence Meetup](https://www.meetup.com/Seoul-Artificial-Intelligence-Meetup/).

In [1]:
import random
import numpy as np
import tensorflow as tf

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Auxiliary functions for conversion to/from binary numbers and for generating and splitting dataset.

In [2]:
def int2bin(i, length=10):
    b = bin(i)[2:].zfill(length)
    b_lst = [int(i) for i in b]
    b_lst.reverse()
    return np.array(b_lst)

def bin2int(b, axis=0):
    b_lst = list(b[0,:,axis])
    b_lst.reverse()
    b_str = "".join(str(int(i)) for i in b_lst)
    return int(b_str, 2)

def int2binABC(A, B, C, length=10):
    return int2bin(A, length), int2bin(B, length), int2bin(C, length)

def gen_data(func):
    def inner(*args, **kwargs):
        A, B = func(*args, **kwargs)
        C = A+B
        Ab, Bb, Cb = int2binABC(A, B, C)

        X_batch = None
        X_batch = np.vstack((Ab, Bb)).T
        X_batch = X_batch[np.newaxis, :]
        Y_batch = Cb[np.newaxis, :, np.newaxis]
        
        return X_batch, Y_batch
    
    return inner

@gen_data
def gen_given_data(A, B):
    return A, B

@gen_data
def gen_random_data(max_val=100):
    A = np.random.randint(max_val)
    B = np.random.randint(max_val)
    
    return A, B

def gen_dataset(length, binary_length=10):
    X = np.zeros((length*length, binary_length,  2))
    Y = np.zeros((length*length, binary_length, 1))    

    idx = 0
    for A in range(length):
        for B in range(length):
            C = A + B

            Ab, Bb, Cb = int2binABC(A, B, C, binary_length)

            X[idx] = np.vstack((Ab, Bb)).T
            Y[idx] = Cb[:, np.newaxis]

            idx += 1

    return X, Y

def split_train_test(X, Y, ratio=0.7):
    length = X.shape[0]
    
    ran = range(length)
    train_lst = random.sample(ran, int(ratio*length))
    test_lst = list(set(ran)-set(train_lst))
    
    X_train = X[train_lst, :, :]
    Y_train = Y[train_lst, :, :]
    
    X_test = X[test_lst, :, :]
    Y_test = Y[test_lst, :, :]

    return X_train, Y_train, X_test, Y_test

## Generate dataset

Dataset consist of numbers between 0 and 99 (`X_data`) and their sums (`Y_data`). Dataset is split to training and validation data in ratio 10:90.

In [3]:
X_data, Y_data = gen_dataset(100)
X_train, Y_train, X_test, Y_test = split_train_test(X_data, Y_data, 0.10)

In [4]:
print(X_train.shape[0])
print(X_test.shape[0])

1000
9000


## Define RNN network

RNN network is composed of 1 one RNN cell with 2 neurons and will be computed over 10 time steps.

Number of inputs is 2; one value for the first binary number and one for the second binary numnber.
Number of outputs is 1, because we want to predict 1 binary value per time step.

In [5]:
reset_graph()

n_steps   = 10
n_inputs  = 2
n_outputs = 1
n_neurons = 2

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

# RNN cell
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.contrib.rnn.BasicRNNCell(num_units=n_neurons),
    output_size=n_outputs)

outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

## Define loss function and optimizer

In [6]:
learning_rate = 0.01
loss = tf.reduce_mean(tf.square(outputs-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

## Train and evaluate

Train model for 80 epochs or terminate training early if MSE drops below 0.01. Traning data are fed in batches of size 50. After, trained model is evaluated on validation dataset and saved into `model` file.


In [7]:
batch_size   = 50
n_epochs     = 80

saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    
    for epoch in range(n_epochs):
        for iteration in range(X_train.shape[0] // batch_size):
            X_batch = X_train[iteration*batch_size:(iteration*batch_size)+batch_size,:,:]
            Y_batch = Y_train[iteration*batch_size:(iteration*batch_size)+batch_size,:,:]
        
            sess.run(training_op, feed_dict={X: X_batch, y: Y_batch})

        mse = loss.eval(feed_dict={X: X_batch, y: Y_batch})
        print("epoch {} MSE: {}".format(epoch, mse))
        
        if mse < 0.01:
            break

    saver.save(sess, "./model")
    
    res = loss.eval(feed_dict={X: X_test, y: Y_test})
    print("test  MSE: ", res)

epoch 0 MSE: 0.4649597108364105
epoch 1 MSE: 0.3353145122528076
epoch 2 MSE: 0.277487576007843
epoch 3 MSE: 0.25327014923095703
epoch 4 MSE: 0.24150414764881134
epoch 5 MSE: 0.23641417920589447
epoch 6 MSE: 0.23250973224639893
epoch 7 MSE: 0.22827669978141785
epoch 8 MSE: 0.22414745390415192
epoch 9 MSE: 0.2203630954027176
epoch 10 MSE: 0.21682028472423553
epoch 11 MSE: 0.21336780488491058
epoch 12 MSE: 0.20991644263267517
epoch 13 MSE: 0.20639681816101074
epoch 14 MSE: 0.2027425915002823
epoch 15 MSE: 0.19890856742858887
epoch 16 MSE: 0.19487206637859344
epoch 17 MSE: 0.19062311947345734
epoch 18 MSE: 0.1861514002084732
epoch 19 MSE: 0.18143562972545624
epoch 20 MSE: 0.17644280195236206
epoch 21 MSE: 0.1711369901895523
epoch 22 MSE: 0.16548380255699158
epoch 23 MSE: 0.1594487875699997
epoch 24 MSE: 0.1530078500509262
epoch 25 MSE: 0.14618070423603058
epoch 26 MSE: 0.139068141579628
epoch 27 MSE: 0.13186581432819366
epoch 28 MSE: 0.12483739107847214
epoch 29 MSE: 0.11825453490018845
ep

## Test model

Load trained model and compute sum of A and B binary numbers.

Change `A` and `B` values and see if sum of those numbers will be correct.
`A` and `B` values are internally converted to binary numbers with **rightmost significant  bit**.
Notice that model was trained only on numbers up to 99, but is able to sum larger numbers if their total sum is smaller than 1023.

In [8]:
A = 853
B = 126
C = A+B

with tf.Session() as sess:
    saver.restore(sess, "./model")
      
    X_batch, y_batch = gen_given_data(A, B)
    
    binary_sum = outputs.eval(feed_dict={X: X_batch, y: y_batch})
    
    # Thresholding is neccessary because predictions from model are floating point values between 0 and 1
    # but we want to obtain binary values.
    binary_sum[binary_sum > 0.5]  = 1
    binary_sum[binary_sum <= 0.5] = 0
    
    int_sum = bin2int(binary_sum)
    
    print("A   ", bin2int(X_batch))
    print("B   ", bin2int(X_batch, axis=1))
    print("SUM ", int_sum)
    
    print("Correct" if C == int_sum else "Wrong")

INFO:tensorflow:Restoring parameters from ./model
A    853
B    126
SUM  979
Correct
