In [1]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

In [2]:
sess = tf.Session()
random_seed = 41

In [3]:
epochs = 100
lstm_units = 64
num_classes = 2

dataset_size = 100000
max_sequence_length = 50
batch_size = 16
num_batches = dataset_size//batch_size

## 100,000 Binary Strings of Length 50

In [4]:
data1 = np.random.binomial(1, 0.5, (dataset_size, max_sequence_length, 1))
labels1 = np.reshape(np.sum(data1, axis=1) % 2, (-1, 1))
labels1_one_hot = OneHotEncoder().fit_transform(labels1).toarray()

lengths1 = np.ones((dataset_size, 1)) * max_sequence_length

## 100,000 Binary Strings of Length Between 1-50

In [6]:
data2 = list()
labels2 = list()

for _ in range(dataset_size):
    n = np.random.randint(1, max_sequence_length+1)
    data_row = np.random.binomial(1, 0.5, (1, n))
    data2.append(data_row)

    label_row = np.sum(data_row, axis=1) % 2
    labels2.append(label_row)
labels2 = np.array(labels2)

## LSTM Model

In [7]:
X = tf.placeholder(tf.float32, [None, max_sequence_length, 1])
X_len = tf.placeholder(tf.int32, [None, 1])
Y = tf.placeholder(tf.float32, [None, 2])

In [8]:
batch_dim = tf.shape(X)[0]
with tf.variable_scope("LSTM", reuse=tf.AUTO_REUSE):
    tf.set_random_seed(random_seed)
    
    cell = tf.contrib.rnn.LSTMCell(lstm_units)
    initial_state = cell.zero_state(batch_dim, tf.float32)
    
    output, _ = tf.nn.dynamic_rnn(cell, X, initial_state=initial_state)

In [9]:
def extract_final_output(output, lengths):
    indices = lengths[-1] - 1
    h = tf.gather(output, indices, axis=1)
    h = tf.squeeze(h)
    return h

final_rnn_outputs = extract_final_output(output, X_len)

In [10]:
with tf.variable_scope("Linear"):
    W = tf.Variable(tf.random_normal([lstm_units, num_classes], stddev=0.1, seed=random_seed))
    b = tf.Variable(tf.zeros([num_classes]))
    h = tf.matmul(final_rnn_outputs, W) + b

In [11]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=h, labels=Y))
train_op = tf.train.AdamOptimizer(0.005).minimize(loss)

In [19]:
predictions = tf.squeeze(tf.argmax(h, axis=1))

## Training

In [25]:
sess.run(tf.global_variables_initializer())

for t in range(epochs):
    print("Epoch {}\n".format(t))
    for i in range(0, num_batches, batch_size):
        X_batch = data1[i:i+batch_size]
        Y_batch = labels1_one_hot[i:i+batch_size]
        X_len_batch = lengths1[i:i+batch_size]
        _  = sess.run([loss, train_op], feed_dict={X: X_batch, Y: Y_batch, X_len: X_len_batch})[0]
        
        if i//batch_size % 1000 == 0:
            pred = sess.run(predictions, feed_dict={X: X_batch, Y: Y_batch, X_len: X_len_batch})
            accuracy = np.mean(pred == labels1[i:i+batch_size, :])
            print("Batch {}\nAccuracy: {:.1f}%".format(i//batch_size, accuracy*100))

Epoch 0

Batch 0
Accuracy: 50.8%
Epoch 1

Batch 0
Accuracy: 56.2%


KeyboardInterrupt: 