In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

init_state = tf.placeholder(tf.float32, [batch_size, state_size])

W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state],1)  # Increasing number of columns

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state

logits_series = [tf.matmul(state, W2) + b2 for state in states_series] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits) for logits, labels in zip(logits_series,labels_series)]
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

def plot(loss_list, predictions_series, batchX, batchY):
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    for batch_series_idx in range(5):
        one_hot_output_series = np.array(predictions_series)[:, batch_series_idx, :]
        single_output_series = np.array([(1 if out[0] < 0.5 else 0) for out in one_hot_output_series])

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        left_offset = range(truncated_backprop_length)
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x,y = generateData()
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx]
            batchY = y[:,start_idx:end_idx]

            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder:batchX,
                    batchY_placeholder:batchY,
                    init_state:_current_state
                })

            loss_list.append(_total_loss)

            if batch_idx%100 == 0:
                print("Step",batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

plt.ioff()
plt.show()

In [11]:
V_dict = dict()
V_dict['123']

KeyError: '123'

In [3]:
l = []
for x in range(5):
    l2 = range(x)
    l.append(l2)
l

[range(0, 0), range(0, 1), range(0, 2), range(0, 3), range(0, 4)]

In [32]:
%run -i ../dataPreprocess.py

pos finished
neg finished
25000
25000
<numpy.lib.npyio.NpzFile object at 0x7f8dba14cc18>
25000


In [30]:
# loaded = np.load('aclimdb-train.npz')
print(loaded)
print(loaded['inputs'][0])
print(loaded['inputs'].shape)

<numpy.lib.npyio.NpzFile object at 0x7f8d979b8ac8>
[37729, 37209, 158108, 86290, 139250, 86477, 86290, 127094, 124818, 119952, 157375, 146783, 157593, 124866, 94382, 112400, 37209, 108253, 150092, 37209, 157375, 115029, 110972, 37209, 155247, 141912, 150354, 129701, 86290, 125306, 152563, 156766, 111530, 128926, 71922, 157266, 93237, 8961, 87469, 37209, 109900, 127561, 156547, 86290, 123426, 136907, 127094, 150092, 156485, 111714, 150888, 156531, 37209, 103107, 149025, 108253, 37209, 101030, 150888, 91238, 86426, 150888, 146594, 117852, 150107, 139957, 157057, 119952, 156485, 129688, 38879, 156504, 149025, 113694, 94382, 89791, 122140, 119952, 156485, 128822, 152906, 128822, 86290, 97935, 129457, 107407, 157047, 88057, 124006, 124866, 100331, 38879, 156485, 120723, 148681, 110544, 88372, 117275, 158516, 94803, 149025, 149160, 119952, 146594, 117852, 150107, 123054, 139957, 93374, 37209, 87564, 157593, 150394, 157185, 101831, 150354, 127094, 91363, 119952, 119893, 148380, 150888, 133326

In [3]:
import data_providers as data_providers
train_data = data_providers.ACLIMDBDataProvider('train', batch_size=50)
print(train_data.next())

(array([ list([3463, 137082, 151061, 129457, 113225, 139256, 86477, 150354, 127094, 37209, 101606, 150888, 149160, 119952, 110811, 86290, 145759, 37209, 93271, 119952, 129688, 20112, 115461, 150107, 145759, 135899, 37209, 123712, 86290, 104700, 77044, 91516, 156485, 107785, 37209, 116362, 110811, 86290, 132141, 142335, 154710, 87968, 157593, 145103, 136487, 150092, 37209, 155247, 115186, 129688, 80965, 84437, 76172, 87753, 150888, 156963, 143071, 115029, 91445, 86290, 113225, 127094, 12722, 150107, 133401, 37209, 115186, 156485, 144842, 113694, 156485, 144842, 134448, 26182, 150701, 37209, 140930, 86290, 95989, 107963, 110811, 150107, 45884, 37209, 114089, 157047, 81149, 115084, 156485, 60194, 150107, 129731, 99395, 95989, 112574, 121099, 129464, 117852, 150107, 125586, 129457, 150107, 127094, 56204, 144842, 150107, 133401, 156504, 149025, 155484, 113225, 94382, 89791, 122140, 150210, 156485, 145103, 113225, 121500, 111616, 139516, 84423, 77044, 109730, 156878, 155484, 143024, 88372, 1

In [2]:
for input_batch, target_batch in train_data:
    print(input_batch)


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
