In [3]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn

In [4]:
def logprob(predictions, labels):
    """Log-probability of the true labels in a predicted batch."""
    predictions[predictions < 1e-10] = 1e-10
    return np.sum(np.multiply(labels, -np.log(predictions))) / labels.shape[0]

In [5]:
class BatchGenerator(object):
    def __init__(self, batch_size):
        self._journeys_types = np.array([
            [1,2,3,4,5,6,7],
            [7,6,5,4,3,2,1],
            [4,5,2,3,1,7,6],
            [1,1,3,4,4,5,7],
            [1,1,1,4,4,5,5]
    #         [5,5,5,2,2,2,3,3,3]
        ])
        self._batch_size = batch_size
        self._num_unrollings = max(map(len, self._journeys_types))
        self._vocabulary_size = len(np.unique(self._journeys_types.flatten()))
    
    
    def val2id(self, val):
        return val-1
    
    
    def id2val(self, id):
        return id+1
    
    
    def batch2journeys(self, batch):
        r = []
        for i in zip(*batch):
            ids = np.argmax(i, axis=1)                
            r.append(map(self.id2val, ids))
        return r
       
        
    def next_batch(self):
        return [self._journeys_types[np.random.randint(0, len(self._journeys_types))] for i in range(self._batch_size)]
        
        
    def encode_batches(self, batch):        
        b_transposed = np.transpose(batch)
        
        res = np.zeros((self._num_unrollings, self._batch_size, self._vocabulary_size), dtype=np.float32)
        for (unrolling, batch), value in np.ndenumerate(b_transposed):
            res[unrolling, batch, self.val2id(value)] = 1.0   
        return res

    
    def encode_1h(self, id):
        z = np.zeros(self._vocabulary_size, dtype=np.float32)        
        z[id] = 1.0        
#         return z.reshape((self._batch_size, self._vocabulary_size))        
        return z.reshape((1, self._vocabulary_size))        
        

    def weighted_pick(self, weights):
        assert len(weights) == self._vocabulary_size
        t = np.cumsum(weights)
        s = np.sum(weights)
        return(int(np.searchsorted(t, np.random.rand(1)*s)))

In [26]:
# Parameters
vocabulary_size = 7 #total possible journey states
n_hidden = 14 # hidden layer num of features
n_unrollings = 7-1 #10 #max journeys length => RNN unrolled length
batch_size = 1

data_gen = BatchGenerator(batch_size)

In [27]:
graph = tf.Graph()
with graph.as_default():        

    # Input data.
    
#     jrn_length_input = tf.placeholder(tf.int32, shape=None)
#     jrn_length = tf.constant((jrn_length_input)
#     jrn_length.
#     train_data = list()
#     for _ in range(jrn_length):
#         train_data.append(
#             tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))
#     train_inputs = train_data[:jrn_length-1]
#     train_labels = train_data[1:]  # labels are inputs shifted by one time step
    
    train_data = list()
    for _ in range(n_unrollings + 1):
        train_data.append(
            tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))
    train_inputs = train_data[:n_unrollings]
    train_labels = train_data[1:]  # labels are inputs shifted by one time step.
    
    
    with tf.variable_scope("rnn") as rnn_scope:
    
        # RNN struct
        cell = rnn_cell.LSTMCell(num_units=n_hidden, input_size=vocabulary_size)     
        outputs, states = rnn.rnn(cell, train_inputs, dtype=tf.float32) 
        output = tf.concat(0, outputs)#tf.reshape(tf.concat(1, outputs), (-1, n_hidden))

        # Classifier.
        W_hy = tf.get_variable("W_hy", [n_hidden, vocabulary_size])
        b_hy = tf.get_variable("b_hy", [vocabulary_size])
        logits = tf.matmul(output, W_hy) + b_hy

        # Loss func
        loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, tf.concat(0, train_labels)))


        # Optimizer.
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        gradients, v = zip(*optimizer.compute_gradients(loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
        optimizer = optimizer.apply_gradients(
                zip(gradients, v), global_step=global_step)


        # Predictions.
        train_prediction = tf.nn.softmax(logits)

        
#         loss_summary = tf.scalar_summary("loss", loss)
#         learning_rate_summary = tf.scalar_summary("learning_rate", learning_rate)

In [28]:
def get_predictor(inputs_number):
    with graph.as_default():  
        
#         rnn_scope.reuse_variables()
        with tf.variable_scope("rnn",reuse=True) as rnn_scope:
        
            sample_inputs = list()
            for _ in range(inputs_number):
                sample_inputs.append(
                    tf.placeholder(tf.float32, shape=[None, vocabulary_size]))

            sample_outputs, sample_states = rnn.rnn(cell, sample_inputs ,dtype=tf.float32) 


            sample_output = tf.concat(0, sample_outputs)

            sample_logits = tf.matmul(sample_output, W_hy) + b_hy
            sample_prediction = tf.nn.softmax(sample_logits)
            
            return sample_inputs, sample_prediction

In [29]:
num_epochs = 10000
summary_frequency = 100




# with tf.Session(graph=graph) as sess:
sess = tf.InteractiveSession(graph=graph)


# merged_summaries = tf.merge_all_summaries()
writer = tf.train.SummaryWriter('/tmp/tensorflow_logs', sess.graph_def)

tf.initialize_all_variables().run()
# saver = tf.train.Saver(tf.all_variables())



print('Initialized')
mean_loss = 0
for step in range(num_epochs):

    batches = data_gen.next_batch()
    batches_encoded = data_gen.encode_batches(batches)
    feed_dict = dict()
    # fill data into input placeholders
    for i in range(n_unrollings+1):
        feed_dict[train_data[i]] = batches_encoded[i]


    _, l, predictions, lr = sess.run([optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
    mean_loss += l

    
    
    if step % summary_frequency == 0:
        if step > 0:
            mean_loss = mean_loss / summary_frequency
        # The mean loss is an estimate of the loss over the last few batches.
        
        mean_loss = 0
        
        
        labels = np.concatenate(list(batches_encoded)[1:])
        perplexity = np.exp(logprob(predictions, labels))
        
        perplexity_summary = tf.scalar_summary("perplexity", perplexity)        
#         mean_loss_summary = tf.scalar_summary("mean_loss", mean_loss)
        
        if step % summary_frequency == 0:
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            print('Minibatch perplexity: %.2f' % float(perplexity))
#             writer.add_summary(summary_str, step)

        
        

        pr = train_prediction.eval(feed_dict)
        print map(lambda w: data_gen.id2val(data_gen.weighted_pick(w)), pr)

Initialized
Average loss at step 0: 0.000000 learning rate: 10.000000
Minibatch perplexity: 12.53
[7, 4, 3, 3, 3, 7]
Average loss at step 100: 0.000000 learning rate: 10.000000
Minibatch perplexity: 1.40
[6, 5, 4, 3, 2, 1]
Average loss at step 200: 0.000000 learning rate: 10.000000
Minibatch perplexity: 6.61
[1, 1, 4, 5, 5, 5]
Average loss at step 300: 0.000000 learning rate: 10.000000
Minibatch perplexity: 1.02
[5, 2, 3, 1, 7, 6]
Average loss at step 400: 0.000000 learning rate: 10.000000
Minibatch perplexity: 1.00
[5, 2, 3, 1, 7, 6]


Exception AssertionError: AssertionError() in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x7f580a4f0ad0>> ignored
Exception AssertionError: AssertionError() in <generator object get_controller at 0x7f580a4dd1e0> ignored


KeyboardInterrupt: 

In [30]:
len(list(batches_encoded)[1:][0])

1

In [31]:
labels.shape

(6, 7)

In [18]:
ini_steps = [7]
jrn_ids = map(data_gen.val2id, ini_steps)
jrn_1h = map(data_gen.encode_1h, jrn_ids)


# for _ in range(7-len(jrn_ids)):
for _ in range(5-len(jrn_ids)):
    sample_inputs, model = get_predictor(len(jrn_ids))

    dick ={}
    for i, s in zip(sample_inputs, jrn_1h):                        
        dick[i]=s

    prediction = model.eval(dick)

    pred_step_id = data_gen.weighted_pick(prediction[-1].ravel())                                          
    jrn_ids.append(pred_step_id)                 

    pred_step_1h = data_gen.encode_1h(pred_step_id)                    
    jrn_1h.append(pred_step_1h)                    



print map(lambda w: data_gen.id2val(w), jrn_ids)

[7, 6, 5, 4, 3]


In [None]:
1+2

In [12]:
id=5

z = np.zeros(7, dtype=np.float32)        
z[id] = 1.0        
z.reshape((2, 7))

ValueError: total size of new array must be unchanged