In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn

In [2]:
def logprob(predictions, labels):
    """Log-probability of the true labels in a predicted batch."""
    predictions[predictions < 1e-10] = 1e-10
    return np.sum(np.multiply(labels, -np.log(predictions))) / labels.shape[0]

In [3]:
class BatchGenerator(object):
    def __init__(self, batch_size):
        self._journeys_types = np.array([
            [1,2,3,4,5,6,7],
            [7,6,5,4,3,2,1],
            [4,5,2,3,1,7,6],
            [1,1,3,4,4,5,7],
            [1,1,1,4,4,5,5]
    #         [5,5,5,2,2,2,3,3,3]
        ])
        self._batch_size = batch_size
        self._num_unrollings = max(map(len, self._journeys_types))
        self._vocabulary_size = len(np.unique(self._journeys_types.flatten()))
    
    
    def val2id(self, val):
        return val-1
    
    
    def id2val(self, id):
        return id+1
    
    
    def batch2journeys(self, batch):
        r = []
        for i in zip(*batch):
            ids = np.argmax(i, axis=1)                
            r.append(map(self.id2val, ids))
        return r
       
        
    def next_batch(self):
        return [self._journeys_types[np.random.randint(0, len(self._journeys_types))] for i in range(self._batch_size)]
        
        
    def encode_batches(self, batch):        
        b_transposed = np.transpose(batch)
        
        res = np.zeros((self._num_unrollings, self._batch_size, self._vocabulary_size), dtype=np.float32)
        for (unrolling, batch), value in np.ndenumerate(b_transposed):
            res[unrolling, batch, self.val2id(value)] = 1.0   
        return res

    
    def encode_1h(self, id):
        z = np.zeros(self._vocabulary_size, dtype=np.float32)        
        z[id] = 1.0        
        return z.reshape((self._batch_size, self._vocabulary_size))        
        

    def weighted_pick(self, weights):
        assert len(weights) == self._vocabulary_size
        t = np.cumsum(weights)
        s = np.sum(weights)
        return(int(np.searchsorted(t, np.random.rand(1)*s)))

In [8]:
# Parameters
vocabulary_size = 7 #total possible journey states
n_hidden = 14 # hidden layer num of features
n_unrollings = 7-1 #10 #max journeys length => RNN unrolled length
batch_size = 10

data_gen = BatchGenerator(batch_size)

In [9]:
graph = tf.Graph()
with graph.as_default():        

    # Input data.    
#     train_data = list()
#     for _ in range(n_unrollings + 1):
#         train_data.append(
#             tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))
#     train_inputs = train_data[:n_unrollings]
#     train_labels = train_data[1:]  # labels are inputs shifted by one time step.
    
#     train_data = list()
    jrn_length = tf.placeholder(tf.int32)
#     for _ in tf.range(jrn_length):    
#         train_data.append(
#             tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))    
#     train_inputs = train_data[:len(train_data)-1]
#     train_labels = train_data[1:]  # labels are inputs shifted by one time step.
    
    train_inputs = tf.placeholder(tf.float32, shape=(None, batch_size, vocabulary_size))
    train_labels = tf.placeholder(tf.float32, shape=(None, batch_size, vocabulary_size))
#     train_data = tf.unpack(input_batch)       
#     jrn_length = len(train_data)       

#     train_inputs = train_data[:jrn_length-1]
#     train_labels = train_data[1:]  # labels are inputs shifted by one time step.
    
    with tf.variable_scope("rnn") as rnn_scope:
    
        # RNN struct
        cell = rnn_cell.LSTMCell(num_units=n_hidden, input_size=n_hidden)  
        output, state = rnn.dynamic_rnn(cell, train_inputs, jrn_length, time_major=True, dtype=tf.float32)
#         outputs, states = rnn.rnn(cell, train_inputs, dtype=tf.float32) 
#         output = tf.concat(0, outputs)



        # Classifier.
        W_hy = tf.get_variable("W_hy", [n_hidden, vocabulary_size])
        b_hy = tf.get_variable("b_hy", [vocabulary_size])
        logits = tf.matmul(output, W_hy) + b_hy

        # Loss func
        loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, train_labels))
#                 tf.nn.softmax_cross_entropy_with_logits(logits, tf.concat(0, train_labels))
                


        # Optimizer.
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        gradients, v = zip(*optimizer.compute_gradients(loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
        optimizer = optimizer.apply_gradients(
                zip(gradients, v), global_step=global_step)



        # Predictions.
        train_prediction = tf.nn.softmax(logits)




    #     with tf.variable_scope('inference'):
        rnn_scope.reuse_variables()

        sample_inputs = [tf.placeholder(tf.float32, shape=[None, vocabulary_size])]  
        sample_outputs, sample_states = rnn.rnn(cell, sample_inputs ,dtype=tf.float32) 

        
        sample_output = tf.concat(0, sample_outputs)

        sample_logits = tf.matmul(sample_output, W_hy) + b_hy
        sample_prediction = tf.nn.softmax(sample_logits)
    
    


ValueError: Shapes (?, 7) and (?, 10, 7) must have the same rank

In [12]:
train_inputs.get_shape()

TensorShape([Dimension(None), Dimension(10), Dimension(7)])

In [17]:
cell.input_size, cell.output_size, cell.state_size

(14, 14, 28)

In [16]:
output

<tf.Tensor 'rnn/RNN/TensorArrayPack:0' shape=<unknown> dtype=float32>

In [15]:
logits, train_labels, tf.concat(0, train_labels).get_shape()

(<tf.Tensor 'rnn/add:0' shape=(?, 7) dtype=float32>,
 <tf.Tensor 'Placeholder_2:0' shape=(?, 10, 7) dtype=float32>,
 TensorShape([Dimension(None), Dimension(10), Dimension(7)]))

In [None]:
num_epochs = 10000
summary_frequency = 100




with tf.Session(graph=graph) as sess:
# sess = tf.InteractiveSession(graph=graph)
    tf.initialize_all_variables().run()
    # saver = tf.train.Saver(tf.all_variables())

    print('Initialized')
    mean_loss = 0
    for step in range(num_epochs):

        batches = data_gen.next_batch()
        batches_encoded = data_gen.encode_batches(batches)
        feed_dict = dict()
        
        feed_dict[jrn] = len(batches_encoded) - 1
        feed_dict[input_batch] = batches_encoded
        
        # fill data into input placeholders
#         for i in range(n_unrollings+1):
#             feed_dict[train_data[i]] = batches_encoded[i]
        
        
        

        _, l, predictions, lr,jjj= sess.run([optimizer, loss, train_prediction, learning_rate,jj], feed_dict=feed_dict)
        mean_loss += l

        if step % summary_frequency == 0:    
            print jjj
            
            if step > 0:
                mean_loss = mean_loss / summary_frequency
            # The mean loss is an estimate of the loss over the last few batches.
            print(
                'Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            mean_loss = 0
            labels = np.concatenate(list(batches_encoded)[1:])
            print('Minibatch perplexity: %.2f' % float(
                    np.exp(logprob(predictions, labels))))


            pr = train_prediction.eval(feed_dict)
            print map(lambda w: data_gen.id2val(data_gen.weighted_pick(w)), pr)


        if step % (summary_frequency * 10) == 0:
            # Generate some samples.
            print('=' * 80)
            for _ in range(1):
                feed = data_gen.encode_1h(data_gen.val2id(4))
                jrn = [feed.ravel()]
                
                for _ in range(6):
                    prediction = sample_prediction.eval({sample_inputs[0]: feed})
                    jrn.append(prediction.ravel())                                            
                print map(lambda w: data_gen.id2val(data_gen.weighted_pick(w)), jrn)
            print('=' * 80)
            
            
            
            

In [None]:
cell.state_size

In [None]:
batches_encoded[0]

In [None]:
feed_dict

In [None]:
sample_inputs

In [None]:
feed

In [None]:
cell = rnn_cell.BasicLSTMCell(num_units=n_hidden) 
X = n_unrollings * [tf.placeholder(tf.float32,shape=(batch_size, n_hidden))] 

outputs, _ = rnn.rnn(cell, X, dtype=tf.float32) 
output = tf.concat(0, outputs)#tf.reshape(tf.concat(1, outputs), (-1, n_hidden))

# Input data.
train_data = list()
for _ in range(n_unrollings + 1):
    train_data.append(
        tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))
train_inputs = train_data[:n_unrollings]
train_labels = train_data[1:]  # labels are inputs shifted by one time step.

In [None]:
print 'X: ', len(X), X[0].get_shape()
print 'output: ', len(outputs),' * ',outputs[0].get_shape(),' => ', output.get_shape()

In [None]:
print len(train_inputs), train_inputs[0].get_shape(), ' => ',tf.concat(0, train_inputs).get_shape()

In [None]:
# Classifier.
W_hy = tf.get_variable("W_hy", [n_hidden, vocabulary_size])
b_hy = tf.get_variable("b_hy", [vocabulary_size])
logits = tf.matmul(output, W_hy) + b_hy




In [None]:
print 'logits: ',output.get_shape() ,"*",W_hy.get_shape(),"+",b_hy.get_shape(),'=>'
print '    ',logits.get_shape()

In [None]:
loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, tf.concat(0, train_labels)))


# Optimizer.
global_step = tf.Variable(0)
learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
gradients, v = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
optimizer = optimizer.apply_gradients(
        zip(gradients, v), global_step=global_step)



# Predictions.
train_prediction = tf.nn.softmax(logits)




