In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn

from functools import partial

In [2]:
def logprob(predictions, labels):
    """Log-probability of the true labels in a predicted batch."""
    predictions[predictions < 1e-10] = 1e-10
    return np.sum(np.multiply(labels, -np.log(predictions))) / labels.shape[0]


def slidingWindow(sequence,winSize,step=1):
    """Returns a generator that will iterate through
    the defined chunks of input sequence.  Input sequence
    must be iterable."""
 
    # Verify the inputs
    try: it = iter(sequence)
    except TypeError:
        raise Exception("**ERROR** sequence must be iterable.")
    if not ((type(winSize) == type(0)) and (type(step) == type(0))):
        raise Exception("**ERROR** type(winSize) and type(step) must be int.")
    if step > winSize:
        raise Exception("**ERROR** step must not be larger than winSize.")
    if winSize > len(sequence):
        raise Exception("**ERROR** winSize must not be larger than sequence length.")
 
    # Pre-compute number of chunks to emit
    numOfChunks = ((len(sequence)-winSize)/step)+1
 
    # Do the work
    for i in range(0,numOfChunks*step,step):
        yield sequence[i:i+winSize]
        
        
def adjust_sequence_to_length(sequence, length, padder):        
    if len(sequence) > length:
        slices_num = float(len(sequence))/length
        step = max(int((slices_num%1)*length),2)
        chunks = list(slidingWindow(sequence, winSize=length, step=step))
        return chunks
    if len(sequence) == length:
        return [sequence]
    if len(sequence) < length:
        gap = length - len(sequence)        
        seq_pred = padder(sequence, gap)            
        return [seq_pred]

In [3]:
class BatchGenerator(object):
    
    def __init__(self, batch_size):
        self._journeys_types = np.array([
            [1,2,3,4,5,6,7],
            [1,2,3,4,5],
            [7,6,5,4,3,2,1],
            [7,6,5,4,5],
            [4,5,2,3,1,7,6],
            [1,1,3,4,4,5,7],
            [1,1,3,4,4],
            [1,1,1,4,4,5,5],
            [5,5,5,2,2,2,3,3,3]
        ])
        self._batch_size = batch_size        
        self._vocabulary_size = len(np.unique([item for sublist in self._journeys_types for item in sublist]))
    
    
    def val2id(self, val):
        return val-1
    
    
    def id2val(self, id):
        return id+1
    
    
    def batch2journeys(self, batch):
        r = []
        for i in zip(*batch):
            ids = np.argmax(i, axis=1)                
            r.append(map(self.id2val, ids))
        return r
       
        
    def next_batch(self, unrollings_adjuster=None, num_unrollings=None):
        batch = []
        while len(batch)<self._batch_size:
            b = self._journeys_types[np.random.randint(0, len(self._journeys_types))]  
            if unrollings_adjuster and num_unrollings:
                pb = unrollings_adjuster(b, num_unrollings)                 
                batch.extend(pb)      
            else:
                batch.append(b)
        return map(lambda i: batch[i], np.random.choice(len(batch), self._batch_size, replace=False))
               
        
    def encode_batches(self, batch, num_unrollings):        
        b_transposed = np.transpose(batch)
        
        res = np.zeros((num_unrollings, self._batch_size, self._vocabulary_size), dtype=np.float32)
        for (unrolling, batch), value in np.ndenumerate(b_transposed):
            res[unrolling, batch, self.val2id(value)] = 1.0   
        return res

    
    def encode_1h(self, id):
        z = np.zeros(self._vocabulary_size, dtype=np.float32)        
        z[id] = 1.0        
        return z.reshape((1, self._vocabulary_size))        
        

    def weighted_pick(self, weights):
        assert len(weights) == self._vocabulary_size
        t = np.cumsum(weights)
        s = np.sum(weights)
        return(int(np.searchsorted(t, np.random.rand(1)*s)))

In [4]:
# Parameters
n_hidden = 14 # hidden layer num of features
n_unrollings = 6 #10 #max journeys length => RNN unrolled length
batch_size = 3

data_gen = BatchGenerator(batch_size)#, n_unrollings+1)

vocabulary_size = data_gen._vocabulary_size #total possible journey states

In [5]:
graph = tf.Graph()
with graph.as_default():        

    # Input data.
    train_data = list()
    for _ in range(n_unrollings+1):
        train_data.append(
            tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size]))
    train_inputs = train_data[:n_unrollings]
    train_labels = train_data[1:]  # labels are inputs shifted by one time step.
    train_labels_concat = tf.concat(0, train_labels)
    
    with tf.variable_scope("rnn") as rnn_scope:
    
        # RNN struct
        cell = rnn_cell.LSTMCell(num_units=n_hidden, input_size=vocabulary_size)     
        outputs, states = rnn.rnn(cell, train_inputs, dtype=tf.float32) 
        output = tf.concat(0, outputs)#tf.reshape(tf.concat(1, outputs), (-1, n_hidden))

        # Classifier.
        W_hy = tf.get_variable("W_hy", [n_hidden, vocabulary_size])
        b_hy = tf.get_variable("b_hy", [vocabulary_size])
        logits = tf.matmul(output, W_hy) + b_hy

        # Loss func
        loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, train_labels_concat))


        # Optimizer.
        global_step = tf.Variable(0, name='global_step')
        learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        gradients, v = zip(*optimizer.compute_gradients(loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
        optimizer = optimizer.apply_gradients(
                zip(gradients, v), global_step=global_step)


        # Predictions.
        train_prediction = tf.nn.softmax(logits)
        
        #perplexity
#         predictions[predictions < 1e-10] = 1e-10
#         return np.sum(np.multiply(labels, -np.log(predictions))) / labels.shape[0]        
        perplexity = tf.reduce_sum(tf.mul(train_labels_concat, -tf.log(train_prediction)))/train_labels_concat.get_shape()[0].value
        
        loss_summary = tf.scalar_summary("loss", loss)
        learning_rate_summary = tf.scalar_summary("learning_rate", learning_rate)
        perplexity_summary = tf.scalar_summary("perplexity_summary", perplexity)
    


In [6]:
predictors = {}
def get_predictor(inputs_number):
    
    if inputs_number in predictors:       
        return predictors[inputs_number]
    else:
    
        with graph.as_default():  

    #         rnn_scope.reuse_variables()
            with tf.variable_scope("rnn",reuse=True) as rnn_scope:

                sample_inputs = list()
                for _ in range(inputs_number):
                    sample_inputs.append(
                        tf.placeholder(tf.float32, shape=[None, vocabulary_size]))

                sample_outputs, sample_states = rnn.rnn(cell, sample_inputs ,dtype=tf.float32) 


                sample_output = tf.concat(0, sample_outputs)

                sample_logits = tf.matmul(sample_output, W_hy) + b_hy
                sample_prediction = tf.nn.softmax(sample_logits)

                predictors[inputs_number] = (sample_inputs, sample_prediction)
                return sample_inputs, sample_prediction

In [7]:
def predict(ini_steps, num_steps_forward):
    
    jrn_ids = map(data_gen.val2id, ini_steps)
    jrn_1h = map(data_gen.encode_1h, jrn_ids)
    
    total_steps = num_steps_forward + len(jrn_ids)
    for _ in range(total_steps-len(jrn_ids)):
        sample_inputs, model = get_predictor(len(jrn_ids))

        dick ={}
        for i, s in zip(sample_inputs, jrn_1h):                        
            dick[i]=s

        prediction = model.eval(dick)

        pred_step_id = data_gen.weighted_pick(prediction[-1].ravel())                                          
        jrn_ids.append(pred_step_id)                 

        pred_step_1h = data_gen.encode_1h(pred_step_id)                    
        jrn_1h.append(pred_step_1h) 
        
    return map(lambda w: data_gen.id2val(w), jrn_ids)

In [8]:
num_epochs = 10000
summary_frequency = 100

unrollings_adjuster = partial(adjust_sequence_to_length, padder=predict)



# with tf.Session(graph=graph) as sess:
sess = tf.InteractiveSession(graph=graph)


merged_summaries = tf.merge_all_summaries()
writer = tf.train.SummaryWriter('/tmp/tensorflow_logs', sess.graph_def)

tf.initialize_all_variables().run()
# saver = tf.train.Saver(tf.all_variables())



print('Initialized')
mean_loss = 0
for step in range(num_epochs):

    batches = data_gen.next_batch(unrollings_adjuster=unrollings_adjuster, num_unrollings=n_unrollings+1)
    batches_encoded = data_gen.encode_batches(batches,n_unrollings+1)
    feed_dict = dict()
    # fill data into input placeholders
    for i in range(n_unrollings+1):
        feed_dict[train_data[i]] = batches_encoded[i]


    _, l, prplx, predictions, lr, summaries_str = sess.run([optimizer, loss, perplexity, train_prediction, learning_rate, merged_summaries], feed_dict=feed_dict)
    mean_loss += l

    
    
    if step % summary_frequency == 0:
        if step > 0:
            mean_loss = mean_loss / summary_frequency
        # The mean loss is an estimate of the loss over the last few batches.
        
        mean_loss = 0
        
        
#         labels = np.concatenate(list(batches_encoded)[1:])
#         perplexity = np.exp(logprob(predictions, labels))
        
        
        if step % summary_frequency == 0:
            print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
            print('Minibatch perplexity: %.2f' % float(prplx))
            writer.add_summary(summaries_str, step)
#             writer.add_summary(perplexity_summary, step)

        
        

#         pr = train_prediction.eval(feed_dict)
        pr = predictions
        print map(lambda w: data_gen.id2val(data_gen.weighted_pick(w)), pr)



Initialized
Average loss at step 0: 0.000000 learning rate: 10.000000
Minibatch perplexity: 2.41
[6, 6, 4, 1, 6, 5, 3, 6, 6, 6, 6, 6, 6, 3, 1, 1, 7, 1]
Average loss at step 100: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.81
[6, 6, 2, 5, 5, 4, 4, 4, 4, 3, 5, 4, 4, 6, 5, 5, 5, 4]
Average loss at step 200: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.48
[5, 1, 5, 3, 3, 2, 2, 4, 5, 3, 4, 2, 3, 5, 2, 3, 5, 3]
Average loss at step 300: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.28
[1, 1, 2, 3, 1, 4, 5, 4, 4, 4, 4, 4, 4, 5, 5, 5, 7, 7]
Average loss at step 400: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.15
[5, 6, 1, 6, 5, 3, 3, 4, 4, 1, 5, 4, 7, 2, 5, 6, 1, 5]
Average loss at step 500: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.26
[6, 1, 1, 5, 3, 3, 4, 4, 4, 1, 4, 5, 2, 5, 6, 1, 5, 7]
Average loss at step 600: 0.000000 learning rate: 10.000000
Minibatch perplexity: 0.24
[2, 2, 6, 3, 3, 5, 4, 4, 4, 4, 4, 3, 5, 5, 2, 7, 7,

In [9]:
predict([7,6], 5)

[7, 6, 5, 4, 5, 4, 5]

In [None]:
data_gen = BatchGenerator(2,4)
batches = data_gen.next_batch()
batches
# batches_encoded = data_gen.encode_batches(batches)

In [None]:


ini_steps = [2,3,1]
jrn_ids = map(data_gen.val2id, ini_steps)
jrn_1h = map(data_gen.encode_1h, jrn_ids)


# for _ in range(7-len(jrn_ids)):
for _ in range(10-len(jrn_ids)):
    sample_inputs, model = get_predictor(len(jrn_ids))

    dick ={}
    for i, s in zip(sample_inputs, jrn_1h):                        
        dick[i]=s

    prediction = model.eval(dick)

    pred_step_id = data_gen.weighted_pick(prediction[-1].ravel())                                          
    jrn_ids.append(pred_step_id)                 

    pred_step_1h = data_gen.encode_1h(pred_step_id)                    
    jrn_1h.append(pred_step_1h)                    



print map(lambda w: data_gen.id2val(w), jrn_ids)

In [None]:
arr = [1,2,3]
print np.pad(arr,pad_width=3, mode='reflect')
print np.pad(arr,pad_width=3, mode='symmetric')
print np.pad(arr,pad_width=3, mode='wrap')
print np.pad(arr,pad_width=3, mode='edge')
print np.pad(arr,pad_width=3, mode='constant',constant_values=9)
print np.pad(arr,pad_width=3)