# Imports

In [None]:
# As usual, a bit of setup
import time
from time import process_time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
import copy
import math
from utils.data_utils import *
import sys
import re
from utils.spj import Config
from utils.spj import SPJ
%matplotlib inline
# Notebook-wide matplotlib defaults; they apply to every figure created after this cell runs.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest' # default interpolation mode for displayed images
plt.rcParams['image.cmap'] = 'gray' # default colormap for displayed images

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Specify Model Directory and Hyperparameters

In [None]:
# Root directory: the data CSVs, checkpoints and TensorBoard logs used below are all
# addressed relative to this path.
home_dir = "/home/martnzjulio_a/songze"
#home_dir = "/home/songzeli"
# Run tag: appended to the "checkpoints_" / "tensorboard_" directory names under home_dir.
version = "netvlad_test100"

#hyperparameters
num_proposals = 10  # second axis of the x / y / Hp placeholders in build_graph
num_c3d_features = 500  # last axis of the Hp placeholder; also NetVLAD feature_size and output_dim
batch_size = 25  # first axis of every placeholder in build_graph
hidden_dim = 512  # LSTM units; also reused below as the word-embedding size
num_layers = 2  # number of LSTM cells stacked in the MultiRNNCell
learning_rate = 0.001  # passed to the Adam optimizer
keep_prob_p = 1.0  # keep-probability handed to lstm_cell
num_steps = 30  # caption padding length (pad_len); also the generation limit in direct_caption_generation
# Order matters: build_graph and the training/inference functions unpack this tuple positionally.
# num_steps is NOT part of the tuple; later cells read it as a notebook-level global.
hyperparameters = (batch_size, hidden_dim, num_proposals, num_c3d_features, num_layers, learning_rate, keep_prob_p)
print(hyperparameters)

print()
print("DIRECTORY SET TO: ", home_dir)
print("VERSION SET TO  : ", version)

# Load Vocabulary

In [None]:
# Load Vocabulary
# Aliases of the hyperparameters under the names the data helpers / later cells use.
embedding_size = hidden_dim
pad_len = num_steps
max_num_proposals = num_proposals
vocabulary, vocab_size = caption_preprocess(home_dir)

# Word embedding matrix and the word <-> id lookup tables.
# The original cell made this exact call twice with identical arguments; the second
# call only repeated the work, so it is made once here.
# NOTE(review): assumes get_wordvector is deterministic for fixed inputs - confirm.
emb_matrix, word2id, id2word = get_wordvector(embedding_size, vocab_size, vocabulary) #changed by Songze

# Size of the output vocabulary; build_graph reads this as a notebook-level global.
num_classes = len(word2id)

# Load All Data

In [None]:
# Upper bound on the number of training videos kept (100 was the smaller setting used before).
num_train = 2000

# Parse the training split (train_val_300.csv was the alternative file used before).
train_file = home_dir + "/SPJ/train_2400.csv"
(train_ids,
 train_data,
 train_padded_proposals,
 train_padded_framestamps) = video_preprocess(home_dir, train_file, max_num_proposals)

# Padded caption arrays for the training videos.
(train_padded_sentences,
 train_padded_sentences_2,
 train_padded_sentences_id) = get_padded_sentences_id(pad_len, train_ids, train_data, word2id, max_num_proposals)

# Swap the last two axes, cast to int32 and keep the first num_train entries.
# The Y array additionally drops index 0 of its last axis before truncation.
Ycaptions_train = truncate_captions(
    np.transpose(copy.deepcopy(train_padded_sentences_2), axes=(0, 2, 1)).astype(np.int32)[:num_train, :, 1:]
)
Xcaptions_train = truncate_captions(
    np.transpose(copy.deepcopy(train_padded_sentences), axes=(0, 2, 1)).astype(np.int32)[:num_train]
)

# Per-video features, all cut to the same first num_train entries.
VideoIds_train = train_ids[:num_train]
Framestamps_train = train_padded_framestamps[:num_train]
H_train = train_padded_proposals.astype(np.float32)[:num_train]
Ipast_train = temporal_indicator(train_padded_framestamps, mode="past").astype(np.float32)[:num_train]
Ifuture_train = temporal_indicator(train_padded_framestamps, mode="future").astype(np.float32)[:num_train]

# The split may hold fewer videos than requested, so record the real count.
num_train = len(train_ids[:num_train])
print("Number of Training Examples:", num_train)
print()
for _name, _array in [
    ("VideoIds_train", VideoIds_train),
    ("Framestamps_train", Framestamps_train),
    ("Xcaptions_train", Xcaptions_train),
    ("Ycaptions_train", Ycaptions_train),
    ("H_train", H_train),
    ("Ipast_train", Ipast_train),
    ("Ifuture_train", Ifuture_train),
]:
    print(_name + ".shape: ", _array.shape)
print()

In [None]:
# Upper bound on the number of validation videos kept (50 was the smaller setting used before).
num_val = 250

# Parse the validation split (train_2400.csv was the alternative file used before).
val_file = home_dir + "/SPJ/train_val_300.csv"
(val_ids,
 val_data,
 val_padded_proposals,
 val_padded_framestamps) = video_preprocess(home_dir, val_file, max_num_proposals)

# Padded caption arrays for the validation videos.
(val_padded_sentences,
 val_padded_sentences_2,
 val_padded_sentences_id) = get_padded_sentences_id(pad_len, val_ids, val_data, word2id, max_num_proposals)

# Swap the last two axes, cast to int32 and keep the first num_val entries.
# The Y array additionally drops index 0 of its last axis before truncation.
Ycaptions_val = truncate_captions(
    np.transpose(copy.deepcopy(val_padded_sentences_2), axes=(0, 2, 1)).astype(np.int32)[:num_val, :, 1:]
)
Xcaptions_val = truncate_captions(
    np.transpose(copy.deepcopy(val_padded_sentences), axes=(0, 2, 1)).astype(np.int32)[:num_val]
)

# Per-video validation features, all cut to the same first num_val entries.
VideoIds_val = val_ids[:num_val]
Framestamps_val = val_padded_framestamps[:num_val]
H_val = val_padded_proposals.astype(np.float32)[:num_val]
Ipast_val = temporal_indicator(val_padded_framestamps, mode="past").astype(np.float32)[:num_val]
Ifuture_val = temporal_indicator(val_padded_framestamps, mode="future").astype(np.float32)[:num_val]

# The split may hold fewer videos than requested, so record the real count.
num_val = len(val_ids[:num_val])
print("Number of Validation Examples:", num_val)
print()
for _name, _array in [
    ("VideoIds_val", VideoIds_val),
    ("Framestamps_val", Framestamps_val),
    ("Xcaptions_val", Xcaptions_val),
    ("Ycaptions_val", Ycaptions_val),
    ("H_val", H_val),
    ("Ipast_val", Ipast_val),
    ("Ifuture_val", Ifuture_val),
]:
    print(_name + ".shape: ", _array.shape)
print()

In [None]:
# Upper bound on the number of test videos kept.
num_test = 250

# Load Test Data
# Every result is bound to a test_* name. The original cell assigned the ids to val_ids
# and the second caption array to val_padded_sentences_2, which overwrote the validation
# variables and left test_ids / test_padded_sentences_2 undefined.
test_file = home_dir + "/SPJ/train_test_300.csv"
test_ids,test_data,test_padded_proposals,test_padded_framestamps = video_preprocess(home_dir, test_file, max_num_proposals)

# Test Captions
test_padded_sentences,test_padded_sentences_2,test_padded_sentences_id = get_padded_sentences_id(pad_len, test_ids, test_data, word2id, max_num_proposals)
# Swap the last two axes, cast to int32 and keep the first num_test entries;
# the Y array additionally drops index 0 of its last axis.
Ycaptions_test = np.transpose(copy.deepcopy(test_padded_sentences_2),axes=(0,2,1)).astype(np.int32)[:num_test,:,1:]
Xcaptions_test = np.transpose(copy.deepcopy(test_padded_sentences),axes=(0,2,1)).astype(np.int32)[:num_test]
Ycaptions_test = truncate_captions(Ycaptions_test)
Xcaptions_test = truncate_captions(Xcaptions_test)


# Test Features
VideoIds_test = test_ids[:num_test]
Framestamps_test = test_padded_framestamps[:num_test]
H_test = test_padded_proposals.astype(np.float32)[:num_test]
# Both indicators are built from the TEST framestamps (the "past" one previously used
# the validation framestamps).
Ipast_test = temporal_indicator(test_padded_framestamps, mode="past").astype(np.float32)[:num_test]
Ifuture_test = temporal_indicator(test_padded_framestamps, mode="future").astype(np.float32)[:num_test]

# The split may hold fewer videos than requested, so record the real count.
num_test = len(test_ids[:num_test])
print("Number of Test Examples:", num_test)
print()
print("VideoIds_test.shape: ", VideoIds_test.shape)
print("Framestamps_test.shape: ", Framestamps_test.shape)
print("Xcaptions_test.shape: ", Xcaptions_test.shape)
print("Ycaptions_test.shape: ", Ycaptions_test.shape)
print("H_test.shape: ", H_test.shape)
print("Ipast_test.shape: ", Ipast_test.shape)
print("Ifuture_test.shape: ", Ifuture_test.shape)
print()

# Graph

In [None]:
import LOUPE.WILLOW.loupe as lp

def lstm_cell(hidden_dim, p_dropout):
    """Create one LSTM cell wrapped with input/output dropout.

    Arguments:
    hidden_dim -- number of units in the LSTM cell
    p_dropout -- keep-probability applied to both the cell inputs and the cell outputs

    Returns:
    The dropout-wrapped cell. The original built the wrapper but returned the bare
    LSTM cell, so p_dropout had no effect.

    NOTE(review): p_dropout is baked into the graph as a constant, so a value below 1.0
    also drops activations during validation and caption generation. Keep it at 1.0 or
    feed it through a placeholder if dropout should be training-only.
    """
    lstm = tf.nn.rnn_cell.LSTMCell(num_units=hidden_dim, state_is_tuple=True)
    lstm_dropout = tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=p_dropout, output_keep_prob=p_dropout)
    return lstm_dropout
    
def build_graph(hyperparameters):
    """Build the NetVLAD + LSTM captioning graph in the current default TF graph.

    Nothing is returned. Callers fetch the nodes by name afterwards: placeholders
    'x', 'y' and 'Hp', tensors 'predictions' and 'loss', the 'optimizer' op and the
    'global_step' variable.

    Arguments:
    hyperparameters -- tuple (batch_size, hidden_dim, num_proposals, num_c3d_features,
                       num_layers, learning_rate, keep_prob_p)

    Also reads the notebook-level global `num_classes` (vocabulary size).
    """
    (batch_size, hidden_dim, num_proposals, num_c3d_features, num_layers, learning_rate, keep_prob_p) = hyperparameters

    # Third axis of Hp; NetVLAD is configured with the same value so the two stay in sync.
    max_samples = 2000

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # NOTE(review): is_training is hard-wired to True, also when this graph is used for
    # validation / generation - confirm that is intended for the batch-norm layers.
    NetVLAD = lp.NetVLAD(feature_size=num_c3d_features, max_samples=max_samples, cluster_size=64,
                         output_dim=num_c3d_features, gating=True, add_batch_norm=True,
                         is_training=True)
    # Placeholders
    x=tf.placeholder(
        dtype=tf.int32,
        shape=[batch_size,num_proposals,None],
        name="x"
    )
    y=tf.placeholder(
        dtype=tf.int32,
        shape=[batch_size,num_proposals,None],
        name="y")
    Hp=tf.placeholder(
                dtype=tf.float32,
                shape=[batch_size,num_proposals, max_samples, num_c3d_features],
                name="Hp")
    # Flatten to one row per sample. The feature width comes from the hyperparameters
    # (the original hard-coded 500 here, which only matched num_c3d_features by coincidence).
    Hp2 = tf.reshape(Hp, [-1, num_c3d_features])
    netvlad = NetVLAD.forward(Hp2)

    # Add a length-1 time axis so the pooled feature can be tiled along the caption length.
    feature_inputs = tf.expand_dims(netvlad,1)

    # Trainable word embeddings, one hidden_dim-wide row per vocabulary entry.
    embeddings = tf.get_variable('embedding_matrix', [num_classes, hidden_dim])
    x2 = tf.reshape(x,[batch_size*num_proposals,-1])
    embedding_inputs = tf.nn.embedding_lookup(
        params = embeddings,
        ids = x2
    )

    # LSTM Layer
    n = tf.shape(x2)[1]  # caption length of the current feed (dynamic)
    feature_inputs = tf.tile(input=feature_inputs,multiples=[1,n,1])
    # Every time step sees the pooled video feature concatenated with the word embedding.
    lstm_inputs = tf.concat(values=[feature_inputs, embedding_inputs],axis=2)
    lstm = tf.contrib.rnn.MultiRNNCell([lstm_cell(hidden_dim, keep_prob_p) for _ in range(num_layers)])
    initial_state = lstm.zero_state(batch_size*num_proposals, tf.float32)
    lstm_outputs, final_state = tf.nn.dynamic_rnn(
        cell=lstm,
        inputs=lstm_inputs,
        initial_state=initial_state
    )
    # One row of vocabulary logits per (proposal, time step).
    logits = tf.layers.dense(inputs=tf.reshape(lstm_outputs,[-1,hidden_dim]),units=num_classes)
    predictions = tf.argmax(logits,1)
    # End Captioning Module
    # -----------------------

    # Predictions, reshaped back to [batch_size, num_proposals, caption_length]
    predictions = tf.reshape(predictions, [batch_size,num_proposals,-1], name="predictions")

    # Loss: mean cross-entropy over every fed position of y.
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.reshape(y,[-1])), name="loss")

    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss, global_step=global_step, name='optimizer')
        

# Training Function

In [None]:
def model(all_train, all_val, hyperparameters, num_epochs, home_dir, version, print_cost = True):
    """
    Train the captioning graph, resuming from the latest checkpoint when one exists.

    Arguments:
    all_train -- 7-tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) of training arrays
    all_val -- 7-tuple with the same layout holding the validation arrays
    hyperparameters -- tuple (batch_size, hidden_dim, num_proposals, num_c3d_features,
                       num_layers, learning_rate, keep_prob_p)
    num_epochs -- number of passes over the training minibatches made by this call
    home_dir -- root directory; checkpoints and TensorBoard logs go to
                home_dir/checkpoints_<version>/ and home_dir/tensorboard_<version>/
    version -- run tag used in those directory names
    print_cost -- True to print the epoch losses and write them to TensorBoard

    Returns:
    0 -- the trained weights are only persisted through the checkpoints.
    """
    import os  # local import: the notebook's import cell does not bring in os

    (batch_size, hidden_dim, num_proposals, num_c3d_features, num_layers, learning_rate, keep_prob_p) = hyperparameters

    (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train) = all_train
    (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)   = all_val

    # Directory to Save Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
    tensorboard_dir =  home_dir + "/tensorboard_" + str(version) + "/"
    print("Checkpoint directory: ", checkpoint_dir)
    print("Tensorboard directory: ", tensorboard_dir)
    # Saver.save does not create the parent directory; without this the first save
    # fails only after several epochs of training.
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Minibatch shuffling seed; bumped once per epoch below.
    seed = 3

    # Number of Training Examples
    num_train = H_train.shape[0]
    # Optimizer steps assumed per epoch; floored at 1 so the epoch computation below
    # cannot divide by zero when there are fewer examples than batch_size.
    # NOTE(review): assumes random_mini_batches yields num_train // batch_size batches - confirm.
    num_train_minibatches = max(num_train // batch_size, 1)

    # Model
    tf.reset_default_graph()
    # The graph-level seed belongs to the current default graph, so it has to be set
    # AFTER the reset; seeding first (as the original did) is discarded by the reset.
    tf.set_random_seed(1)
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        build_graph(hyperparameters)
        init = tf.global_variables_initializer()
    else:
        init = None
        # Rebuild the graph exactly as it was saved; weights are restored inside the session.
        tf.train.import_meta_graph(latest_checkpoint +'.meta')

    graph = tf.get_default_graph()
    Hp = graph.get_tensor_by_name('Hp:0')
    global_step = graph.get_tensor_by_name('global_step:0')
    x           = graph.get_tensor_by_name('x:0')
    y           = graph.get_tensor_by_name('y:0')
    loss        = graph.get_tensor_by_name('loss:0')
    optimizer   = graph.get_operation_by_name('optimizer')

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=5)

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        #Tensorboard
        summary_writer = tf.summary.FileWriter(tensorboard_dir,sess.graph)

        # check for latest checkpoint
        if latest_checkpoint is None:
            # If no check point run the initialization
            print()
            print("No checkpoint exists, initializing parameters...")
            sess.run(init)
        else:
            print()
            print("Restoring from latest checkpoint...")
            saver.restore(sess, latest_checkpoint)

        # Training Loop
        for epoch in range(num_epochs):

            # Record start time
            print()
            start = process_time()

            train_losses = []
            val_losses = []

            # Get minibatches (reshuffled with a new seed every epoch)
            seed = seed + 1
            train_minibatches = random_mini_batches(VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, batch_size , seed)
            val_minibatches = random_mini_batches(VideoIds_val, Framestamps_val, H_val, Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val,   batch_size , seed)

            for train_minibatch in train_minibatches:

                # Select minibatch
                (minibatch_VideoIds_train, minibatch_Framestamps_train, minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatch
                minibatch_Hp_train = sample_features(minibatch_VideoIds_train, minibatch_Framestamps_train, num_proposals, home_dir)

                # Run Train Session
                train_feed={
                    Hp: minibatch_Hp_train,
                    x: minibatch_Xcaptions_train,
                    y: minibatch_Ycaptions_train
                }
                _ , minibatch_train_loss = sess.run([optimizer, loss], feed_dict=train_feed)
                train_losses.append(minibatch_train_loss)

            for val_minibatch in val_minibatches:

                # Select minibatch
                (minibatch_VideoIds_val, minibatch_Framestamps_val, minibatch_H_val, minibatch_Ipast_val, minibatch_Ifuture_val, minibatch_Ycaptions_val, minibatch_Xcaptions_val) = val_minibatch
                minibatch_Hp_val = sample_features(minibatch_VideoIds_val, minibatch_Framestamps_val, num_proposals, home_dir)

                # Run Validation Session (loss only: the optimizer op is not fetched)
                val_feed={
                    Hp: minibatch_Hp_val,
                    x: minibatch_Xcaptions_val,
                    y: minibatch_Ycaptions_val
                }
                # Fetch the tensor itself so a scalar is stored rather than a 1-element list.
                minibatch_val_loss = sess.run(loss, feed_dict=val_feed)
                val_losses.append(minibatch_val_loss)

            epoch_train_loss = np.mean(train_losses)
            epoch_val_loss = np.mean(val_losses)

            # Epoch index derived from the persisted global step, so it keeps counting
            # across restarts. Computed unconditionally because the checkpoint test below
            # needs it even when print_cost is False.
            global_epoch = tf.train.global_step(sess, global_step)//num_train_minibatches

            # Print cost
            if print_cost == True:
                print("Epoch: ", global_epoch)
                print("Current Learning Rate", learning_rate)
                print ("Training Loss: ", epoch_train_loss)
                print ("Validation Loss: ", epoch_val_loss)
                # Add and Write to Tensorboard. Each loss goes into its own summary; the
                # original added both values to train_summary and wrote val_summary empty.
                # The tag spellings are kept so existing TensorBoard series continue.
                train_summary = tf.Summary()
                val_summary = tf.Summary()
                train_summary.value.add(tag="train_losss", simple_value=float(epoch_train_loss))
                val_summary.value.add(tag="val_losss", simple_value=float(epoch_val_loss))
                summary_writer.add_summary(train_summary, global_epoch)
                summary_writer.add_summary(val_summary, global_epoch)


            # Save Model (every 10 epochs)
            if global_epoch % 10 == 0:
                print("Saving Checkpoint for global_step " + str(global_epoch))
                saver.save(sess, checkpoint_dir + 'model', global_step = global_epoch)

            # Save and Print Processed Time
            end = process_time()
            print()
            print("Time Elapased: ", end - start)

        # Flush pending events and release the event file.
        summary_writer.close()

        return 0

# Run Training

In [None]:
# Train Model
# Number of epochs for this call; model() resumes from the latest checkpoint if one exists.
num_epochs = 1000
# The tuples must list the arrays in exactly this order: model() unpacks them positionally.
all_train = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
all_val =   (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)
# model() returns 0; the trained weights are only available through the saved checkpoints.
execute = model(all_train, all_val, hyperparameters, num_epochs, home_dir, version)

In [None]:
def true_feed_caption_generation(data,hyperparameters, home_dir, version):
    """Run the graph over `data` with the ground-truth captions fed as inputs.

    The latest checkpoint under home_dir/checkpoints_<version>/ is restored; when none
    exists a freshly initialised graph is used instead. The mean minibatch loss is printed.

    Arguments:
    data -- 7-tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
    hyperparameters -- tuple (batch_size, hidden_dim, num_proposals, num_c3d_features,
                       num_layers, learning_rate, keep_prob_p)
    home_dir -- root directory holding the checkpoint directory
    version -- run tag used in the checkpoint directory name

    Returns:
    (predictions, labels, ids) -- per-minibatch predictions, Ycaptions and video ids,
    concatenated along axis 0 in the (shuffled) order the minibatches were produced.

    Raises:
    ValueError -- if random_mini_batches yields no minibatch.
    """
    (batch_size, hidden_dim, num_proposals, num_c3d_features, num_layers, learning_rate, keep_prob_p) = hyperparameters

    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Directory to Save Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
    tensorboard_dir =  home_dir + "/tensorboard_" + str(version) + "/"
    print("Checkpoint directory: ", checkpoint_dir)
    print("Tensorboard directory: ", tensorboard_dir)

    # Model
    tf.reset_default_graph()
    # Seed after the reset: the graph-level seed is a property of the current default
    # graph, so seeding before tf.reset_default_graph() is discarded.
    tf.set_random_seed(1)
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    #latest_checkpoint = '/home/martnzjulio_a/songze/checkpoints_netvlad_test1/model-60'
    if latest_checkpoint is None:
        build_graph(hyperparameters)
        init = tf.global_variables_initializer()
    else:
        init = None
        tf.train.import_meta_graph(latest_checkpoint +'.meta')

    graph = tf.get_default_graph()
    Hp = graph.get_tensor_by_name('Hp:0')
    x           = graph.get_tensor_by_name('x:0')
    y           = graph.get_tensor_by_name('y:0')
    predictions = graph.get_tensor_by_name('predictions:0')
    loss        = graph.get_tensor_by_name('loss:0')

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=5)

    # Per-minibatch results; concatenated once at the end instead of re-copying the
    # growing arrays on every iteration.
    losses = []
    batch_predictions = []
    batch_labels = []
    batch_ids = []

    # Start Session
    with tf.Session() as sess:

        # check for latest checkpoint
        if latest_checkpoint is None:
            # If no check point run the initialization
            print()
            print("No checkpoint exists, initializing parameters...")
            sess.run(init)
        else:
            print()
            print("Restoring from latest checkpoint...")
            saver.restore(sess, latest_checkpoint)

        # Get minibatches (same shuffling seed the original used: 3 + 1)
        seed = 4
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, batch_size, seed)

        # For all batchs
        for minibatch in minibatches:

            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            minibatch_Hp = sample_features(minibatch_VideoIds, minibatch_Framestamps, num_proposals, home_dir)

            # Feed
            feed={
                Hp: minibatch_Hp,
                x: minibatch_Xcaptions,
                y: minibatch_Ycaptions
            }

            # Run Predictions
            loss_, pred = sess.run([loss, predictions], feed_dict=feed)

            # Cache Results
            losses.append(loss_)
            batch_predictions.append(pred)
            batch_labels.append(minibatch_Ycaptions)
            batch_ids.append(minibatch_VideoIds)

        if not batch_predictions:
            # The original failed here with an UnboundLocalError on predictions_.
            raise ValueError("random_mini_batches produced no minibatches; nothing to evaluate")

        avg_loss = np.mean(losses)
        print(avg_loss)

    predictions_ = np.concatenate(batch_predictions, axis=0)
    labels = np.concatenate(batch_labels, axis=0)
    ids = np.concatenate(batch_ids, axis=0)
    return predictions_, labels, ids
# Split to evaluate with ground-truth caption inputs. The tuple order is fixed:
# true_feed_caption_generation unpacks it positionally. Training data is active here;
# the commented-out lines switch to the validation or test split.
data = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
#data =   (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)
#data =   (VideoIds_test, Framestamps_test, H_test,   Ipast_test,   Ifuture_test,   Ycaptions_test,   Xcaptions_test)
# The "2" suffix marks the true-feed results; a later cell stores direct generation under "1".
predictions2, labels2, ids2 = true_feed_caption_generation(data, hyperparameters, home_dir, version)

In [None]:
def caption_generator(sess, H, Ipast, Ifuture, labels, max_steps=None):
    """Greedily decode captions for one minibatch, feeding each prediction back as input.

    The original body could not run: it referenced `self` in a plain function, used the
    undefined names `H_temp`, `Hp`, `x` and `y`, and assigned to `predictions` before
    reading it. The graph nodes are now looked up by name on `sess.graph`.

    Arguments:
    sess -- session whose graph contains 'Hp:0', 'x:0', 'y:0' and 'predictions:0'
    H -- features fed to the 'Hp' placeholder; its first two axes give the batch size
         and the number of proposals
    Ipast, Ifuture -- unused; kept so existing call sites keep working
    labels -- passed through unchanged
    max_steps -- number of tokens to generate; defaults to the notebook-level num_steps

    Returns:
    (predictions, labels) -- predictions is the output of the final decoding step,
    i.e. one generated token per step along the last axis.

    Raises:
    ValueError -- if max_steps is smaller than 1.
    """
    graph = sess.graph
    Hp = graph.get_tensor_by_name('Hp:0')
    x = graph.get_tensor_by_name('x:0')
    y = graph.get_tensor_by_name('y:0')
    predictions_op = graph.get_tensor_by_name('predictions:0')

    if max_steps is None:
        max_steps = num_steps  # notebook-level global set in the hyperparameter cell
    if max_steps < 1:
        raise ValueError("max_steps must be at least 1")

    # The placeholder has a static batch dimension; fail early with a readable message.
    expected_batch = Hp.get_shape().as_list()[0]
    assert expected_batch is None or H.shape[0] == expected_batch, "batch sizes do not match!"
    batch_size, num_proposals = H.shape[0], H.shape[1]

    # Every caption starts with the start token.
    x_temp = np.full([batch_size, num_proposals, 1], 2, dtype=np.int32) # b'<sta>': 2
    predictions = None
    while (x_temp.shape[2] - 1) < max_steps:
        # Dummy labels with the same shape as the inputs, fed as the original did.
        y_temp = np.ones(x_temp.shape, dtype=np.int32)
        feed = {
            Hp: H,
            x: x_temp,
            y: y_temp
        }
        predictions = sess.run(predictions_op, feed_dict=feed)
        # The prediction at the last position is the next input token.
        next_x = predictions[:, :, -1:].astype(np.int32)
        x_temp = np.concatenate((x_temp, next_x), axis=2)
    return predictions, labels

def direct_caption_generation(data,hyperparameters, home_dir, version, max_steps=None):
    """Generate captions for `data` greedily, feeding each prediction back as the next input.

    The latest checkpoint under home_dir/checkpoints_<version>/ is restored; when none
    exists a freshly initialised graph is used instead.

    Arguments:
    data -- 7-tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
    hyperparameters -- tuple (batch_size, hidden_dim, num_proposals, num_c3d_features,
                       num_layers, learning_rate, keep_prob_p)
    home_dir -- root directory holding the checkpoint directory
    version -- run tag used in the checkpoint directory name
    max_steps -- tokens generated per caption; defaults to the notebook-level num_steps,
                 which is what the original read implicitly

    Returns:
    (predictions, labels, ids) -- generated token ids, Ycaptions and video ids,
    concatenated along axis 0 in the (shuffled) order the minibatches were produced.

    Raises:
    ValueError -- if max_steps is smaller than 1 or no minibatch is produced.
    """
    (batch_size, hidden_dim, num_proposals, num_c3d_features, num_layers, learning_rate, keep_prob_p) = hyperparameters

    if max_steps is None:
        max_steps = num_steps  # notebook-level global set in the hyperparameter cell
    if max_steps < 1:
        raise ValueError("max_steps must be at least 1")

    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Directory to Save Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
    tensorboard_dir =  home_dir + "/tensorboard_" + str(version) + "/"
    print("Checkpoint directory: ", checkpoint_dir)
    print("Tensorboard directory: ", tensorboard_dir)

    # Model
    tf.reset_default_graph()
    # Seed after the reset: the graph-level seed is a property of the current default
    # graph, so seeding before tf.reset_default_graph() is discarded.
    tf.set_random_seed(1)
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    #latest_checkpoint = '/home/martnzjulio_a/songze/checkpoints_netvlad_test1/model-60'
    if latest_checkpoint is None:
        build_graph(hyperparameters)
        init = tf.global_variables_initializer()
    else:
        init = None
        tf.train.import_meta_graph(latest_checkpoint +'.meta')

    graph = tf.get_default_graph()
    Hp = graph.get_tensor_by_name('Hp:0')
    x           = graph.get_tensor_by_name('x:0')
    y           = graph.get_tensor_by_name('y:0')
    predictions = graph.get_tensor_by_name('predictions:0')

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=5)

    # Per-minibatch results; concatenated once at the end instead of re-copying the
    # growing arrays on every iteration.
    batch_predictions = []
    batch_labels = []
    batch_ids = []

    # Start Session
    with tf.Session() as sess:

        # check for latest checkpoint
        if latest_checkpoint is None:
            # If no check point run the initialization
            print()
            print("No checkpoint exists, initializing parameters...")
            sess.run(init)
        else:
            print()
            print("Restoring from latest checkpoint...")
            saver.restore(sess, latest_checkpoint)

        # Get minibatches (same shuffling seed the original used: 3 + 1)
        seed = 4
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, batch_size, seed)

        # For all batchs
        for minibatch in minibatches:

            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            minibatch_Hp = sample_features(minibatch_VideoIds, minibatch_Framestamps, num_proposals, home_dir)

            # Caption Generator: start every caption with the start token and append the
            # last predicted token on each pass. (The leftover debug print of the
            # start-token shape was removed.)
            x_temp = np.full([batch_size, num_proposals, 1], 2, dtype=np.int32) # b'<sta>': 2
            pred = None
            while (x_temp.shape[2] - 1) < max_steps:
                # Dummy labels with the same shape as the inputs, fed as the original did.
                y_temp = np.ones(x_temp.shape, dtype=np.int32)
                feed = {
                    Hp: minibatch_Hp,
                    x: x_temp,
                    y: y_temp
                }
                pred = sess.run(predictions, feed_dict=feed)
                # The prediction at the last position is the next input token.
                next_x = pred[:, :, -1:].astype(np.int32)
                x_temp = np.concatenate((x_temp,next_x),axis=2)
            # ----------------------

            # Cache Results
            batch_predictions.append(pred)
            batch_labels.append(minibatch_Ycaptions)
            batch_ids.append(minibatch_VideoIds)

    if not batch_predictions:
        # The original failed here with an UnboundLocalError on predictions_.
        raise ValueError("random_mini_batches produced no minibatches; nothing to generate")

    predictions_ = np.concatenate(batch_predictions, axis=0)
    labels = np.concatenate(batch_labels, axis=0)
    ids = np.concatenate(batch_ids, axis=0)
    return predictions_, labels, ids
# Generate captions for the training split; this rebinds the notebook-level name `data`.
# The tuple order is fixed: direct_caption_generation unpacks it positionally.
data = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
# The "1" suffix marks the direct-generation results (true-feed results use "2").
predictions1, labels1, ids1 = direct_caption_generation(data, hyperparameters, home_dir, version)

In [68]:
# Show predicted vs. label words for one (example, proposal) pair.
# Active line: direct-generation results; commented line: true-feed results.
#print_pred_and_labels(predictions2, labels2, ids2, id2word, example=6, proposal=1)
print_pred_and_labels(predictions1, labels1, ids1, id2word, example=5, proposal=2)


VIDEO ID             PREDICTION           LABEL               
--------             -----                -----               
v_maXU1lGguxs        the                  the                 
v_maXU1lGguxs        woman                woman               
v_maXU1lGguxs        stops                stops               
v_maXU1lGguxs        playing              playing             
v_maXU1lGguxs        and                  and                 
v_maXU1lGguxs        smiles               smiles              
v_maXU1lGguxs        once                 once                
v_maXU1lGguxs        down                 more                
v_maXU1lGguxs        into                 before              
v_maXU1lGguxs        the                  walking             
v_maXU1lGguxs        camera               out                 
v_maXU1lGguxs        putting              of                  
v_maXU1lGguxs        the                  view                
v_maXU1lGguxs        violin               of          

0

In [69]:
# BLEU-1..4 of the generated captions against the labels.
# Active line: direct-generation results; commented line: true-feed results.
# NOTE(review): the recorded output warns about zero n-gram overlap counts and suggests
# a SmoothingFunction, so treat the printed scores with caution.
#bleu1, bleu2, bleu3, bleu4 = compute_bleu_at_1_2_3_4(labels2, predictions2)
bleu1, bleu2, bleu3, bleu4 = compute_bleu_at_1_2_3_4(labels1, predictions1)
print(bleu1, bleu2, bleu3, bleu4)

Corpus/Sentence contains 0 counts of 5-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.6072053223981028 0.5897121471765331 0.5929780102295337 0.5986776453708821
