# Imports

In [2]:
# As usual, a bit of setup
import time
from time import process_time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
import copy
import math
from utils.data_utils import *
import sys
import re
from utils.spj import Config
from utils.spj import SPJ
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Notebook-wide plotting defaults.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # draw image pixels without interpolation
plt.rcParams['image.cmap'] = 'gray'  # default colormap for images

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# Mode 2 reloads all imported modules before each cell runs, so edits to the
# local utils/ package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Specify Model Directory

In [3]:
# Run configuration shared by the cells below.
minibatch_size = 1                       # examples per minibatch during inference
version = "overfit"                      # suffix of the checkpoints_<version> directory
home_dir = "/home/martnzjulio_a/songze"  # base directory handed to the data-loading helpers
#home_dir = "/home/songzeli"

# Echo the active settings so the cell output records what was used.
print()
for _label, _value in (("DIRECTORY SET TO: ", home_dir),
                       ("VERSION SET TO  : ", version)):
    print(_label, _value)


DIRECTORY SET TO:  /home/martnzjulio_a/songze
VERSION SET TO  :  overfit


# Load Vocab

In [4]:
# Load Vocabulary
embedding_size = 512
pad_len, num_steps = 50, 50
vocabulary, vocab_size = caption_preprocess(home_dir)

# Word Embedding Matrix and the token <-> id lookup tables.
# The original cell called get_wordvector twice with identical arguments and
# threw the first result away (apart from len(word2id)); one call is enough.
emb_matrix, word2id, id2word = get_wordvector(embedding_size, vocab_size, vocabulary)  # changed by Songze

# Number of output classes = number of entries in the word -> id table.
num_classes = len(word2id)

Total number of words in all captions:  532264
Vocabulary Size (Unique):  11125


# Load Test Data

In [6]:
# Load Test Data
test_file = home_dir + "/SPJ/train_test_300.csv"
test_ids, test_data, test_padded_proposals, test_padded_framestamps = video_preprocess(home_dir, test_file)

# Test Captions
test_padded_sentences, test_padded_sentences_2, test_padded_sentences_id = get_padded_sentences_id(pad_len, test_ids, test_data, word2id)
# transpose(...).astype(...) already returns a freshly allocated array, so the
# inputs are never modified and no deepcopy is needed beforehand.
Ycaptions_test = np.transpose(test_padded_sentences_2, axes=(0, 2, 1)).astype(np.int32)
Ycaptions_test = Ycaptions_test[:, :, 1:]  # drop the first position along the last axis
Xcaptions_test = np.transpose(test_padded_sentences, axes=(0, 2, 1)).astype(np.int32)

# Test Features
VideoIds_test = test_ids
Framestamps_test = test_padded_framestamps
H_test = test_padded_proposals.astype(np.float32)
# Legacy alias: this cell used to store the test proposals under the name
# H_train, and later cells still refer to it by that name.
H_train = H_test
Ipast_test = temporal_indicator(test_padded_framestamps, mode="past").astype(np.float32)
Ifuture_test = temporal_indicator(test_padded_framestamps, mode="future").astype(np.float32)

num_test = len(test_ids)
print("Number of Test Examples:", num_test)
print()
print("VideoIds_test.shape: ", VideoIds_test.shape)
print("Framestamps_test.shape: ", Framestamps_test.shape)
print("Xcaptions_test.shape: ", Xcaptions_test.shape)
print("Ycaptions_test.shape: ", Ycaptions_test.shape)
print("H_test.shape: ", H_test.shape)
print("Ipast_test.shape: ", Ipast_test.shape)
print("Ifuture_test.shape: ", Ifuture_test.shape)
print()

Number of Test Examples: 299

VideoIds_test.shape:  (299,)
Framestamps_test.shape:  (299, 2, 30)
Xcaptions_test.shape:  (299, 30, 50)
Ycaptions_test.shape:  (299, 30, 50)
H_test.shape:  (299, 500, 30)
Ipast_test.shape:  (299, 30, 30)
Ifuture_test.shape:  (299, 30, 30)



# Generate Captions (Sample)

In [None]:
from utils.data_utils import *

# Reset first, then seed: tf.set_random_seed applies to the current default
# graph, so a seed set before tf.reset_default_graph() is thrown away.
tf.reset_default_graph()
tf.set_random_seed(1)
seed = 3  # seed handed to random_mini_batches

# Build the model graph and a Saver over its variables. No optimizer is built
# here: the original created one (from an undefined learning_rate) *after* the
# Saver, so it was never restored from the checkpoint and never run.
config = Config()
spj = SPJ(config)
global_step = tf.Variable(0, name='global_step', trainable=False)
checkpoint_dir = "/home/martnzjulio_a/songze/checkpoints3/"
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    # latest_checkpoint returns None when the directory holds no checkpoint.
    raise ValueError("No checkpoint found in " + checkpoint_dir)
saver = tf.train.Saver()

# Use the arrays produced by the "Load Test Data" cell; the *_train names this
# cell used before are not defined anywhere in this notebook. H_train is the
# name that cell gives the (test) proposal features.
# NOTE(review): the other cells call random_mini_batches with seven arrays
# (VideoIds and Framestamps first); confirm which signature utils.data_utils
# currently exposes.
sample_minibatches = random_mini_batches(H_train, Ipast_test, Ifuture_test, Ycaptions_test, Xcaptions_test, minibatch_size, seed)
(minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = sample_minibatches[0]

with tf.Session() as sess:
    saver.restore(sess, latest_checkpoint)
    word_id = spj.caption_generation(sess, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Xcaptions, minibatch_Ycaptions)
print("word_id: ", word_id)

# Generate Captions (Direct)

In [None]:
def setup_graph_and_saver(learning_rate):
    """Rebuild the default graph with the SPJ model, an Adam train op and a Saver.

    Args:
        learning_rate: learning rate passed to tf.train.AdamOptimizer.

    Returns:
        Tuple ``(spj, saver, global_step, optimizer, init, seed)``: the SPJ
        model, a tf.train.Saver, the non-trainable ``global_step`` variable,
        the Adam minimize op for ``spj.loss``, the global-variables
        initializer op, and the Python-side minibatch seed (3).
    """
    minibatch_seed = 3

    # Fresh graph first, then the graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)

    # The graph-building statements below keep their original order.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    model = SPJ(Config())
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = adam.minimize(model.loss, global_step=step_counter)
    init_op = tf.global_variables_initializer()
    # Created last, after the model and optimizer have been added to the graph.
    checkpoint_saver = tf.train.Saver()

    return model, checkpoint_saver, step_counter, train_op, init_op, minibatch_seed

def direct_caption_generation(data, learning_rate, minibatch_size, home_dir, version):
    """Restore the latest checkpoint and generate captions for every minibatch.

    Args:
        data: tuple ``(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions,
            Xcaptions)`` of arrays, passed on to ``random_mini_batches``.
        learning_rate: forwarded to ``setup_graph_and_saver``.
        minibatch_size: number of examples per minibatch.
        home_dir: base directory; checkpoints are read from
            ``<home_dir>/checkpoints_<version>/``.
        version: suffix of the checkpoint directory name.

    Returns:
        ``(predictions, labels, ids)``: the per-minibatch outputs of
        ``spj.generate_caption_2`` and the matching video ids, each
        concatenated along axis 0. Three empty arrays are returned when
        ``random_mini_batches`` yields no minibatches.

    Raises:
        ValueError: if the checkpoint directory contains no checkpoint.
    """
    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph (only the model, the saver and the seed are needed here)
    spj, saver, _, _, _, seed = setup_graph_and_saver(learning_rate)

    # Directory Where Saved Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

    # Resolve the checkpoint up front so a missing one fails with a clear message.
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        raise ValueError("No checkpoint found in " + checkpoint_dir)

    # Collect per-batch results and concatenate once at the end instead of
    # re-copying the growing arrays on every iteration.
    pred_batches, label_batches, id_batches = [], [], []

    # Start Session
    with tf.Session() as sess:
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        seed = seed + 1
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

        # For all batches
        for minibatch in minibatches:
            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            # Generate Captions
            pred, lab = spj.generate_caption_2(sess, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions)
            pred_batches.append(pred)
            label_batches.append(lab)
            id_batches.append(minibatch_VideoIds)

    # No minibatches: the original hit an unbound `ids` here.
    if not pred_batches:
        return np.array([]), np.array([]), np.array([])

    predictions = np.concatenate(pred_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    ids = np.concatenate(id_batches, axis=0)
    return predictions, labels, ids
# Run on the arrays produced by the "Load Test Data" cell. The *_train names
# used here before are not defined anywhere in this notebook; H_train is the
# name that cell gives the (test) proposal features.
data = (VideoIds_test, Framestamps_test, H_train, Ipast_test, Ifuture_test, Ycaptions_test, Xcaptions_test)

# learning_rate was never defined in this notebook. It only parameterises the
# Adam op built by setup_graph_and_saver, which direct_caption_generation does
# not reference afterwards; 1e-3 is tf.train.AdamOptimizer's own default.
learning_rate = 1e-3

predictions, labels, ids = direct_caption_generation(data, learning_rate, minibatch_size, home_dir, version)



In [None]:
# Display the generated caption next to its label for one (example, proposal)
# pair. print_pred_and_labels is defined outside this notebook; id2word is
# presumably used to turn token ids back into words — confirm in the helper.
print_pred_and_labels(predictions, labels, ids, id2word, example=9, proposal=1)

# Compute BLEU Evaluation

In [None]:
import nltk

# labels      shape: (num_examples, num_proposals, num_steps + 1)
# predictions shape: (num_examples, num_proposals, num_steps + 1)

# Token id on which a sentence is cut off. The value comes from the original
# cell; presumably the end-of-sentence id — confirm against word2id.
END_TOKEN_ID = 3

# n-gram weights for BLEU@1..4. Each entry must be a tuple: the original
# `(1.0)` was a bare float, and (0.33, 0.33, 0.33) did not sum to 1.
BLEU_WEIGHTS = {
    1: (1.0,),
    2: (0.5, 0.5),
    3: (1.0 / 3, 1.0 / 3, 1.0 / 3),
    4: (0.25, 0.25, 0.25, 0.25),
}


def _tokens_until_end(token_ids, end_id=END_TOKEN_ID):
    """Return the tokens up to and including the first end_id (all tokens if absent)."""
    sentence = []
    for token in token_ids:
        sentence.append(int(token))
        if token == end_id:
            break
    return sentence


# Per-sentence scores, keyed by the maximum n-gram order.
bleu_scores = {n: [] for n in BLEU_WEIGHTS}

num_examples, num_proposals = labels.shape[:2]

# for each example
for i in range(num_examples):

    # for each proposal
    for p in range(num_proposals):

        # A row of pads carries no caption: skip it, but keep scanning the
        # remaining proposals (the original `break` dropped all of them).
        if not np.any(labels[i, p]):
            continue

        # extract reference from labels
        reference = _tokens_until_end(labels[i, p])

        # extract hypothesis from predictions, cut at the prediction's own end
        # token (the original tested the label's end token here)
        hypothesis = _tokens_until_end(predictions[i, p])

        # compute bleu scores
        for n, weights in BLEU_WEIGHTS.items():
            score = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis, weights)
            bleu_scores[n].append(score)

# Report the mean sentence-level score for each n-gram order.
for n in sorted(bleu_scores):
    if bleu_scores[n]:
        print("BLEU@%d: %.4f (%d captions)" % (n, float(np.mean(bleu_scores[n])), len(bleu_scores[n])))
    else:
        print("BLEU@%d: no captions to score" % n)
            
            

# Prediction w/ True Feed

In [None]:
def setup_graph_and_saver(learning_rate):
    """Rebuild the default graph with the SPJ model, an Adam train op and a Saver.

    This cell redefines the helper with the same body as the earlier
    caption-generation cell, so whichever cell ran last provides the
    definition in use.

    Args:
        learning_rate: learning rate passed to tf.train.AdamOptimizer.

    Returns:
        Tuple ``(spj, saver, global_step, optimizer, init, seed)``: the SPJ
        model, a tf.train.Saver, the non-trainable ``global_step`` variable,
        the Adam minimize op for ``spj.loss``, the global-variables
        initializer op, and the Python-side minibatch seed (3).
    """
    minibatch_seed = 3

    # Fresh graph first, then the graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)

    # The graph-building statements below keep their original order.
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    model = SPJ(Config())
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = adam.minimize(model.loss, global_step=step_counter)
    init_op = tf.global_variables_initializer()
    # Created last, after the model and optimizer have been added to the graph.
    checkpoint_saver = tf.train.Saver()

    return model, checkpoint_saver, step_counter, train_op, init_op, minibatch_seed

def direct_inference(data, learning_rate, minibatch_size,home_dir, version):
    """Restore the latest checkpoint and run predictions with the true captions fed in.

    Args:
        data: tuple ``(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions,
            Xcaptions)`` of arrays, passed on to ``random_mini_batches``.
        learning_rate: forwarded to ``setup_graph_and_saver``.
        minibatch_size: number of examples per minibatch.
        home_dir: base directory; checkpoints are read from
            ``<home_dir>/checkpoints_<version>/``.
        version: suffix of the checkpoint directory name.

    Returns:
        ``(predictions, labels, ids)``: ``spj._predictions`` per minibatch,
        the argmax over axis 3 of the fed one-hot labels, and the matching
        video ids, each concatenated along axis 0. Three empty arrays are
        returned when ``random_mini_batches`` yields no minibatches.

    Raises:
        ValueError: if the checkpoint directory contains no checkpoint.
    """
    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph (only the model, the saver and the seed are needed here)
    spj, saver, _, _, _, seed = setup_graph_and_saver(learning_rate)

    # Directory Where Saved Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

    # Resolve the checkpoint up front so a missing one fails with a clear message.
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        raise ValueError("No checkpoint found in " + checkpoint_dir)

    # Collect per-batch results and concatenate once at the end instead of
    # re-copying the growing arrays on every iteration.
    pred_batches, label_batches, id_batches = [], [], []

    # Start Session
    with tf.Session() as sess:
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        seed = seed + 1
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

        # For all batches
        for minibatch in minibatches:

            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            # Labels are fed one-hot encoded.
            minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)

            # Feed (keep_prob of 1.0 is fed for inference)
            feed = {spj._H: minibatch_H,
                    spj._Ipast: minibatch_Ipast,
                    spj._Ifuture: minibatch_Ifuture,
                    spj._x: minibatch_Xcaptions,
                    spj._y: minibatch_Ycaptions,
                    spj._keep_prob: 1.0}

            # Run Predictions
            pred, lab = sess.run([spj._predictions, spj._y], feed_dict=feed)
            # Collapse the one-hot labels back to ids along axis 3.
            lab = np.argmax(lab, axis=3)

            # Cache Results
            pred_batches.append(pred)
            label_batches.append(lab)
            id_batches.append(minibatch_VideoIds)

    # No minibatches: the original hit unbound result variables here.
    if not pred_batches:
        return np.array([]), np.array([]), np.array([])

    predictions = np.concatenate(pred_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    ids = np.concatenate(id_batches, axis=0)
    return predictions, labels, ids
# Run on the arrays produced by the "Load Test Data" cell. The *_train names
# used here before are not defined anywhere in this notebook; H_train is the
# name that cell gives the (test) proposal features.
data = (VideoIds_test, Framestamps_test, H_train, Ipast_test, Ifuture_test, Ycaptions_test, Xcaptions_test)

# learning_rate was never defined in this notebook. It only parameterises the
# Adam op built by setup_graph_and_saver, which direct_inference does not
# reference afterwards; 1e-3 is tf.train.AdamOptimizer's own default.
learning_rate = 1e-3

predictions2, labels2, ids2 = direct_inference(data, learning_rate, minibatch_size, home_dir, version)

In [None]:
# Display the true-feed prediction next to its label for one (example,
# proposal) pair. print_pred_and_labels is defined outside this notebook.
print_pred_and_labels(predictions2, labels2, ids2, id2word, example=0, proposal=3)