# Imports

In [1]:
# As usual, a bit of setup
import time
from time import process_time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
import copy
import math
from utils.data_utils import *
import sys
import re
from utils.spj import Config
from utils.spj import SPJ
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


# Specify Model Directory

In [2]:
# Run configuration shared by the cells below.
home_dir = "/home/martnzjulio_a/songze"   # base directory for data, checkpoints and logs
#home_dir = "/home/songzeli"
version = "test5"                          # suffix of the checkpoint / tensorboard directories
minibatch_size = 25                        # batch size handed to direct_inference

# Echo the active settings so they are visible in the cell output.
print()
for _label, _value in (("DIRECTORY SET TO: ", home_dir),
                       ("VERSION SET TO  : ", version)):
    print(_label, _value)


DIRECTORY SET TO:  /home/martnzjulio_a/songze
VERSION SET TO  :  test5


# Load Vocabulary

In [3]:
# Load Vocabulary
embedding_size = 512          # size of each word-embedding vector passed to get_wordvector
pad_len, num_steps = 30, 30   # captions are padded to pad_len tokens
max_num_proposals = 10        # proposals kept per video
vocabulary, vocab_size = caption_preprocess(home_dir)

# Word Embedding Matrix and the word <-> id lookup tables (changed by Songze).
# get_wordvector used to be called twice in a row with identical arguments and
# the first result was discarded; a single call is sufficient.
emb_matrix, word2id, id2word = get_wordvector(embedding_size, vocab_size, vocabulary)
num_classes = len(word2id)

Total number of words in all captions:  532264
Vocabulary Size (Unique):  11125


# Load All Data

In [9]:
# Upper bound on the number of training videos kept in memory.
num_train = 100
#num_train = 2000

# Load Training Data
#train_file = home_dir + "/SPJ/train_2400.csv"
train_file = home_dir + "/SPJ/train_val_300.csv"
(train_ids,
 train_data,
 train_padded_proposals,
 train_padded_framestamps) = video_preprocess(home_dir, train_file, max_num_proposals)

# Training captions: token ids per proposal, with the last two axes swapped and
# cast to int32. Y drops index 0 of the last axis while X keeps every position
# (presumably the targets are the inputs shifted by one step -- TODO confirm).
(train_padded_sentences,
 train_padded_sentences_2,
 train_padded_sentences_id) = get_padded_sentences_id(pad_len, train_ids, train_data, word2id, max_num_proposals)
Ycaptions_train = np.transpose(
    copy.deepcopy(train_padded_sentences_2), axes=(0, 2, 1)
).astype(np.int32)[:num_train, :, 1:]
Xcaptions_train = np.transpose(
    copy.deepcopy(train_padded_sentences), axes=(0, 2, 1)
).astype(np.int32)[:num_train]

Ycaptions_train = truncate_captions(Ycaptions_train)
Xcaptions_train = truncate_captions(Xcaptions_train)

# Training features, each cut down to the first num_train videos.
VideoIds_train = train_ids[:num_train]
Framestamps_train = train_padded_framestamps[:num_train]
H_train = train_padded_proposals.astype(np.float32)[:num_train]
Ipast_train = temporal_indicator(train_padded_framestamps, mode="past").astype(np.float32)[:num_train]
Ifuture_train = temporal_indicator(train_padded_framestamps, mode="future").astype(np.float32)[:num_train]

# The file may hold fewer videos than requested, so re-derive the actual count.
num_train = len(train_ids[:num_train])
print("Number of Training Examples:", num_train)
print()
for _label, _array in (
    ("VideoIds_train.shape: ", VideoIds_train),
    ("Framestamps_train.shape: ", Framestamps_train),
    ("Xcaptions_train.shape: ", Xcaptions_train),
    ("Ycaptions_train.shape: ", Ycaptions_train),
    ("H_train.shape: ", H_train),
    ("Ipast_train.shape: ", Ipast_train),
    ("Ifuture_train.shape: ", Ifuture_train),
):
    print(_label, _array.shape)
print()

Number of Training Examples: 100

VideoIds_train.shape:  (100,)
Framestamps_train.shape:  (100, 2, 10)
Xcaptions_train.shape:  (100, 10, 30)
Ycaptions_train.shape:  (100, 10, 30)
H_train.shape:  (100, 500, 10)
Ipast_train.shape:  (100, 10, 10)
Ifuture_train.shape:  (100, 10, 10)



In [14]:
# Upper bound on the number of validation videos kept in memory.
num_val = 100
#num_val = 225

# Load Validation Data
# NOTE(review): the training cell reads train_val_300.csv while this cell reads
# train_2400.csv, and the commented-out alternatives in both cells are the other
# way round -- confirm that this train/validation assignment is intended.
#val_file = home_dir + "/SPJ/train_val_300.csv"
val_file = home_dir + "/SPJ/train_2400.csv"
val_ids,val_data,val_padded_proposals,val_padded_framestamps = video_preprocess(home_dir, val_file, max_num_proposals)

# Validation Captions
# Y drops index 0 of the last axis while X keeps every position
# (presumably the targets are the inputs shifted by one step -- TODO confirm).
val_padded_sentences,val_padded_sentences_2,val_padded_sentences_id = get_padded_sentences_id(pad_len, val_ids, val_data, word2id, max_num_proposals) 
Ycaptions_val = np.transpose(copy.deepcopy(val_padded_sentences_2),axes=(0,2,1)).astype(np.int32)[:num_val,:,1:]
Xcaptions_val = np.transpose(copy.deepcopy(val_padded_sentences),axes=(0,2,1)).astype(np.int32)[:num_val]
Ycaptions_val = truncate_captions(Ycaptions_val)
Xcaptions_val = truncate_captions(Xcaptions_val)


# Validation Features (each cut down to the first num_val videos)
VideoIds_val = val_ids[:num_val]
Framestamps_val = val_padded_framestamps[:num_val]
H_val = val_padded_proposals.astype(np.float32)[:num_val]
Ipast_val = temporal_indicator(val_padded_framestamps, mode="past").astype(np.float32)[:num_val]
Ifuture_val = temporal_indicator(val_padded_framestamps, mode="future").astype(np.float32)[:num_val]

# The file may hold fewer videos than requested, so re-derive the actual count.
num_val = len(val_ids[:num_val])
print("Number of Validation Examples:", num_val)
print()
print("VideoIds_val.shape: ", VideoIds_val.shape)
print("Framestamps_val.shape: ", Framestamps_val.shape)
print("Xcaptions_val.shape: ", Xcaptions_val.shape)
print("Ycaptions_val.shape: ", Ycaptions_val.shape)
print("H_val.shape: ", H_val.shape)
print("Ipast_val.shape: ", Ipast_val.shape)
print("Ifuture_val.shape: ", Ifuture_val.shape)
print()

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [12]:
# Display the validation table returned by video_preprocess (one row per caption).
val_data

Unnamed: 0,index,duration,id,sentences,t_init,t_end,f_init,f_end,h0,h1,...,h490,h491,h492,h493,h494,h495,h496,h497,h498,h499
0,0,139.039993,v_sJFgo9H6zNo,A little boy playing on a jungle gym.,3.480000,17.379999,13.0,65.0,7.654006,4.992683,...,0.338641,1.094353,0.751133,1.265445,0.538537,0.419639,0.815081,0.831750,0.702886,0.774644
1,1,139.039993,v_sJFgo9H6zNo,He is climbing up a set of stairs.,3.480000,13.900000,13.0,52.0,5.612783,4.992683,...,0.338641,0.587180,0.751133,1.265445,0.538537,0.419639,0.815081,0.831750,0.568674,0.774644
2,2,139.039993,v_sJFgo9H6zNo,Now is sliding fast down a slide.,9.040000,18.770000,34.0,70.0,8.181156,4.992683,...,0.150293,1.094353,0.751133,1.265445,0.533571,0.405774,0.420415,0.831750,0.702886,0.468485
3,3,139.039993,v_sJFgo9H6zNo,Another boy is shown playing in the grass.,19.469999,27.110001,73.0,101.0,5.359421,-0.657509,...,0.435154,0.475055,0.670092,1.274009,0.280949,0.574728,0.614649,0.349273,0.736613,0.521650
4,4,139.039993,v_sJFgo9H6zNo,Now the first boy is being picked up by a woman.,27.110001,38.240002,101.0,143.0,5.684093,6.129234,...,0.351477,0.895881,1.250099,1.170916,0.612918,0.439201,0.410789,0.635423,0.859878,1.392786
5,5,139.039993,v_sJFgo9H6zNo,Now he is going to go down the slide once again.,33.369999,45.189999,125.0,169.0,7.909914,2.312549,...,0.240413,1.042620,1.250099,1.170916,0.457505,0.321312,0.410789,0.424273,0.509968,0.721367
6,6,139.039993,v_sJFgo9H6zNo,The boy likes playing with a large wheel on t...,45.880001,51.450001,171.0,192.0,3.990629,-0.526764,...,0.342754,0.201278,0.284844,1.596033,0.262489,0.407763,0.560005,0.003124,0.319196,1.027945
7,7,139.039993,v_sJFgo9H6zNo,"Once again he goes down the slide, but with d...",65.349998,78.559998,244.0,293.0,8.883933,1.821796,...,0.174361,1.631389,0.682707,0.787518,0.635641,0.633001,0.898898,0.774483,0.524824,1.036477
8,8,139.039993,v_sJFgo9H6zNo,He goes down the slide a 4th time but with an...,84.820000,98.019997,317.0,366.0,3.955229,4.337695,...,0.356144,0.338375,0.597890,1.023139,0.403459,0.848032,0.755457,0.675824,0.992041,0.805185
9,9,139.039993,v_sJFgo9H6zNo,He goes down the slide several more times.,111.930000,130.699997,418.0,488.0,7.660805,5.674644,...,0.633585,0.628072,1.157326,1.441646,0.690586,0.573043,0.945901,1.002684,0.908242,1.135305


In [13]:
# Display the training table returned by video_preprocess (one row per caption).
train_data

Unnamed: 0,index,duration,id,sentences,t_init,t_end,f_init,f_end,h0,h1,...,h490,h491,h492,h493,h494,h495,h496,h497,h498,h499
0,0,92.139999,v_yqFdxxWgvh8,A woman puts ice in a glass.,18.430000,29.020000,69.0,108.0,10.591444,9.295307,...,0.512891,0.325693,0.592476,0.977704,0.700074,0.639074,0.377837,-0.135583,0.932509,0.364629
1,1,92.139999,v_yqFdxxWgvh8,She dumps a bottle into the glass and adds ju...,18.430000,70.019997,69.0,261.0,10.591444,9.533868,...,0.744511,0.812314,0.683264,1.054004,0.784580,0.895578,0.817763,0.335670,1.633817,0.489831
2,2,92.139999,v_yqFdxxWgvh8,She stirs the drink and sets it down on the c...,71.870003,77.400002,268.0,288.0,7.453258,9.370346,...,0.895464,0.169415,-0.130921,1.093463,0.555662,0.328732,0.009399,-0.685822,1.151089,-0.147268
3,0,7.760000,v_uc1RbyLfuSs,A person is wind sailing through a body of water.,0.000000,7.760000,0.0,26.0,5.546053,-5.349733,...,0.847409,0.758656,0.554028,0.514166,1.064305,0.395815,0.544007,0.327727,0.260605,0.543664
4,1,7.760000,v_uc1RbyLfuSs,The person passes someone swimming.,0.000000,2.130000,0.0,7.0,3.824407,-6.282783,...,0.591036,0.758656,0.554028,0.154243,0.680983,0.395815,0.172772,0.327727,0.260605,0.543664
5,2,7.760000,v_uc1RbyLfuSs,The person turns on their board.,4.030000,7.760000,14.0,26.0,5.546053,-5.349733,...,0.782640,0.635181,0.099991,0.514166,0.488233,0.098780,0.544007,0.121294,0.211139,-0.002110
6,0,120.089996,v_3SL_Au0H0cw,man is wearing a black kimono and isdoing yoga...,0.000000,120.089996,0.0,374.0,8.887844,5.674127,...,1.189211,0.752200,1.146049,1.297203,1.195715,0.680125,0.345901,1.188765,0.293774,0.759136
7,1,120.089996,v_3SL_Au0H0cw,man is standing in a large green grassy field.,3.000000,120.089996,9.0,374.0,2.776588,5.674127,...,1.189211,0.752200,1.146049,1.297203,1.195715,0.680125,0.345901,1.188765,0.293774,0.759136
8,2,120.089996,v_3SL_Au0H0cw,man is slowly walking practicing boxing movem...,0.000000,118.889999,0.0,370.0,8.887844,5.674127,...,1.189211,0.752200,1.146049,1.297203,1.195715,0.680125,0.345901,1.188765,0.293774,0.759136
9,0,12.930000,v_uxsZFuXKs3A,A man is outdoors standing on a 4 step tool an...,0.000000,1.680000,0.0,6.0,0.970290,-4.777149,...,0.343152,0.488934,0.149111,0.185287,0.458928,0.277895,0.054650,0.396673,0.346066,0.199127


# Training Function

In [7]:
def model(all_train, all_val, starter_learning_rate, keep_prob, num_epochs, home_dir, version, print_cost = True):
    """
    Train the SPJ network (C3D->ATTENTION->CAPTIONING) with TensorFlow 1.x.

    A fresh default graph is built, the newest checkpoint found under
    home_dir/checkpoints_<version>/ is restored when one exists (otherwise all
    variables are initialized), and Adam is run for num_epochs passes over the
    training minibatches. After every pass the loss is also measured on the
    validation minibatches with keep_prob fed as 1.0. The minibatch size is not
    an argument: it is read from Config().batch_size.

    Arguments:
    all_train -- tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
                 with the training arrays; H.shape[0] is taken as the number of examples
    all_val -- tuple with the same layout holding the validation arrays
    starter_learning_rate -- initial learning rate given to tf.train.exponential_decay
    keep_prob -- value fed to spj._keep_prob during training steps
    num_epochs -- number of passes over the training minibatches made by this call
    home_dir -- base directory; checkpoints and TensorBoard logs are written below it
    version -- suffix used to name the checkpoint and TensorBoard directories
    print_cost -- when truthy, print the per-epoch losses and write them to TensorBoard

    Returns:
    0 -- always; the learnt parameters are only available through the checkpoints
         saved under the checkpoint directory.
    """
    (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train) = all_train
    (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)   = all_val

    # Directories for checkpoints and TensorBoard event files
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
    tensorboard_dir =  home_dir + "/tensorboard_" + str(version) + "/"
    print("Checkpoint directory: ", checkpoint_dir)
    print("Tensorboard directory: ", tensorboard_dir)

    # Reset Graph
    tf.reset_default_graph()

    # For Consistency
    tf.set_random_seed(1)
    seed = 3

    # Number of Training Examples
    num_train = H_train.shape[0]

    # Model
    config = Config()
    spj = SPJ(config)

    # Print Hyperparameters
    print()
    print("Hyperparameters:")
    print("----------------")
    print("Starter Learning Rate: ", starter_learning_rate)
    print("Number of Proposals: ", spj.config.num_proposals)
    print("C3D Features Dim: ", spj.config.num_c3d_features )
    print("Batch Size: ", spj.config.batch_size)
    print("Dropout Keep Prob: ", keep_prob)
    print("Vocab Size: ", spj.config.num_classes)
    print("Number of LSTM Time Steps: ", spj.config.num_steps)
    # NOTE(review): the embedding size is reported from hidden_dim, same as the
    # LSTM hidden size below -- confirm the two are meant to be identical.
    print("Word Embedding Size: " , spj.config.hidden_dim)
    print("LSTM Hidden Dim: " , spj.config.hidden_dim)
    print("LSTM Num Layers: " , spj.config.num_layers)

    # Counter of optimizer steps (one per minibatch); minimize() increments it.
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Learning Rate Decay. decay_steps is counted in optimizer steps, so the
    # first drop only happens after 100000 minibatch updates.
    learning_rate = tf.train.exponential_decay(
        learning_rate = starter_learning_rate,
        global_step = global_step,
        decay_steps = 100000,
        decay_rate = 0.96,
        staircase=True)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)
    #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(spj._loss, global_step=global_step)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables (created after the optimizer
    # so that the optimizer's own variables are covered as well).
    saver = tf.train.Saver(max_to_keep=5)

    # Optimizer steps per epoch, used to turn global_step into an epoch index.
    # Clamped to 1 so fewer examples than one batch cannot divide by zero.
    # NOTE(review): assumes random_mini_batches yields num_train // batch_size
    # batches per pass; a trailing partial batch would skew the epoch index.
    num_train_minibatches = max(1, num_train // spj.config.batch_size)

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        #Tensorboard
        summary_writer = tf.summary.FileWriter(tensorboard_dir,sess.graph)

        # try/finally so buffered events reach disk even when training is
        # interrupted from the notebook (KeyboardInterrupt).
        try:
            # check for latest checkpoint
            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
            if latest_checkpoint is None:
                # If no check point run the initialization
                print()
                print("No checkpoint exists, initializing parameters...")
                sess.run(init)
            else:
                print()
                print("Restoring from latest checkpoint...")
                saver.restore(sess, latest_checkpoint)

            # Training Loop
            for epoch in range(num_epochs):

                # Record start time. process_time() counts CPU time of this
                # process, not wall-clock time.
                print()
                start = process_time()

                # Per-minibatch losses of this epoch
                train_losses = []
                val_losses = []
                lr = None  # stays None only if no training minibatch is produced

                # Get minibatches (a new seed each epoch gives a new shuffle)
                seed = seed + 1
                train_minibatches = random_mini_batches(VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, spj.config.batch_size , seed)
                val_minibatches = random_mini_batches(VideoIds_val, Framestamps_val, H_val, Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val,   spj.config.batch_size , seed)

                for train_minibatch in train_minibatches:

                    # Select minibatch (video ids and framestamps are not fed to the graph)
                    (_, _, minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatch
                    minibatch_Ycaptions_train = id_2_one_hot_void_padding(minibatch_Ycaptions_train, spj.config.num_classes, void_dim=0)

                    # Run Train Session
                    train_feed={spj._H: minibatch_H_train,
                          spj._Ipast: minibatch_Ipast_train,
                          spj._Ifuture: minibatch_Ifuture_train,
                          spj._x: minibatch_Xcaptions_train,
                          spj._y: minibatch_Ycaptions_train,
                          spj._keep_prob: keep_prob,
                          spj._reg: 0.0}
                    _ , minibatch_train_loss, lr = sess.run([optimizer, spj._loss, learning_rate], feed_dict=train_feed)
                    train_losses.append(minibatch_train_loss)

                for val_minibatch in val_minibatches:

                    # Select minibatch
                    (_, _, minibatch_H_val, minibatch_Ipast_val, minibatch_Ifuture_val, minibatch_Ycaptions_val, minibatch_Xcaptions_val) = val_minibatch
                    minibatch_Ycaptions_val = id_2_one_hot_void_padding(minibatch_Ycaptions_val, spj.config.num_classes, void_dim=0)

                    # Run Validation Session (no optimizer op, dropout disabled)
                    val_feed={spj._H: minibatch_H_val,
                              spj._Ipast: minibatch_Ipast_val,
                              spj._Ifuture: minibatch_Ifuture_val,
                              spj._x: minibatch_Xcaptions_val,
                              spj._y: minibatch_Ycaptions_val,
                              spj._keep_prob: 1.0,
                              spj._reg: 0.0}
                    # Fetch the tensor itself so a scalar (not a 1-element list) is stored.
                    minibatch_val_loss = sess.run(spj._loss, feed_dict=val_feed)
                    val_losses.append(minibatch_val_loss)

                # Mean loss over the epoch; NaN when a split produced no minibatch.
                epoch_train_loss = np.mean(train_losses) if train_losses else float("nan")
                epoch_val_loss = np.mean(val_losses) if val_losses else float("nan")

                # Epoch index derived from the restored/accumulated global step.
                # Computed unconditionally because checkpointing below needs it
                # even when print_cost is off.
                global_epoch = tf.train.global_step(sess, global_step)//num_train_minibatches

                # Print cost
                if print_cost:
                    print("Epoch: ", global_epoch)
                    print("Current Learning Rate", lr)
                    print ("Training Loss: ", epoch_train_loss)
                    print ("Validation Loss: ", epoch_val_loss)
                    # Add and Write to Tensorboard. The tag spellings are kept
                    # as they are so existing event files stay comparable.
                    train_summary = tf.Summary()
                    val_summary = tf.Summary()
                    train_summary.value.add(tag="train_losss", simple_value=epoch_train_loss)
                    val_summary.value.add(tag="val_losss", simple_value=epoch_val_loss)
                    summary_writer.add_summary(train_summary, global_epoch)
                    summary_writer.add_summary(val_summary, global_epoch)

                # Save Model (every 10 epochs)
                if global_epoch % 10 == 0:
                    print("Saving Checkpoint for global_step " + str(global_epoch))
                    saver.save(sess, checkpoint_dir + 'model', global_step = global_epoch)

                # Save and Print Processed Time
                end = process_time()
                print()
                print("Time Elapased: ", end - start)
        finally:
            summary_writer.close()

        return 0

# Run Training

In [11]:
# Train Model: hyperparameters for this run
learning_rate = 0.01   # starting learning rate (also reused by the inference cell)
keep_prob = 1.0        # dropout keep probability, 1.0 disables dropout
num_epochs = 1000

# Pack each split in the order model() unpacks it.
all_train = (
    VideoIds_train, Framestamps_train, H_train,
    Ipast_train, Ifuture_train,
    Ycaptions_train, Xcaptions_train,
)
all_val = (
    VideoIds_val, Framestamps_val, H_val,
    Ipast_val, Ifuture_val,
    Ycaptions_val, Xcaptions_val,
)
execute = model(
    all_train,
    all_val,
    learning_rate,
    keep_prob,
    num_epochs,
    home_dir,
    version,
)

Checkpoint directory:  /home/martnzjulio_a/songze/checkpoints_test5/
Tensorboard directory:  /home/martnzjulio_a/songze/tensorboard_test5/

Hyperparameters:
----------------
Starter Learning Rate:  0.01
Number of Proposals:  10
C3D Features Dim:  500
Batch Size:  25
Dropout Keep Prob:  1.0
Vocab Size:  10999
Number of LSTM Time Steps:  30
Word Embedding Size:  512
LSTM Hidden Dim:  512
LSTM Num Layers:  2

No checkpoint exists, initializing parameters...

Epoch:  1
Current Learning Rate 0.01
Training Loss:  1.284549
Validation Loss:  1.9543447

Time Elapased:  6.709691679999992

Epoch:  2
Current Learning Rate 0.01
Training Loss:  0.93492573
Validation Loss:  2.001435

Time Elapased:  6.281559686999998

Epoch:  3
Current Learning Rate 0.01
Training Loss:  0.8740783
Validation Loss:  2.0603762

Time Elapased:  6.265835393999993

Epoch:  4
Current Learning Rate 0.01
Training Loss:  0.8541787
Validation Loss:  2.09695

Time Elapased:  6.192329725999997

Epoch:  5
Current Learning Rate 0.0

KeyboardInterrupt: 

In [None]:
def setup_graph_and_saver(learning_rate):
    """
    Rebuild the SPJ graph from scratch and hand back everything needed to
    restore and run it.

    Arguments:
    learning_rate -- learning rate given to the Adam optimizer

    Returns:
    (spj, saver, global_step, optimizer, init, seed) -- the model, a Saver over
    all graph variables, the step counter, the Adam training op, the variable
    initializer op and the starting minibatch seed (3).
    """
    # Start from an empty default graph with a fixed graph-level seed.
    tf.reset_default_graph()
    tf.set_random_seed(1)

    # The step counter is created before the model, as in the training graph.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    spj = SPJ(Config())

    # The training op is built here too, presumably so the graph holds the same
    # variables as the one the checkpoint was saved from -- TODO confirm.
    train_op = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)
    #train_op = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(spj._loss, global_step=global_step)

    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()

    initial_seed = 3
    return spj, saver, global_step, train_op, init_op, initial_seed

def direct_inference(data, learning_rate, minibatch_size,home_dir, version):
    """
    Restore the latest checkpoint and run the model over `data` in minibatches.

    Arguments:
    data -- tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
    learning_rate -- passed to setup_graph_and_saver to rebuild the graph; no
                     training step is run here
    minibatch_size -- number of examples per minibatch
    home_dir -- base directory that contains checkpoints_<version>/
    version -- suffix of the checkpoint directory to restore from

    Returns:
    (predictions, labels, ids) -- spj._predictions, the argmax over axis 3 of the
    one-hot labels, and the video ids, each concatenated over all minibatches
    along axis 0. The rows follow the shuffled minibatch order, so use `ids` to
    map them back to videos.

    Raises:
    ValueError -- if no checkpoint exists in the checkpoint directory, or if
                  random_mini_batches produces no minibatch.
    """

    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph (the step counter, training op and init op are not needed here)
    spj, saver, _, _, _, seed = setup_graph_and_saver(learning_rate)
    # Directory Where Saved Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

    # Start Session
    with tf.Session() as sess:

        # Check for Latest Checkpoint; latest_checkpoint() returns None when the
        # directory holds none, which saver.restore cannot handle meaningfully.
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            raise ValueError("No checkpoint found in " + checkpoint_dir + "; train the model before running inference.")
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        seed = seed + 1
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

        # Per-minibatch results, concatenated once after the loop
        losses = []
        prediction_chunks = []
        label_chunks = []
        id_chunks = []

        # For all batchs
        for minibatch in minibatches:

            # Select minibatch (framestamps are not fed to the graph)
            (minibatch_VideoIds, _, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)

            # Feed (dropout disabled, no regularization)
            feed={spj._H: minibatch_H,
                  spj._Ipast: minibatch_Ipast,
                  spj._Ifuture: minibatch_Ifuture,
                  spj._x: minibatch_Xcaptions,
                  spj._y: minibatch_Ycaptions,
                  spj._keep_prob: 1.0,
                  spj._reg: 0.0
                 }

            # Run Predictions
            loss, pred, lab = sess.run([spj._loss, spj._predictions, spj._y], feed_dict=feed)
            # Collapse the one-hot labels back to class ids
            lab = np.argmax(lab,axis=3)

            # Cache Results
            losses.append(loss)
            prediction_chunks.append(pred)
            label_chunks.append(lab)
            id_chunks.append(minibatch_VideoIds)

        if not losses:
            raise ValueError("random_mini_batches produced no minibatches; nothing to run inference on.")

        avg_loss = np.mean(losses)
        print(avg_loss)

        predictions = np.concatenate(prediction_chunks,axis=0)
        labels = np.concatenate(label_chunks,axis=0)
        ids = np.concatenate(id_chunks,axis=0)

    return predictions, labels, ids
# Choose the split to run inference on (training split is active).
data = (
    VideoIds_train, Framestamps_train, H_train,
    Ipast_train, Ifuture_train,
    Ycaptions_train, Xcaptions_train,
)
#data =   (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)
#data =   (VideoIds_test, Framestamps_test, H_test,   Ipast_test,   Ifuture_test,   Ycaptions_test,   Xcaptions_test)

# Restore the latest checkpoint for `version` and collect predictions, labels and video ids.
predictions2, labels2, ids2 = direct_inference(
    data,
    learning_rate,
    minibatch_size,
    home_dir,
    version,
)

In [None]:
# Print prediction vs. label for example 0, proposal 0 (presumably decoded to words via id2word -- TODO confirm).
print_pred_and_labels(predictions2, labels2, ids2, id2word, example=0, proposal=0)