# Setup

In [19]:
# As usual, a bit of setup
import time
from time import process_time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
import copy
import math
from utils.data_utils import *
import sys
import re
from utils.spj import Config
from utils.spj import SPJ
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

home_dir = "/home/martnzjulio_a/songze"
#home_dir = "/home/songzeli"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Training Function

In [24]:
def model(all_train, all_val, learning_rate, num_epochs, minibatch_size, num_steps, print_cost = True):
    """
    Trains the SPJ network (C3D->ATTENTION->CAPTIONING) with an Adam optimizer.

    A fresh default graph is built on every call. If a checkpoint exists under
    home_dir + "/checkpoints/" the variables are restored from it, otherwise
    they are initialized. After every epoch the loss is also evaluated on the
    validation minibatches (no optimizer step is run for those).

    Arguments:
    all_train -- tuple (H, Ipast, Ifuture, Ycaptions, Xcaptions) with the training arrays
                 (in this notebook: H [n, 500, 30], Ipast/Ifuture [n, 30, 30],
                 Ycaptions [n, 30, 51], Xcaptions [n, 30, 50])
    all_val -- tuple with the same layout holding the validation arrays
    learning_rate -- learning rate of the Adam optimizer
    num_epochs -- number of epochs to run in this call
    minibatch_size -- minibatch size passed to random_mini_batches
    num_steps -- unused; kept so existing callers keep working
    print_cost -- True to print the losses and write the TensorBoard summaries after every epoch

    Returns:
    0 -- always. The learnt parameters are only persisted through the checkpoints.

    Raises:
    ValueError -- if random_mini_batches yields no training minibatch
    """
    (H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train) = all_train
    (H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)   = all_val

    # Directories for checkpoints and TensorBoard event files
    checkpoint_dir = home_dir + "/checkpoints/"
    tensorboard_dir = home_dir + "/tensorboard/"

    # Reset Graph
    tf.reset_default_graph()

    # For Consistency (the graph-level seed must be set after the reset)
    tf.set_random_seed(1)
    seed = 3

    # Counts optimizer steps (one per training minibatch); stored in the checkpoint
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Model
    config = Config()
    spj = SPJ(config)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj.loss, global_step=global_step)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Tensorboard Loss: both summaries read spj.loss; which one is meaningful
    # depends on whether training or validation data is fed.
    training_summary = tf.summary.scalar("training_loss", spj.loss)
    validation_summary = tf.summary.scalar("validation_loss", spj.loss)

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # check for latest checkpoint
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            # If no check point run the initialization
            print()
            print("No checkpoint exists, initializing parameters...")
            sess.run(init)
        else:
            print()
            print("Restoring from latest checkpoint...")
            saver.restore(sess, latest_checkpoint)

        # Tensorboard
        summary_writer = tf.summary.FileWriter(tensorboard_dir, sess.graph)

        try:
            # Training Loop
            print()
            # NOTE(review): process_time() reports CPU time of the process, not wall-clock time.
            start = process_time()
            for epoch in range(num_epochs):

                # Running averages of the per-minibatch losses
                epoch_train_loss = 0.0
                epoch_val_loss = 0.0
                val_summ = None

                # Get minibatches (same shuffling seed for both splits, new seed every epoch)
                seed = seed + 1
                train_minibatches = random_mini_batches(H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, minibatch_size, seed)
                val_minibatches = random_mini_batches(H_val, Ipast_val, Ifuture_val, Ycaptions_val, Xcaptions_val, minibatch_size, seed)

                # Average over the minibatches that are actually iterated, so the
                # result is right whether or not a final partial batch is produced.
                num_train_minibatches = len(train_minibatches)
                num_val_minibatches = len(val_minibatches)
                if num_train_minibatches == 0:
                    raise ValueError("random_mini_batches returned no training minibatches")

                for train_minibatch in train_minibatches:

                    # Select minibatch
                    (minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatch
                    minibatch_Ycaptions_train = id_2_one_hot_void_padding(minibatch_Ycaptions_train, spj.config.num_classes, void_dim=0)

                    # Run Train Session
                    train_feed = {
                        spj._H: minibatch_H_train,
                        spj._Ipast: minibatch_Ipast_train,
                        spj._Ifuture: minibatch_Ifuture_train,
                        spj._x: minibatch_Xcaptions_train,
                        spj._y: minibatch_Ycaptions_train,
                        spj._batch_size: minibatch_H_train.shape[0],
                    }
                    _ , minibatch_train_loss, train_summ = sess.run([optimizer, spj.loss, training_summary], feed_dict=train_feed)
                    epoch_train_loss += minibatch_train_loss / num_train_minibatches

                for val_minibatch in val_minibatches:

                    # Select minibatch
                    (minibatch_H_val, minibatch_Ipast_val, minibatch_Ifuture_val, minibatch_Ycaptions_val, minibatch_Xcaptions_val) = val_minibatch
                    minibatch_Ycaptions_val = id_2_one_hot_void_padding(minibatch_Ycaptions_val, spj.config.num_classes, void_dim=0)

                    # Run Validation Session (loss only, the optimizer op is not run)
                    val_feed = {
                        spj._H: minibatch_H_val,
                        spj._Ipast: minibatch_Ipast_val,
                        spj._Ifuture: minibatch_Ifuture_val,
                        spj._x: minibatch_Xcaptions_val,
                        spj._y: minibatch_Ycaptions_val,
                        spj._batch_size: minibatch_H_val.shape[0],
                    }
                    minibatch_val_loss, val_summ = sess.run([spj.loss, validation_summary], feed_dict=val_feed)
                    epoch_val_loss += minibatch_val_loss / num_val_minibatches

                # Epoch number across restarts, derived from the checkpointed step counter.
                # Computed unconditionally because the checkpoint logic below needs it
                # even when print_cost is False.
                global_epoch = tf.train.global_step(sess, global_step)//num_train_minibatches

                # Print cost and log the last minibatch summaries of this epoch
                if print_cost:
                    print("Epoch: ", global_epoch)
                    print("Mini Batch Loss", minibatch_train_loss)
                    print ("Training Loss: ", epoch_train_loss)
                    print ("Validation Loss: ", epoch_val_loss)
                    summary_writer.add_summary(train_summ, global_step=global_epoch)
                    if val_summ is not None:
                        summary_writer.add_summary(val_summ, global_step=global_epoch)

                # Save Model (every 10 epochs)
                if global_epoch % 10 == 0:
                    print("Saving Checkpoint for global_step " + str(global_epoch))
                    saver.save(sess, checkpoint_dir + 'model', global_step = global_epoch)

            # Save and Print Processed Time
            end = process_time()
            print()
            print("Time Elapased: ", end - start)
        finally:
            # Flush pending events even when training is interrupted
            summary_writer.close()

        return 0

# Load Vocab

In [15]:
# Load Vocabulary
embedding_size = 512
pad_len, num_steps = 50, 50
vocabulary, vocab_size = caption_preprocess(home_dir)

# Word embedding matrix and the word<->id lookup tables. get_wordvector is
# called exactly once so that emb_matrix, word2id and id2word all come from the
# same call.
emb_matrix, word2id, id2word = get_wordvector(embedding_size, vocab_size, vocabulary)

# Number of output classes = number of entries in the word -> id table
num_classes = len(word2id)

Total number of words in all captions:  504895
Vocabulary Size (Unique):  13001


# Load All Data

In [16]:
# Load All Data
all_file = home_dir + "/SPJ/train_5000.csv"
all_ids, all_data, all_padded_proposals, all_padded_framestamps = video_preprocess(home_dir, all_file)

# All Captions
all_padded_sentences, all_padded_sentences_2, all_padded_sentences_id = get_padded_sentences_id(pad_len, all_ids, all_data, word2id)

# Swap the last two axes and cast to int32. astype() always allocates a new
# array by default, so the inputs are left untouched without an extra deep copy.
Ycaptions_all = np.transpose(all_padded_sentences_2, axes=(0, 2, 1)).astype(np.int32)
Xcaptions_all = np.transpose(all_padded_sentences, axes=(0, 2, 1)).astype(np.int32)

# All Features
H_all = all_padded_proposals.astype(np.float32)
Ipast_all = temporal_indicator(all_padded_framestamps, mode="past").astype(np.float32)
Ifuture_all = temporal_indicator(all_padded_framestamps, mode="future").astype(np.float32)

data_size = len(all_ids)
print("Number of Examples in Data Set:", data_size)

Number of Examples in Data Set: 4990


# Data Split

In [17]:
# Train / validation / test sizes, expressed in whole minibatches
minibatch_size = 32
num_train = 3*minibatch_size
num_val = 1*minibatch_size
num_test = 1*minibatch_size

# Consecutive, non-overlapping row ranges: [train | val | test]
train_rows = slice(None, num_train)
val_rows = slice(num_train, num_train + num_val)
test_rows = slice(num_train + num_val, num_train + num_val + num_test)

# Every split is cut out of these five arrays, in this order
array_names = ("Xcaptions", "Ycaptions", "H", "Ipast", "Ifuture")
all_arrays = (Xcaptions_all, Ycaptions_all, H_all, Ipast_all, Ifuture_all)

# Training Split
Xcaptions_train, Ycaptions_train, H_train, Ipast_train, Ifuture_train = [arr[train_rows] for arr in all_arrays]
for array_name, split_arr in zip(array_names, (Xcaptions_train, Ycaptions_train, H_train, Ipast_train, Ifuture_train)):
    print("{}_train.shape: ".format(array_name), split_arr.shape)
print()

# Validation Split
Xcaptions_val, Ycaptions_val, H_val, Ipast_val, Ifuture_val = [arr[val_rows] for arr in all_arrays]
for array_name, split_arr in zip(array_names, (Xcaptions_val, Ycaptions_val, H_val, Ipast_val, Ifuture_val)):
    print("{}_val.shape: ".format(array_name), split_arr.shape)
print()

# Testing Split
Xcaptions_test, Ycaptions_test, H_test, Ipast_test, Ifuture_test = [arr[test_rows] for arr in all_arrays]
for array_name, split_arr in zip(array_names, (Xcaptions_test, Ycaptions_test, H_test, Ipast_test, Ifuture_test)):
    print("{}_test.shape: ".format(array_name), split_arr.shape)

Xcaptions_train.shape:  (96, 30, 50)
Ycaptions_train.shape:  (96, 30, 51)
H_train.shape:  (96, 500, 30)
Ipast_train.shape:  (96, 30, 30)
Ifuture_train.shape:  (96, 30, 30)

Xcaptions_val.shape:  (32, 30, 50)
Ycaptions_val.shape:  (32, 30, 51)
H_val.shape:  (32, 500, 30)
Ipast_val.shape:  (32, 30, 30)
Ifuture_val.shape:  (32, 30, 30)

Xcaptions_test.shape:  (32, 30, 50)
Ycaptions_test.shape:  (32, 30, 51)
H_test.shape:  (32, 500, 30)
Ipast_test.shape:  (32, 30, 30)
Ifuture_test.shape:  (32, 30, 30)


# Execute Training

In [25]:
# Hyperparameters for this training run
learning_rate = 1e-5
num_epochs = 100

# Bundle the splits in the order model() unpacks them: (H, Ipast, Ifuture, Ycaptions, Xcaptions)
all_train = (H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
all_val = (H_val, Ipast_val, Ifuture_val, Ycaptions_val, Xcaptions_val)

# model() trains and checkpoints as a side effect; the value stored in `logits` is its return code 0.
logits = model(
    all_train=all_train,
    all_val=all_val,
    learning_rate=learning_rate,
    num_epochs=num_epochs,
    minibatch_size=minibatch_size,
    num_steps=num_steps,
)


Restoring from latest checkpoint...
INFO:tensorflow:Restoring parameters from /home/martnzjulio_a/songze/checkpoints/model-670

Epoch:  671
Mini Batch Loss 65595.91
Training Loss:  62669.61197916666
Validation Loss:  60917.8671875
Epoch:  672
Mini Batch Loss 61989.668
Training Loss:  62165.06770833333
Validation Loss:  60311.28515625
Epoch:  673
Mini Batch Loss 63165.61
Training Loss:  61500.59505208333
Validation Loss:  59624.09765625
Epoch:  674
Mini Batch Loss 66060.54
Training Loss:  60783.71614583333
Validation Loss:  58904.9296875
Epoch:  675
Mini Batch Loss 62243.66
Training Loss:  60042.79166666667
Validation Loss:  58167.7265625
Epoch:  676
Mini Batch Loss 58664.414
Training Loss:  59293.98567708333
Validation Loss:  57431.86328125
Epoch:  677
Mini Batch Loss 52096.94
Training Loss:  58549.02213541667
Validation Loss:  56712.625
Epoch:  678
Mini Batch Loss 58333.895
Training Loss:  57817.95703125
Validation Loss:  56014.2734375
Epoch:  679
Mini Batch Loss 59178.812
Training L

KeyboardInterrupt: 

# Generate Captions

In [None]:
from utils.data_utils import *

# NOTE(review): these assignments overwrite the notebook-level learning_rate and
# minibatch_size used by the training cell; re-run that cell's setup before training again.
learning_rate = 1e-6
minibatch_size = 4
seed = 3

# Build a fresh graph first and seed it afterwards: the graph-level seed belongs
# to the current default graph, so seeding before the reset would be discarded.
tf.reset_default_graph()
tf.set_random_seed(1)
config = Config()
spj = SPJ(config)
global_step = tf.Variable(0, name='global_step', trainable=False)

# Same checkpoint location that model() writes to
checkpoint_dir = home_dir + "/checkpoints/"
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise ValueError("No checkpoint found in " + checkpoint_dir + "; train the model first.")

# The Saver is created before the optimizer, so it only covers the variables that
# exist at this point (the SPJ variables and global_step).
saver = tf.train.Saver()
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj.loss, global_step=global_step)

# Caption the first shuffled training minibatch
train_minibatches = random_mini_batches(H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, minibatch_size, seed)
(minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatches[0]
with tf.Session() as sess:
    saver.restore(sess, latest_checkpoint)
    # NOTE(review): the training loop one-hot encodes Ycaptions before feeding them;
    # here the raw ids are passed -- confirm this is what caption_generation expects.
    word_id = spj.caption_generation(sess,minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Xcaptions_train, minibatch_Ycaptions_train)
print ("word_id: ", word_id)

# Direct Caption Generation

In [34]:
def setup_graph_and_saver(learning_rate):
    """
    Rebuilds the default graph with the SPJ network, an Adam training op and a Saver.

    Arguments:
    learning_rate -- learning rate of the Adam optimizer

    Returns:
    (spj, saver, global_step, optimizer, init, seed) -- the SPJ model, a Saver created
    after all variables exist, the step-counter variable, the Adam minimize op on
    spj.loss, the global-variables initializer op, and the base minibatch seed (3).
    """
    base_seed = 3

    # Start from an empty graph, then fix its graph-level random seed
    tf.reset_default_graph()
    tf.set_random_seed(1)

    # Graph construction; the creation order is kept as is
    step_counter = tf.Variable(0, name='global_step', trainable=False)
    network = SPJ(Config())
    adam = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_op = adam.minimize(network.loss, global_step=step_counter)
    init_op = tf.global_variables_initializer()
    checkpoint_saver = tf.train.Saver()

    return network, checkpoint_saver, step_counter, train_op, init_op, base_seed

def direct_caption_generation(data, learning_rate, minibatch_size, print_cost = True):
    """
    Restores the latest checkpoint and runs spj.generate_caption_2 on every minibatch of data.

    Arguments:
    data -- tuple (H, Ipast, Ifuture, Ycaptions, Xcaptions); only H, Ipast and Ifuture
            are passed to the caption generator
    learning_rate -- forwarded to setup_graph_and_saver to rebuild the training graph
    minibatch_size -- minibatch size passed to random_mini_batches
    print_cost -- unused; kept so existing callers keep working

    Returns:
    0 -- always. The value returned by generate_caption_2 is not collected.

    Raises:
    ValueError -- if no checkpoint exists in the checkpoint directory
    """
    # Extract Data
    (H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph (the step counter, optimizer and init op are not needed here)
    spj, saver, _global_step, _optimizer, _init, seed = setup_graph_and_saver(learning_rate)

    # Same checkpoint location that model() writes to
    checkpoint_dir = home_dir + "/checkpoints/"
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        # Fail early with a clear message instead of handing None to saver.restore
        raise ValueError("No checkpoint found in " + checkpoint_dir + "; train the model first.")

    # Start Session
    with tf.Session() as sess:
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        minibatches = random_mini_batches(H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed + 1)

        # Generate captions for all batches
        for minibatch in minibatches:
            (minibatch_H, minibatch_Ipast, minibatch_Ifuture, _minibatch_Ycaptions, _minibatch_Xcaptions) = minibatch
            spj.generate_caption_2(sess, minibatch_H, minibatch_Ipast, minibatch_Ifuture)

    return 0



In [44]:
# Caption the training split; the tuple order is (H, Ipast, Ifuture, Ycaptions, Xcaptions).
data = (
    H_train,
    Ipast_train,
    Ifuture_train,
    Ycaptions_train,
    Xcaptions_train,
)
direct_caption_generation(data=data, learning_rate=learning_rate, minibatch_size=minibatch_size)

Restoring from latest checkpoint...
INFO:tensorflow:Restoring parameters from /home/martnzjulio_a/songze/checkpoints/model-690
predictions:  (32, 30, 2)
next_x:  (32, 30, 1)
x:  (32, 30, 1)
x:  (32, 30, 2)
predictions:  (32, 30, 3)
next_x:  (32, 30, 1)
x:  (32, 30, 2)
x:  (32, 30, 3)
predictions:  (32, 30, 4)
next_x:  (32, 30, 1)
x:  (32, 30, 3)
x:  (32, 30, 4)
predictions:  (32, 30, 2)
next_x:  (32, 30, 1)
x:  (32, 30, 1)
x:  (32, 30, 2)
predictions:  (32, 30, 3)
next_x:  (32, 30, 1)
x:  (32, 30, 2)
x:  (32, 30, 3)
predictions:  (32, 30, 4)
next_x:  (32, 30, 1)
x:  (32, 30, 3)
x:  (32, 30, 4)
predictions:  (32, 30, 2)
next_x:  (32, 30, 1)
x:  (32, 30, 1)
x:  (32, 30, 2)
predictions:  (32, 30, 3)
next_x:  (32, 30, 1)
x:  (32, 30, 2)
x:  (32, 30, 3)
predictions:  (32, 30, 4)
next_x:  (32, 30, 1)
x:  (32, 30, 3)
x:  (32, 30, 4)


0