# Imports

In [15]:
# As usual, a bit of setup
import time
from time import process_time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
import copy
import math
from utils.data_utils import *
import sys
import re
from utils.spj import Config
from utils.spj import SPJ
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Specify Model Directory

In [2]:
# Root directory under which the SPJ csv files, checkpoints and TensorBoard logs are addressed.
# home_dir = "/home/martnzjulio_a/songze"
home_dir = "/home/songzeli"
# Experiment tag; it becomes the suffix of the checkpoint / TensorBoard directory names.
version = "100_train_attention_in_graph"
# Batch size passed to direct_inference and to the sampling cell.
minibatch_size = 25

# Echo the active configuration so the recorded output documents the run.
print()
for banner, setting in (("DIRECTORY SET TO: ", home_dir),
                        ("VERSION SET TO  : ", version)):
    print(banner, setting)


DIRECTORY SET TO:  /home/songzeli
VERSION SET TO  :  100_train_attention_in_graph


# Load Vocabulary

In [3]:
# Load Vocabulary
embedding_size = 512
pad_len, num_steps = 30, 30
max_num_proposals = 10
vocabulary, vocab_size = caption_preprocess(home_dir)

# Word Embedding Matrix
# Built exactly once, so num_classes is derived from the same word2id mapping
# that every later cell uses (a second identical call would only repeat the
# work and rebind emb_matrix / word2id / id2word).
emb_matrix, word2id, id2word = get_wordvector(embedding_size, vocab_size, vocabulary)  # changed by Songze
num_classes = len(word2id)

Total number of words in all captions:  532264
Vocabulary Size (Unique):  11125


# Load All Data

In [4]:
# Number of training videos to keep from the csv (upper bound; see num_train below).
num_train = 100

# Load Training Data
train_file = home_dir + "/SPJ/train_2400.csv"
train_ids, train_data, train_padded_proposals, train_padded_framestamps = video_preprocess(home_dir, train_file, max_num_proposals)

# Train Captions
train_padded_sentences, train_padded_sentences_2, train_padded_sentences_id = get_padded_sentences_id(pad_len, train_ids, train_data, word2id, max_num_proposals)
# astype() always allocates a new array, so the padded sentences returned above
# are never aliased by the caption tensors and no explicit deep copy is needed.
# The label tensor additionally drops index 0 of the last axis.
Ycaptions_train = np.transpose(train_padded_sentences_2, axes=(0, 2, 1)).astype(np.int32)[:num_train, :, 1:]
Xcaptions_train = np.transpose(train_padded_sentences, axes=(0, 2, 1)).astype(np.int32)[:num_train]

Ycaptions_train = truncate_captions(Ycaptions_train)
Xcaptions_train = truncate_captions(Xcaptions_train)


# Train Features
VideoIds_train = train_ids[:num_train]
Framestamps_train = train_padded_framestamps[:num_train]
H_train = train_padded_proposals.astype(np.float32)[:num_train]
# NOTE(review): temporal_indicator runs on every loaded video and is sliced
# afterwards; if it works per video, slicing first would be cheaper -- confirm.
Ipast_train = temporal_indicator(train_padded_framestamps, mode="past").astype(np.float32)[:num_train]
Ifuture_train = temporal_indicator(train_padded_framestamps, mode="future").astype(np.float32)[:num_train]

# The csv may hold fewer rows than requested, so report the real count.
num_train = len(VideoIds_train)
print("Number of Training Examples:", num_train)
print()
print("VideoIds_train.shape: ", VideoIds_train.shape)
print("Framestamps_train.shape: ", Framestamps_train.shape)
print("Xcaptions_train.shape: ", Xcaptions_train.shape)
print("Ycaptions_train.shape: ", Ycaptions_train.shape)
print("H_train.shape: ", H_train.shape)
print("Ipast_train.shape: ", Ipast_train.shape)
print("Ifuture_train.shape: ", Ifuture_train.shape)
print()

Number of Training Examples: 100

VideoIds_train.shape:  (100,)
Framestamps_train.shape:  (100, 2, 10)
Xcaptions_train.shape:  (100, 10, 30)
Ycaptions_train.shape:  (100, 10, 30)
H_train.shape:  (100, 500, 10)
Ipast_train.shape:  (100, 10, 10)
Ifuture_train.shape:  (100, 10, 10)



In [5]:
# Number of validation videos to keep from the csv (upper bound; see num_val below).
num_val = 50

# Load Validation Data
val_file = home_dir + "/SPJ/train_val_300.csv"
val_ids, val_data, val_padded_proposals, val_padded_framestamps = video_preprocess(home_dir, val_file, max_num_proposals)

# Validation Captions
val_padded_sentences, val_padded_sentences_2, val_padded_sentences_id = get_padded_sentences_id(pad_len, val_ids, val_data, word2id, max_num_proposals)
# astype() always allocates a new array, so the padded sentences returned above
# are never aliased by the caption tensors and no explicit deep copy is needed.
# The label tensor additionally drops index 0 of the last axis.
Ycaptions_val = np.transpose(val_padded_sentences_2, axes=(0, 2, 1)).astype(np.int32)[:num_val, :, 1:]
Xcaptions_val = np.transpose(val_padded_sentences, axes=(0, 2, 1)).astype(np.int32)[:num_val]
Ycaptions_val = truncate_captions(Ycaptions_val)
Xcaptions_val = truncate_captions(Xcaptions_val)


# Validation Features
VideoIds_val = val_ids[:num_val]
Framestamps_val = val_padded_framestamps[:num_val]
H_val = val_padded_proposals.astype(np.float32)[:num_val]
# NOTE(review): temporal_indicator runs on every loaded video and is sliced
# afterwards; if it works per video, slicing first would be cheaper -- confirm.
Ipast_val = temporal_indicator(val_padded_framestamps, mode="past").astype(np.float32)[:num_val]
Ifuture_val = temporal_indicator(val_padded_framestamps, mode="future").astype(np.float32)[:num_val]

# The csv may hold fewer rows than requested, so report the real count.
num_val = len(VideoIds_val)
print("Number of Validation Examples:", num_val)
print()
print("VideoIds_val.shape: ", VideoIds_val.shape)
print("Framestamps_val.shape: ", Framestamps_val.shape)
print("Xcaptions_val.shape: ", Xcaptions_val.shape)
print("Ycaptions_val.shape: ", Ycaptions_val.shape)
print("H_val.shape: ", H_val.shape)
print("Ipast_val.shape: ", Ipast_val.shape)
print("Ifuture_val.shape: ", Ifuture_val.shape)
print()

Number of Validation Examples: 50

VideoIds_val.shape:  (50,)
Framestamps_val.shape:  (50, 2, 10)
Xcaptions_val.shape:  (50, 10, 30)
Ycaptions_val.shape:  (50, 10, 30)
H_val.shape:  (50, 500, 10)
Ipast_val.shape:  (50, 10, 10)
Ifuture_val.shape:  (50, 10, 10)



# Training Function

In [6]:
def model(all_train, all_val, starter_learning_rate, keep_prob, num_epochs, home_dir, version, print_cost = True):
    """
    Trains the SPJ network (C3D->ATTENTION->CAPTIONING) and checkpoints it periodically.

    If a checkpoint already exists in the checkpoint directory, training resumes
    from it (weights, optimizer state and global_step); otherwise all variables
    are freshly initialized.

    Arguments:
    all_train -- tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) with the training data
    all_val -- tuple with the same layout holding the validation data
    starter_learning_rate -- initial learning rate of the exponential decay schedule
    keep_prob -- dropout keep probability fed while training (validation always feeds 1.0)
    num_epochs -- number of passes over the training data performed by this call
    home_dir -- directory containing checkpoints_<version>/ and tensorboard_<version>/
    version -- experiment tag used in those two directory names
    print_cost -- True to print the epoch losses and write them to TensorBoard

    The minibatch size is not an argument; it is read from spj.config.batch_size.

    Returns:
    0 -- the learnt parameters are persisted through the checkpoints, not returned.
    """
    (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train) = all_train
    (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)   = all_val

    # Directory to Save Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
    tensorboard_dir =  home_dir + "/tensorboard_" + str(version) + "/"
    print("Checkpoint directory: ", checkpoint_dir)
    print("Tensorboard directory: ", tensorboard_dir)

    # Reset Graph
    tf.reset_default_graph()

    # For Consistency
    tf.set_random_seed(1)
    seed = 3

    # Number of Training Examples
    num_train = H_train.shape[0]

    # Model
    config = Config()
    spj = SPJ(config)

    # Print Hyperparameters
    print()
    print("Hyperparameters:")
    print("----------------")
    print("Starter Learning Rate: ", starter_learning_rate)
    print("Number of Proposals: ", spj.config.num_proposals)
    print("C3D Features Dim: ", spj.config.num_c3d_features )
    print("Batch Size: ", spj.config.batch_size)
    print("Dropout Keep Prob: ", keep_prob)
    print("Vocab Size: ", spj.config.num_classes)
    print("Number of LSTM Time Steps: ", spj.config.num_steps)
    print("Word Embedding Size: " , spj.config.hidden_dim)
    print("LSTM Hidden Dim: " , spj.config.hidden_dim)
    print("LSTM Num Layers: " , spj.config.num_layers)

    # Global step counter (one increment per optimizer step)
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Learning Rate Decay
    learning_rate = tf.train.exponential_decay(
        learning_rate = starter_learning_rate,
        global_step = global_step,
        decay_steps = 100000,
        decay_rate = 0.96,
        staircase=True)

    # Backpropagation: Adam on the model loss.
    # Passing global_step to minimize() increments it at every optimizer step.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)
#     optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(spj._loss, global_step=global_step)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables (created after the optimizer
    # so that its slot variables are checkpointed as well).
    saver = tf.train.Saver(max_to_keep=5)

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # check for latest checkpoint
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            # If no check point run the initialization
            print()
            print("No checkpoint exists, initializing parameters...")
            sess.run(init)
        else:
            print()
            print("Restoring from latest checkpoint...")
            # Restoring is itself the initialization. Running `init` afterwards
            # would overwrite the restored weights and reset global_step to 0.
            saver.restore(sess, latest_checkpoint)

        # Tensorboard
        summary_writer = tf.summary.FileWriter(tensorboard_dir, sess.graph)

        try:
            # Training Loop
            for epoch in range(num_epochs):

                # Record start time
                print()
                start = process_time()

                train_losses = []
                val_losses = []

                # Optimizer steps per epoch, used to turn global_step into an epoch
                # number. Clamped to 1 so a dataset smaller than one batch does not
                # cause a division by zero.
                num_train_minibatches = max(1, num_train // spj.config.batch_size)

                # Get minibatches (new seed every epoch)
                seed = seed + 1
                train_minibatches = random_mini_batches(VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, spj.config.batch_size , seed)
                val_minibatches = random_mini_batches(VideoIds_val, Framestamps_val, H_val, Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val,   spj.config.batch_size , seed)

                for train_minibatch in train_minibatches:

                    # Select minibatch
                    (minibatch_VideoIds_train, minibatch_Framestamps_train, minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatch
                    minibatch_Ycaptions_train = id_2_one_hot_void_padding(minibatch_Ycaptions_train, spj.config.num_classes, void_dim=0)

                    # Run Train Session
                    train_feed={spj._H: minibatch_H_train,
                          spj._Ipast: minibatch_Ipast_train,
                          spj._Ifuture: minibatch_Ifuture_train,
                          spj._x: minibatch_Xcaptions_train,
                          spj._y: minibatch_Ycaptions_train,
                          spj._keep_prob: keep_prob,
                          spj._reg: 0.0}
                    _ , minibatch_train_loss = sess.run([optimizer, spj._loss], feed_dict=train_feed)
                    train_losses.append(minibatch_train_loss)

                for val_minibatch in val_minibatches:

                    # Select minibatch
                    (minibatch_VideoIds_val, minibatch_Framestamps_val, minibatch_H_val, minibatch_Ipast_val, minibatch_Ifuture_val, minibatch_Ycaptions_val, minibatch_Xcaptions_val) = val_minibatch
                    minibatch_Ycaptions_val = id_2_one_hot_void_padding(minibatch_Ycaptions_val, spj.config.num_classes, void_dim=0)

                    # Run Validation Session (no optimizer op, dropout disabled)
                    val_feed={spj._H: minibatch_H_val,
                              spj._Ipast: minibatch_Ipast_val,
                              spj._Ifuture: minibatch_Ifuture_val,
                              spj._x: minibatch_Xcaptions_val,
                              spj._y: minibatch_Ycaptions_val,
                              spj._keep_prob: 1.0,
                              spj._reg: 0.0}
                    minibatch_val_loss = sess.run(spj._loss, feed_dict=val_feed)
                    val_losses.append(minibatch_val_loss)

                epoch_train_loss = np.mean(train_losses)
                epoch_val_loss = np.mean(val_losses)

                # Computed unconditionally: the checkpoint logic below needs it even
                # when print_cost is False.
                global_epoch = tf.train.global_step(sess, global_step)//num_train_minibatches

                # Print cost
                if print_cost:
                    print("Epoch: ", global_epoch)
                    print ("Training Loss: ", epoch_train_loss)
                    print ("Validation Loss: ", epoch_val_loss)
                    # Add and Write to Tensorboard. Each loss goes into its own
                    # summary; the tag strings are kept unchanged so existing
                    # TensorBoard runs stay comparable.
                    train_summary = tf.Summary()
                    val_summary = tf.Summary()
                    train_summary.value.add(tag="train_losss", simple_value=epoch_train_loss)
                    val_summary.value.add(tag="val_losss", simple_value=epoch_val_loss)
                    summary_writer.add_summary(train_summary, global_epoch)
                    summary_writer.add_summary(val_summary, global_epoch)

                # Save Model (every 10 epochs)
                if global_epoch % 10 == 0:
                    print("Saving Checkpoint for global_step " + str(global_epoch))
                    saver.save(sess, checkpoint_dir + 'model', global_step = global_epoch)

                # Save and Print Processed Time
                end = process_time()
                print()
                print("Time Elapased: ", end - start)
        finally:
            # Flush pending events and release the event file even if training
            # is interrupted.
            summary_writer.close()

    return 0

# Run Training

In [9]:
# Train Model
# Hyperparameters of this run; learning_rate is reused by the inference cells below.
learning_rate, keep_prob, num_epochs = 0.01, 0.8, 500

# Both tuples follow the layout that model() unpacks:
# (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
all_train = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
all_val = (VideoIds_val, Framestamps_val, H_val, Ipast_val, Ifuture_val, Ycaptions_val, Xcaptions_val)

execute = model(
    all_train=all_train,
    all_val=all_val,
    starter_learning_rate=learning_rate,
    keep_prob=keep_prob,
    num_epochs=num_epochs,
    home_dir=home_dir,
    version=version,
)

Checkpoint directory:  /home/songzeli/checkpoints_100_train_attention_in_graph/
Tensorboard directory:  /home/songzeli/tensorboard_100_train_attention_in_graph/

Hyperparameters:
----------------
Starter Learning Rate:  0.01
Number of Proposals:  10
C3D Features Dim:  500
Batch Size:  25
Dropout Keep Prob:  0.8
Vocab Size:  10999
Number of LSTM Time Steps:  30
Word Embedding Size:  512
LSTM Hidden Dim:  512
LSTM Num Layers:  2

No checkpoint exists, initializing parameters...

Epoch:  1
Training Loss:  2.1979616
Validation Loss:  1.075989

Time Elapased:  6.552239633999761

Epoch:  2
Training Loss:  1.70839
Validation Loss:  1.0904464

Time Elapased:  6.067095675000019

Epoch:  3
Training Loss:  1.5933272
Validation Loss:  1.0959735

Time Elapased:  5.9491887409999435

Epoch:  4
Training Loss:  1.5524871
Validation Loss:  1.0952923

Time Elapased:  5.716236482000113

Epoch:  5
Training Loss:  1.5170884
Validation Loss:  1.0897216

Time Elapased:  6.025244570999803

Epoch:  6
Training L

Epoch:  76
Training Loss:  1.1646026
Validation Loss:  1.1335552

Time Elapased:  5.922767295000085

Epoch:  77
Training Loss:  1.1621168
Validation Loss:  1.1332083

Time Elapased:  5.923474907999662

Epoch:  78
Training Loss:  1.1618859
Validation Loss:  1.1387823

Time Elapased:  5.893849670000236

Epoch:  79
Training Loss:  1.1607641
Validation Loss:  1.1281638

Time Elapased:  5.952097919999687

Epoch:  80
Training Loss:  1.1629307
Validation Loss:  1.1292666
Saving Checkpoint for global_step 80

Time Elapased:  6.288596659000177

Epoch:  81
Training Loss:  1.1583257
Validation Loss:  1.1373034

Time Elapased:  5.935546414999408

Epoch:  82
Training Loss:  1.1556821
Validation Loss:  1.1282922

Time Elapased:  5.97681093600022

Epoch:  83
Training Loss:  1.152647
Validation Loss:  1.1343815

Time Elapased:  5.788444332999461

Epoch:  84
Training Loss:  1.1458685
Validation Loss:  1.1363485

Time Elapased:  5.971957251000276

Epoch:  85
Training Loss:  1.1412892
Validation Loss:  1

Epoch:  154
Training Loss:  1.0699996
Validation Loss:  1.1453427

Time Elapased:  5.8646460750005645

Epoch:  155
Training Loss:  1.0706296
Validation Loss:  1.1396618

Time Elapased:  5.895059309000317

Epoch:  156
Training Loss:  1.0674427
Validation Loss:  1.1443784

Time Elapased:  6.043234819000645

Epoch:  157
Training Loss:  1.0627346
Validation Loss:  1.1453336

Time Elapased:  5.924158310999701

Epoch:  158
Training Loss:  1.0627292
Validation Loss:  1.1494328

Time Elapased:  5.8751688259999355

Epoch:  159
Training Loss:  1.0538237
Validation Loss:  1.1444428

Time Elapased:  5.9975899969995226

Epoch:  160
Training Loss:  1.0507228
Validation Loss:  1.153449
Saving Checkpoint for global_step 160

Time Elapased:  6.474693729999672

Epoch:  161
Training Loss:  1.044446
Validation Loss:  1.1534194

Time Elapased:  5.960681148000731

Epoch:  162
Training Loss:  1.0335805
Validation Loss:  1.155788

Time Elapased:  5.9549776939993535

Epoch:  163
Training Loss:  1.0338544
Valid

Epoch:  232
Training Loss:  1.0246015
Validation Loss:  1.203646

Time Elapased:  5.835512875999484

Epoch:  233
Training Loss:  1.0110682
Validation Loss:  1.2141509

Time Elapased:  5.872164028999578

Epoch:  234
Training Loss:  0.9942828
Validation Loss:  1.2052824

Time Elapased:  5.891425026000434

Epoch:  235
Training Loss:  0.98151875
Validation Loss:  1.2111766

Time Elapased:  5.867962274000092

Epoch:  236
Training Loss:  0.96865124
Validation Loss:  1.212888

Time Elapased:  5.922088010000152

Epoch:  237
Training Loss:  0.9599912
Validation Loss:  1.2158647

Time Elapased:  5.9794999309997365

Epoch:  238
Training Loss:  0.9511332
Validation Loss:  1.2154579

Time Elapased:  5.9658316340000965

Epoch:  239
Training Loss:  0.94206417
Validation Loss:  1.2174399

Time Elapased:  5.946629297999607

Epoch:  240
Training Loss:  0.93573165
Validation Loss:  1.2209558
Saving Checkpoint for global_step 240

Time Elapased:  6.23345393999989

Epoch:  241
Training Loss:  0.9292364
Val

Epoch:  310
Training Loss:  0.69915956
Validation Loss:  1.3529732
Saving Checkpoint for global_step 310

Time Elapased:  6.353069824000158

Epoch:  311
Training Loss:  0.689918
Validation Loss:  1.3514082

Time Elapased:  5.929295946999446

Epoch:  312
Training Loss:  0.6894162
Validation Loss:  1.3536363

Time Elapased:  5.913882948999344

Epoch:  313
Training Loss:  0.6928287
Validation Loss:  1.3565347

Time Elapased:  5.935443428000326

Epoch:  314
Training Loss:  0.6922823
Validation Loss:  1.3583763

Time Elapased:  5.7334762859991315

Epoch:  315
Training Loss:  0.68502915
Validation Loss:  1.3570921

Time Elapased:  5.884842737999861

Epoch:  316
Training Loss:  0.6853718
Validation Loss:  1.3620443

Time Elapased:  5.916462743999546

Epoch:  317
Training Loss:  0.6797564
Validation Loss:  1.3613493

Time Elapased:  6.138159335999262

Epoch:  318
Training Loss:  0.67884004
Validation Loss:  1.3664856

Time Elapased:  5.970263565000096

Epoch:  319
Training Loss:  0.6776858
Val

Epoch:  388
Training Loss:  0.5658481
Validation Loss:  1.4720277

Time Elapased:  6.029130785000234

Epoch:  389
Training Loss:  0.56542826
Validation Loss:  1.4734447

Time Elapased:  5.910397575000388

Epoch:  390
Training Loss:  0.5684037
Validation Loss:  1.4767586
Saving Checkpoint for global_step 390

Time Elapased:  6.298459900000125

Epoch:  391
Training Loss:  0.5633911
Validation Loss:  1.4774044

Time Elapased:  5.881966282999201

Epoch:  392
Training Loss:  0.5590972
Validation Loss:  1.4796481

Time Elapased:  6.026799240999935

Epoch:  393
Training Loss:  0.5591944
Validation Loss:  1.4811873

Time Elapased:  5.9910072510001555

Epoch:  394
Training Loss:  0.56127626
Validation Loss:  1.4825389

Time Elapased:  6.071819582999524

Epoch:  395
Training Loss:  0.5634375
Validation Loss:  1.4816172

Time Elapased:  5.953082380999149

Epoch:  396
Training Loss:  0.56078583
Validation Loss:  1.4815663

Time Elapased:  6.028373017999911

Epoch:  397
Training Loss:  0.5591532
Va

Epoch:  466
Training Loss:  0.4957801
Validation Loss:  1.5718012

Time Elapased:  5.991114006000316

Epoch:  467
Training Loss:  0.49593532
Validation Loss:  1.5719974

Time Elapased:  5.882351249000749

Epoch:  468
Training Loss:  0.4941706
Validation Loss:  1.5711542

Time Elapased:  5.7840890970001055

Epoch:  469
Training Loss:  0.49043295
Validation Loss:  1.5760565

Time Elapased:  5.925197380999634

Epoch:  470
Training Loss:  0.49177653
Validation Loss:  1.5744491
Saving Checkpoint for global_step 470

Time Elapased:  6.415263044999847

Epoch:  471
Training Loss:  0.49537516
Validation Loss:  1.5751716

Time Elapased:  6.142953067000235

Epoch:  472
Training Loss:  0.48929465
Validation Loss:  1.5772574

Time Elapased:  5.984634774999904

Epoch:  473
Training Loss:  0.49005035
Validation Loss:  1.5810537

Time Elapased:  6.0907010359997

Epoch:  474
Training Loss:  0.4901415
Validation Loss:  1.5833912

Time Elapased:  5.986985335000099

Epoch:  475
Training Loss:  0.48912024


In [10]:
def setup_graph_and_saver(learning_rate):
    """
    Rebuilds the SPJ graph from scratch together with the ops needed to restore a checkpoint.

    Arguments:
    learning_rate -- learning rate handed to the AdamOptimizer built on spj._loss

    Returns:
    spj -- SPJ model built from a default Config()
    saver -- tf.train.Saver, created after the optimizer
    global_step -- non-trainable step counter variable named 'global_step'
    optimizer -- Adam minimize op on spj._loss (it increments global_step)
    init -- tf.global_variables_initializer() op
    seed -- the integer 3, which callers use as the base seed for minibatching
    """
    # Start from an empty default graph, then set the graph-level seed on it.
    tf.reset_default_graph()    
    tf.set_random_seed(1)                             
    seed = 3 
    global_step = tf.Variable(0, name='global_step', trainable=False)
    config = Config()
    spj = SPJ(config)
    # The optimizer is built before the Saver so that the variables it creates
    # are part of the Saver's variable list.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)
    #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9).minimize(spj._loss, global_step=global_step)  
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    return spj, saver, global_step, optimizer, init, seed

def direct_inference(data, learning_rate, minibatch_size,home_dir, version):
    """
    Runs the latest checkpoint of `version` over `data` and collects the outputs.

    Arguments:
    data -- tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
    learning_rate -- only used to rebuild the optimizer inside setup_graph_and_saver
    minibatch_size -- number of examples per forward pass
    home_dir -- directory containing checkpoints_<version>/
    version -- experiment tag used in the checkpoint directory name

    Returns:
    predictions -- spj._predictions of every minibatch, concatenated along axis 0
    labels -- argmax over axis 3 of the one-hot labels, concatenated along axis 0
    ids -- video ids in the same (minibatch) order as predictions and labels

    Raises:
    ValueError -- if no checkpoint exists or no minibatch was produced

    The mean minibatch loss is printed as a side effect.
    """
    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph
    spj, saver, global_step, optimizer, init, seed = setup_graph_and_saver(learning_rate)
    # Directory Where Saved Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

    # Start Session
    with tf.Session() as sess:

        # Check for Latest Checkpoint
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            # Fail with the directory in the message instead of letting
            # saver.restore() complain about a None path.
            raise ValueError("No checkpoint found in " + checkpoint_dir)
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        seed = seed + 1
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

        losses = []
        # Per-batch results are collected in lists and joined once at the end,
        # instead of re-copying the growing arrays on every iteration.
        pred_batches = []
        label_batches = []
        id_batches = []

        # For all batchs
        for minibatch in minibatches:

            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)

            # Feed (dropout disabled for inference)
            feed={spj._H: minibatch_H,
                  spj._Ipast: minibatch_Ipast,
                  spj._Ifuture: minibatch_Ifuture,
                  spj._x: minibatch_Xcaptions,
                  spj._y: minibatch_Ycaptions,
                  spj._keep_prob: 1.0,
                  spj._reg: 0.0
                 }

            # Run Predictions
            loss, pred, lab = sess.run([spj._loss, spj._predictions, spj._y], feed_dict=feed)
            losses.append(loss)

            # Cache Results (one-hot labels are collapsed back to ids)
            pred_batches.append(pred)
            label_batches.append(np.argmax(lab,axis=3))
            id_batches.append(minibatch_VideoIds)

        if not losses:
            raise ValueError("random_mini_batches produced no minibatches; nothing to run inference on")

        avg_loss = np.mean(losses)
        print(avg_loss)

    predictions = np.concatenate(pred_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    ids = np.concatenate(id_batches, axis=0)
    return predictions, labels, ids
# Dataset to evaluate; swap in one of the commented tuples to use another split.
data = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
#data =   (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)
#data =   (VideoIds_test, Framestamps_test, H_test,   Ipast_test,   Ifuture_test,   Ycaptions_test,   Xcaptions_test)
predictions2, labels2, ids2 = direct_inference(
    data=data,
    learning_rate=learning_rate,
    minibatch_size=minibatch_size,
    home_dir=home_dir,
    version=version,
)

Restoring from latest checkpoint...
INFO:tensorflow:Restoring parameters from /home/songzeli/checkpoints_100_train_attention_in_graph/model-500
0.4731813


In [78]:
# Show predicted vs. label words side by side for a single (example, proposal)
# pair of the inference output; id2word maps the ids back to words.
print_pred_and_labels(predictions2, labels2, ids2, id2word, example=1, proposal=5)


VIDEO ID             PREDICTION           LABEL               
--------             -----                -----               
v_sjyZWmvTGA4        he                   b'<pad>'            
v_sjyZWmvTGA4        also                 b'<pad>'            
v_sjyZWmvTGA4        out                  b'<pad>'            
v_sjyZWmvTGA4        out                  b'<pad>'            
v_sjyZWmvTGA4        front                b'<pad>'            
v_sjyZWmvTGA4        the                  b'<pad>'            
v_sjyZWmvTGA4        shed                 b'<pad>'            
v_sjyZWmvTGA4        home                 b'<pad>'            
v_sjyZWmvTGA4        b'<end>'             b'<pad>'            
v_sjyZWmvTGA4        b'<end>'             b'<pad>'            
v_sjyZWmvTGA4        b'<end>'             b'<pad>'            
v_sjyZWmvTGA4        b'<end>'             b'<pad>'            
v_sjyZWmvTGA4        a                    b'<pad>'            
v_sjyZWmvTGA4        b'<end>'             b'<pad>'    

0

In [16]:
# BLEU@1..4 of the inference output; the labels are passed first, the predictions second.
# NOTE(review): the recorded output warns about 0 counts of n-gram overlaps and
# suggests a SmoothingFunction, so these scores should be read with caution --
# confirm how compute_bleu_at_1_2_3_4 handles padding tokens and smoothing.
bleu1, bleu2, bleu3, bleu4 = compute_bleu_at_1_2_3_4(labels2, predictions2)
print(bleu1, bleu2, bleu3, bleu4)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 5-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.5314450908821954 0.5159225729973744 0.5282469538767958 0.5400661911257377


# Sample Captions from the Trained Model

In [19]:
# Sanity check: confirm which root directory the cells below will read from.
print(home_dir)

/home/songzeli


In [47]:
# Sample captions for the first training minibatch using the latest checkpoint.
from utils.data_utils import *

# Build a fresh graph first. The graph-level seed is attached to the current
# default graph, so it must be set AFTER reset_default_graph(); setting it
# before the reset (as this cell used to do) leaves the new graph unseeded.
tf.reset_default_graph()
tf.set_random_seed(1)
seed = 3

config = Config()
spj = SPJ(config)
global_step = tf.Variable(0, name='global_step', trainable=False)

# Same checkpoint location as the training / inference cells, derived from the
# notebook configuration instead of a hard-coded path.
checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise ValueError("No checkpoint found in " + checkpoint_dir)

# The Saver is created before the optimizer on purpose here: only the variables
# that already exist are restored, and the optimizer op is never run in this cell.
saver = tf.train.Saver()
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)

# Only the first minibatch is used for sampling.
train_minibatches = random_mini_batches(VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, minibatch_size, seed)
(minibatch_VideoIds_train, minibatch_Framestamps_train, minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatches[0]

with tf.Session() as sess:
    saver.restore(sess, latest_checkpoint)
    word_id = spj.caption_generation(sess,minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Xcaptions_train, minibatch_Ycaptions_train)
print ("word_id: ", word_id)

INFO:tensorflow:Restoring parameters from /home/songzeli/checkpoints_100_train_attention_in_graph/model-500


  a = np.log(a) / (temperature)


word_id:  [[[2.0000e+00 2.7440e+03 8.8500e+02 ... 1.4570e+03 3.5800e+02 5.6400e+03]
  [2.0000e+00 1.7000e+02 4.7210e+03 ... 4.3320e+03 3.2540e+03 1.0947e+04]
  [2.0000e+00 4.9280e+03 6.0040e+03 ... 4.7980e+03 1.0270e+04 4.7210e+03]
  ...
  [2.0000e+00 8.4150e+03 2.3670e+03 ... 8.7480e+03 1.1840e+03 9.7890e+03]
  [2.0000e+00 4.7210e+03 6.7670e+03 ... 4.2680e+03 3.1160e+03 1.8770e+03]
  [2.0000e+00 9.9000e+02 3.2540e+03 ... 3.1960e+03 3.3300e+03 1.0140e+03]]

 [[2.0000e+00 1.4570e+03 5.0130e+03 ... 4.8790e+03 6.3080e+03 7.8510e+03]
  [2.0000e+00 9.2180e+03 8.2450e+03 ... 6.3010e+03 9.1590e+03 6.4860e+03]
  [2.0000e+00 1.7000e+02 9.9890e+03 ... 3.0000e+00 9.7150e+03 6.6210e+03]
  ...
  [2.0000e+00 9.7610e+03 5.5340e+03 ... 8.9230e+03 5.6400e+03 1.6200e+02]
  [2.0000e+00 5.8070e+03 1.5810e+03 ... 3.1960e+03 9.7610e+03 1.0875e+04]
  [2.0000e+00 2.7440e+03 9.9240e+03 ... 3.6890e+03 9.1160e+03 3.1160e+03]]

 [[2.0000e+00 9.6130e+03 5.1000e+03 ... 7.7260e+03 6.2390e+03 2.8580e+03]
  [2.0000e+0

In [77]:
# Decode one sampled caption back to words.
# NOTE(review): `bat` indexes axis 0 and `length` axis 1 of word_id; the names
# suggest batch / caption length, but axis 1 looks like the proposal index -- confirm.
bat = 1
length = 5
for token_id in word_id[bat, length]:
    print(id2word[token_id])

b'<sta>'
then
boys
him
smiling
using
welding
two
welding
through
rides
a
ocean
kitchen
event
event
her
jason
have
moving
practiced
and
ping
ground
the
she
b'<end>'
while
by


In [None]:
# NOTE(review): direct_inference is also defined in an earlier cell of this
# notebook; running this cell shadows that definition. Keep a single copy.
def direct_inference(data, learning_rate, minibatch_size,home_dir, version):
    """
    Runs the latest checkpoint of `version` over `data` and collects the outputs.

    Arguments:
    data -- tuple (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions)
    learning_rate -- only used to rebuild the optimizer inside setup_graph_and_saver
    minibatch_size -- number of examples per forward pass
    home_dir -- directory containing checkpoints_<version>/
    version -- experiment tag used in the checkpoint directory name

    Returns:
    predictions -- spj._predictions of every minibatch, concatenated along axis 0
    labels -- argmax over axis 3 of the one-hot labels, concatenated along axis 0
    ids -- video ids in the same (minibatch) order as predictions and labels

    Raises:
    ValueError -- if no checkpoint exists or no minibatch was produced

    The mean minibatch loss is printed as a side effect.
    """
    # Extract Test Data
    (VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data

    # Setup Graph
    spj, saver, global_step, optimizer, init, seed = setup_graph_and_saver(learning_rate)
    # Directory Where Saved Checkpoint
    checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

    # Start Session
    with tf.Session() as sess:

        # Check for Latest Checkpoint
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint is None:
            # Fail with the directory in the message instead of letting
            # saver.restore() complain about a None path.
            raise ValueError("No checkpoint found in " + checkpoint_dir)
        print("Restoring from latest checkpoint...")
        saver.restore(sess, latest_checkpoint)

        # Get minibatches
        seed = seed + 1
        minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

        losses = []
        # Per-batch results are collected in lists and joined once at the end,
        # instead of re-copying the growing arrays on every iteration.
        pred_batches = []
        label_batches = []
        id_batches = []

        # For all batchs
        for minibatch in minibatches:

            # Select minibatch
            (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
            minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)

            # Feed (dropout disabled for inference)
            feed={spj._H: minibatch_H,
                  spj._Ipast: minibatch_Ipast,
                  spj._Ifuture: minibatch_Ifuture,
                  spj._x: minibatch_Xcaptions,
                  spj._y: minibatch_Ycaptions,
                  spj._keep_prob: 1.0,
                  spj._reg: 0.0
                 }

            # Run Predictions
            loss, pred, lab = sess.run([spj._loss, spj._predictions, spj._y], feed_dict=feed)
            losses.append(loss)

            # Cache Results (one-hot labels are collapsed back to ids)
            pred_batches.append(pred)
            label_batches.append(np.argmax(lab,axis=3))
            id_batches.append(minibatch_VideoIds)

        if not losses:
            raise ValueError("random_mini_batches produced no minibatches; nothing to run inference on")

        avg_loss = np.mean(losses)
        print(avg_loss)

    predictions = np.concatenate(pred_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)
    ids = np.concatenate(id_batches, axis=0)
    return predictions, labels, ids
# Dataset to evaluate; swap in one of the commented tuples to use another split.
data = (VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
#data =   (VideoIds_val, Framestamps_val, H_val,   Ipast_val,   Ifuture_val,   Ycaptions_val,   Xcaptions_val)
#data =   (VideoIds_test, Framestamps_test, H_test,   Ipast_test,   Ifuture_test,   Ycaptions_test,   Xcaptions_test)
predictions2, labels2, ids2 = direct_inference(
    data=data,
    learning_rate=learning_rate,
    minibatch_size=minibatch_size,
    home_dir=home_dir,
    version=version,
)

In [None]:
# Inline inference pass over `data` that, in addition to predictions/labels/ids,
# also collects the LSTM outputs, the Hout tensor and the fed H/Ipast/Ifuture
# inputs for every minibatch.
#
# Results left in the notebook namespace:
#   predictions, labels, ids, lstm_out, mini_H, mini_Ipast, mini_Ifuture,
#   Hout_output, losses, avg_loss
(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions) = data
num_data = H.shape[0]

# Setup Graph
spj, saver, global_step, optimizer, init, seed = setup_graph_and_saver(learning_rate)
# Directory Where Saved Checkpoint
checkpoint_dir = home_dir + "/checkpoints_" + str(version) + "/"

# Start Session
with tf.Session() as sess:

    # Check for Latest Checkpoint; fail early with the directory in the message
    # instead of letting saver.restore complain about a None path.
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        raise ValueError("No checkpoint found in " + checkpoint_dir)
    print("Restoring from latest checkpoint...")
    saver.restore(sess, latest_checkpoint)

    # Get minibatches
    num_minibatches = num_data // minibatch_size
    seed = seed + 1
    minibatches = random_mini_batches(VideoIds, Framestamps, H, Ipast, Ifuture, Ycaptions, Xcaptions, minibatch_size, seed)

    losses = []

    # Per-minibatch chunks are appended here and concatenated once after the
    # loop; re-concatenating the growing arrays on every iteration copies all
    # previously collected data each time.
    cached = {
        "predictions": [],
        "labels": [],
        "ids": [],
        "lstm_out": [],
        "mini_H": [],
        "mini_Ipast": [],
        "mini_Ifuture": [],
        "Hout_output": [],
    }

    # For all batches
    for minibatch in minibatches:

        # Select minibatch
        (minibatch_VideoIds, minibatch_Framestamps, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = minibatch
        minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)

        # Feed (dropout disabled, no regularization)
        feed = {
            spj._H: minibatch_H,
            spj._Ipast: minibatch_Ipast,
            spj._Ifuture: minibatch_Ifuture,
            spj._x: minibatch_Xcaptions,
            spj._y: minibatch_Ycaptions,
            spj._keep_prob: 1.0,
            spj._reg: 0.0,
        }

        # Run Predictions
        loss, pred, lab, lstm_outputs, Hout = sess.run(
            [spj._loss, spj._predictions, spj._y, spj._lstm_outputs, spj._Hout],
            feed_dict=feed,
        )
        # One-hot labels -> class ids along the last (4th) axis.
        lab = np.argmax(lab, axis=3)
        losses.append(loss)

        # Cache Results
        cached["predictions"].append(pred)
        cached["labels"].append(lab)
        cached["ids"].append(minibatch_VideoIds)
        cached["lstm_out"].append(lstm_outputs)
        cached["mini_H"].append(minibatch_H)
        cached["mini_Ipast"].append(minibatch_Ipast)
        cached["mini_Ifuture"].append(minibatch_Ifuture)
        cached["Hout_output"].append(Hout)

    if not losses:
        raise ValueError("random_mini_batches returned no minibatches; nothing to evaluate")

    # Stitch the per-minibatch chunks together along the batch axis.
    predictions = np.concatenate(cached["predictions"], axis=0)
    labels = np.concatenate(cached["labels"], axis=0)
    ids = np.concatenate(cached["ids"], axis=0)
    lstm_out = np.concatenate(cached["lstm_out"], axis=0)
    mini_H = np.concatenate(cached["mini_H"], axis=0)
    mini_Ipast = np.concatenate(cached["mini_Ipast"], axis=0)
    mini_Ifuture = np.concatenate(cached["mini_Ifuture"], axis=0)
    Hout_output = np.concatenate(cached["Hout_output"], axis=0)
    avg_loss = np.mean(losses)

In [None]:
# Choose the arrays to evaluate; uncomment one of the alternatives to switch
# from the training split to the validation or test split.
data = (
    VideoIds_train, Framestamps_train,
    H_train, Ipast_train, Ifuture_train,
    Ycaptions_train, Xcaptions_train,
)
# data = (VideoIds_val, Framestamps_val, H_val, Ipast_val, Ifuture_val, Ycaptions_val, Xcaptions_val)
# data = (VideoIds_test, Framestamps_test, H_test, Ipast_test, Ifuture_test, Ycaptions_test, Xcaptions_test)

inference_args = (data, learning_rate, minibatch_size, home_dir, version)
predictions2, labels2, ids2 = direct_inference(*inference_args)

In [None]:
# Inspect a single (example, proposal) pair from the inference results.
print_pred_and_labels(
    predictions2,
    labels2,
    ids2,
    id2word,
    example=78,
    proposal=3,
)

In [None]:
import numpy as np
import seaborn as sns

np.random.seed(0)
sns.set()

# Take entry [60, 3] of lstm_out, drop its last 128 values, and fold the
# absolute values into 3 equally sized rows.
att = np.abs(lstm_out[60, 3, :-128]).reshape(3, -1)

# ax = sns.heatmap(att)
# Plot the per-row totals as a single-row heatmap and print the same totals.
ax = sns.heatmap(att.sum(axis=1).reshape(1, -1))
print('past', att.sum(axis=1))

In [None]:
# Restore the latest checkpoint, list the trainable variable names, and print
# the shape of the tensor named by `w_lstm`.
with tf.Session() as sess:
    # Check for Latest Checkpoint
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    print("Restoring from latest checkpoint...")
    saver.restore(sess, latest_checkpoint)

    variables_names = [var.name for var in tf.trainable_variables()]
    print(variables_names)

    # The tensor is fetched by its graph name rather than through a Python handle.
    w_lstm = 'rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0'
    values = sess.run(w_lstm)
    print(values.shape)

In [None]:
# Single-row heatmap of the first 10 columns of row 10 of att_Past.
# att_Past is computed in the "Attention Level of Each Word" cell further down,
# so that cell has to be run before this one.
# NOTE(review): this cell uses row 10 while the att_H / att_Future heatmaps use
# row 100 — confirm the difference is intended.
ax = sns.heatmap(
    att_Past[10, :10].reshape(1, -1),
    xticklabels=2,
    yticklabels=False,
    cmap="YlGnBu",
)

In [None]:
# Single-row heatmap of the first 10 columns of row 100 of att_H.
# att_H is computed in the "Attention Level of Each Word" cell further down,
# so that cell has to be run before this one.
ax = sns.heatmap(
    att_H[100, :10].reshape(1, -1),
    xticklabels=2,
    yticklabels=False,
    cmap="YlGnBu",
)

In [None]:
# Single-row heatmap of the first 10 columns of row 100 of att_Future.
# att_Future is computed in the "Attention Level of Each Word" cell further
# down, so that cell has to be run before this one.
ax = sns.heatmap(
    att_Future[100, :10].reshape(1, -1),
    xticklabels=2,
    yticklabels=False,
    cmap="YlGnBu",
)

# Attention Level of Each Word

In [None]:
import pylab as pl
import matplotlib.pyplot as plt

# Which caption to visualise.
example = 78
proposal = 3
# Flat row index into the attention arrays.
# NOTE(review): the factor 10 presumably equals max_num_proposals — confirm.
ind = example*10 + proposal

# First 500 channels of lstm_out with the last two axes swapped, and Hout
# flattened to rows of 1500 values split into three 500-wide thirds.
_lstm_head = np.transpose(lstm_out, [0, 2, 1])[:, :500, :]
_hout_rows = Hout_output.reshape(-1, 1500)

attention_Past = _lstm_head * np.expand_dims(_hout_rows[:, 0:500], 2)
attention_H = _lstm_head * np.expand_dims(_hout_rows[:, 500:1000], 2)
attention_Future = _lstm_head * np.expand_dims(_hout_rows[:, 1000:1500], 2)

# Collapse the 500-channel axis by summing absolute values.
att_Past = np.abs(attention_Past).sum(axis=1)
att_H = np.abs(attention_H).sum(axis=1)
att_Future = np.abs(attention_Future).sum(axis=1)

# Label the x axis with the first 10 label words of the chosen caption.
x = np.arange(10)
my_xticks = [str(id2word[labels[example, proposal, i]]) for i in range(10)]
plt.xticks(x, my_xticks)

# One curve per attention source, each shifted by its minimum and divided by
# its maximum over the first 10 positions.
# NOTE(review): (v - min) / max is not a full min-max rescale to [0, 1] —
# confirm this scaling is the intended one.
for _series, _label, _marker in (
    (att_Past, 'Attention_To_Past', 'o'),
    (att_H, 'Attention_To_Current', 's'),
    (att_Future, 'Attention_To_Future', '*'),
):
    _window = _series[ind, 0:10]
    plt.plot((_window - min(_window)) / max(_window), label=_label, marker=_marker, markersize=10)

plt.ylabel('Attention_Level')
plt.legend()
pl.xticks(rotation=-60, size=15)
plt.show()

# Beam Search

In [140]:
def softmax(x, ax=0):
    """Numerically stable softmax of `x` along axis `ax`.

    Args:
        x: array-like of scores (logits).
        ax: axis along which the outputs are normalized to sum to 1.

    Returns:
        NumPy array with the same shape as `x`; entries along `ax` sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the per-axis maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio from
    # becoming NaN) when the scores are large.
    shifted = x - np.max(x, axis=ax, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=ax, keepdims=True)

# Beam-search style decoding over the first training minibatch.
#
# NOTE(review): spj, saver, global_step, optimizer, init and seed created in
# this first section are all rebound by the setup_graph_and_saver() call
# further down; only `config` from here is used afterwards. This first graph
# construction looks redundant — confirm.
tf.reset_default_graph()
tf.set_random_seed(1)
seed = 3
global_step = tf.Variable(0, name='global_step', trainable=False)
config = Config()
spj = SPJ(config)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(spj._loss, global_step=global_step)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

batch_size = 25
beam_size = 1
# NOTE(review): `data` is not referenced again in this cell, and other cells
# unpack `data` as a 7-tuple; rebinding it to a 5-tuple here may break them if
# they are re-run afterwards — confirm.
data = (H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train)
# Ids of the next nine (beam_size*beam_size) candidate words.
word_pred = np.ones([batch_size,config.num_proposals,beam_size*beam_size])*2
# Probabilities of the current nine (beam_size*beam_size) candidate sentences.
prob_cur = np.ones([batch_size,config.num_proposals,beam_size*beam_size])
# Probabilities of the three (beam_size) sentences selected from the nine.
prob_prev_caption = np.ones([batch_size,config.num_proposals,beam_size])
# The beam_size*beam_size candidate sentences.
temp_caption = np.zeros([batch_size,config.num_proposals,config.num_steps,beam_size*beam_size])
# The beam_size kept sentences (original note says 50 words; the allocated
# length is config.num_steps).
beam_caption = np.zeros([batch_size,config.num_proposals,config.num_steps,beam_size])
# Position 0 of every caption is set to id 2 — presumably the start-of-sentence
# token; TODO confirm against word2id.
beam_caption[:,:,0,:] = 2
temp_caption[:,:,0,:] = 2
# Index of the caption position being filled next.
num = 1


# logits_beam = np.zeros([batch_size,config.num_proposals,config.num_steps,config.num_classes,beam_size])
# Plan: run sess on the three sentences, softmax gives three words each ->
# nine next words -> compute nine probabilities -> multiply the probabilities
# and pick three sentences -> assign the three sentences to beam_caption.

train_minibatches = random_mini_batches(VideoIds_train, Framestamps_train, H_train, Ipast_train, Ifuture_train, Ycaptions_train, Xcaptions_train, minibatch_size, seed)
#(minibatch_H_train, minibatch_Ipast_train, minibatch_Ifuture_train, minibatch_Ycaptions_train, minibatch_Xcaptions_train) = train_minibatches[0]
# Only the first minibatch is decoded.
(minibatch_VideoIds_train, minibatch_Framestamps_train, minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = train_minibatches[0]
minibatch_Ycaptions = id_2_one_hot_void_padding(minibatch_Ycaptions, spj.config.num_classes, void_dim=0)
# Rebinds spj, saver, global_step, optimizer, init and seed.
spj, saver, global_step, optimizer, init, seed = setup_graph_and_saver(learning_rate)



print(minibatch_Xcaptions.shape)
with tf.Session() as sess:
# (minibatch_H, minibatch_Ipast, minibatch_Ifuture, minibatch_Ycaptions, minibatch_Xcaptions) = train_minibatches[1]
    # NOTE(review): variables are initialized here rather than restored from a
    # checkpoint as in the other inference cells — confirm this is intended.
    sess.run(init)
    # Fill caption positions 1 .. config.num_steps-2, one position per pass.
    while num < config.num_steps-1:
        print(num)
        # Expand each of the beam_size kept captions into beam_size candidates.
        for i in range(beam_size):
            # The i-th kept caption is fed as the input caption.
            feed = {spj._H: minibatch_H,
                spj._Ipast: minibatch_Ipast,
                spj._Ifuture: minibatch_Ifuture,
                spj._x: beam_caption[:,:,:,i],
                spj._y: minibatch_Ycaptions,
                spj._keep_prob: 1.0,
                spj._reg: 0.0
               }
            
            logits = sess.run(spj.logits,feed_dict=feed)
#             print('logits',logits.shape)
            # Softmax over axis 3 to turn logits into probabilities.
            logits_softmax = softmax(logits,3)
            # Top beam_size probabilities along the last axis, in descending order.
            beam_softmax = np.sort(logits_softmax)[:,:,:,::-1][:,:,:,:beam_size]
            prob_cur[:,:,i*beam_size:(i+1)*beam_size] = beam_softmax[:,:,num,:]
            # Sort the probabilities and take the indices of the top beam_size.
            beam_word_id = np.argsort(logits_softmax[:,:,:,:],axis =3)[:,:,:,::-1][:,:,:,:beam_size]
            word_pred[:,:,i*beam_size:(i+1)*beam_size] = beam_word_id[:,:,num,:]
            # Assign the predicted words to temp_caption at position `num`.
            temp_caption[:,:,num,i*beam_size:(i+1)*beam_size] = word_pred[:,:,i*beam_size:(i+1)*beam_size]
            # Scale the candidate probabilities by the parent caption's probability.
            # NOTE(review): prob_prev_caption is never reassigned in this loop, so
            # it stays at its initial ones — confirm whether it should be updated
            # after each selection.
            prob_cur[:,:,i*beam_size:(i+1)*beam_size] = prob_cur[:,:,i*beam_size:(i+1)*beam_size] * np.expand_dims(prob_prev_caption[:,:,i],axis=2)
#             print(id2word[int(temp_caption[1,1,num,0])])
        # Indices of the beam_size highest-probability candidates per
        # (batch item, proposal), in descending order.
        caption_id = np.argsort(prob_cur,axis=2)[:,:,::-1][:,:,:beam_size]
#       
        # Copy positions 0..num of each selected candidate into beam_caption.
        # NOTE(review): only position `num` of temp_caption is written per pass,
        # so for beam_size > 1 a candidate's earlier positions may not match the
        # caption it was expanded from — verify before raising beam_size.
        for i in range(temp_caption.shape[0]):
            for j in range(temp_caption.shape[1]):
                for k in range(caption_id.shape[2]):
#                     print(id2word[int(temp_caption[i,j,num,caption_id[i,j,k]])])
                    beam_caption[i,j,0:num+1,k] = temp_caption[i,j,0:num+1,caption_id[i,j,k]]
#                     assert beam_caption[i,j,num,k]==temp_caption[i,j,num,caption_id[i,j,k]]
#                     print(id2word[int(beam_caption[i,j,num,k])])
        num += 1

(25, 10, 30)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
