In [2]:
# As usual, a bit of setup
import time
import numpy as np
import matplotlib.pyplot as plt
import LOUPE.WILLOW.loupe as lp
import tensorflow as tf
import h5py
import pandas as pd
import csv
from utils.data_utils import temporal_indicator
from utils.data_utils import temporal_pooling

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


# Training Data

Load small training data set

In [3]:
# format
train_data = pd.read_csv("train_extreme_small.csv")
train_data.rename( columns={'Unnamed: 0':'index'}, inplace=True )
train_data["duration"] = train_data["duration"].astype('float64')
train_data["t_init"], train_data["t_end"] = train_data["timestamps"].str.split(", ", 1).str
train_data["t_init"] = train_data["t_init"].str.strip("[")
train_data["t_end"] = train_data["t_end"].str.strip("]")
train_data["t_init"] = train_data["t_init"].astype('float64')
train_data["t_end"] = train_data["t_end"].astype('float64')
train_data = train_data.drop('timestamps', 1)

# pool
filename = "c3d_data/sub_activitynet_v1-3.c3d.hdf5"
video_feature_representation = h5py.File(filename, 'r')
train_ids = train_data['id'].unique()
print(train_ids.shape)
f_inits = []
f_ends = []
max_pooled_representations = []
max_proposals = 0
all_padded_proposals = np.zeros((99,500,30))
for v,video_id in enumerate(train_ids):
    #print("video id: ", video_id)
    temp = train_data[train_data['id']==video_id].reset_index()
    C3D_features = video_feature_representation["v_QOlSCBRmfWY"]['c3d_features'].value
    
    if max_proposals < temp.shape[0]:
        max_proposals = temp.shape[0]
    
    for i in range(temp.shape[0]):
        
        # get time info
        duration = temp["duration"][i]
        t_init = temp["t_init"][i]
        t_end = temp["t_end"][i]
        num_frames = C3D_features.shape[0]
        
        # compute start and end frame
        f_init = int(round((t_init/duration)*num_frames))
        f_end = int(round((t_end/duration)*num_frames))
        #print("f_init: ", f_init, "t_init: ", t_init)
        #print("f_end: ", f_end, "t_end: ", t_end)
        
        # get max pool
        if f_init <= f_end:
            max_pooled_rep = temporal_pooling(C3D_features[f_init:f_end],"max")
        else:
            max_pooled_rep = temporal_pooling(C3D_features[f_end:f_init],"max")
        
        # append info
        f_inits.append(f_init)
        f_ends.append(f_end)
        max_pooled_representations.append(max_pooled_rep)
        all_padded_proposals[v,:,i] = max_pooled_rep


f_inits = np.array(f_inits)
f_inits = pd.DataFrame({'f_init': f_inits})
f_ends = np.array(f_ends) 
f_ends = pd.DataFrame({'f_end': f_ends})

max_pooled_representations = np.array(max_pooled_representations)
C3D_feature_column_names = ["h" + str(i) for i in range(max_pooled_representations.shape[1])] 
max_pooled_representations = pd.DataFrame(max_pooled_representations, columns=C3D_feature_column_names)

train_data = pd.concat([train_data, f_inits, f_ends, max_pooled_representations], axis=1)
print(train_data.shape)

train_data.to_pickle("train_data")

(99,)
(590, 508)


In [4]:
train_data

Unnamed: 0,index,duration,id,sentences,t_init,t_end,f_init,f_end,h0,h1,...,h490,h491,h492,h493,h494,h495,h496,h497,h498,h499
0,0,139.04,v_sJFgo9H6zNo,A little boy playing on a jungle gym.,3.48,17.38,13,64,-3.298260,18.175632,...,-0.155309,0.368850,0.807826,0.098474,0.249643,0.411592,1.026120,0.443485,0.906923,0.536299
1,1,139.04,v_sJFgo9H6zNo,He is climbing up a set of stairs.,3.48,13.90,13,51,-4.229768,18.175632,...,-0.155309,0.368850,0.807826,0.098474,0.249643,-0.084871,0.693306,0.443485,0.906923,0.534756
2,2,139.04,v_sJFgo9H6zNo,Now is sliding fast down a slide.,9.04,18.77,33,70,-3.298260,17.193672,...,-0.192642,0.205117,0.807826,-0.323197,0.222674,0.411592,1.026120,0.443485,0.506699,0.536299
3,3,139.04,v_sJFgo9H6zNo,Another boy is shown playing in the grass.,19.47,27.11,72,100,-4.215715,15.575966,...,0.870935,0.286012,0.961404,0.283244,0.443157,-0.432416,0.625697,0.726257,1.109358,0.451291
4,4,139.04,v_sJFgo9H6zNo,Now the first boy is being picked up by a woman.,27.11,38.24,100,142,1.038864,14.453793,...,1.222381,0.036679,1.206612,0.791813,0.821577,-0.229007,0.612485,0.756787,0.788987,0.448158
5,5,139.04,v_sJFgo9H6zNo,Now he is going to go down the slide once again.,33.37,45.19,124,167,1.038864,13.792494,...,1.222381,0.120746,1.173679,0.791813,0.834100,-0.105713,0.588857,0.814192,1.046344,0.818819
6,6,139.04,v_sJFgo9H6zNo,The boy likes playing with a large wheel on t...,45.88,51.45,170,191,-2.737857,15.241880,...,0.383209,-0.058032,0.908289,0.167768,0.472774,-0.091540,0.464823,1.251451,0.669822,0.112905
7,7,139.04,v_sJFgo9H6zNo,"Once again he goes down the slide, but with d...",65.35,78.56,242,291,-4.116940,15.370780,...,1.458786,0.466416,0.811957,0.718903,0.579573,-0.240675,0.811235,0.308810,1.025162,1.113936
8,8,139.04,v_sJFgo9H6zNo,He goes down the slide a 4th time but with an...,84.82,98.02,314,363,-3.992257,16.570752,...,1.129785,0.158361,1.482520,1.054543,0.572467,-0.555873,0.729230,0.590229,1.265893,0.719463
9,9,139.04,v_sJFgo9H6zNo,He goes down the slide several more times.,111.93,130.70,415,484,-3.744046,12.659850,...,1.314608,1.073732,1.609605,0.642320,0.610660,0.249365,0.554462,0.630031,1.424568,0.814642


In [5]:
all_padded_proposals[0,0:10,29]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# Get word2id in sentences

In [7]:
import sys
import numpy as np


glove_path = '/home/martnzjulio_a/SPJ/glove.6B.100d.txt'
glove_dim = 100
_PAD = b"<pad>"
_UNK = b"<unk>"
_START_VOCAB = [_PAD, _UNK]
PAD_ID = 0
UNK_ID = 1


def get_glove(glove_path, glove_dim):
    """Reads from original GloVe .txt file and returns embedding matrix and
    mappings from words to word ids.

    Input:
      glove_path: path to glove.6B.{glove_dim}d.txt
      glove_dim: integer; needs to match the dimension in glove_path

    Returns:
      emb_matrix: Numpy array shape (400002, glove_dim) containing glove embeddings
        (plus PAD and UNK embeddings in first two rows).
        The rows of emb_matrix correspond to the word ids given in word2id and id2word
      word2id: dictionary mapping word (string) to word id (int)
      id2word: dictionary mapping word id (int) to word (string)
    """

    print ("Loading GLoVE vectors from file: %s" % glove_path)
    vocab_size = int(4e5) # this is the vocab size of the corpus we've downloaded

    emb_matrix = np.zeros((vocab_size + len(_START_VOCAB), glove_dim))
    word2id = {}
    id2word = {}

    random_init = True
    # randomly initialize the special tokens
    if random_init:
        emb_matrix[:len(_START_VOCAB), :] = np.random.randn(len(_START_VOCAB), glove_dim)

    # put start tokens in the dictionaries
    idx = 0
    for word in _START_VOCAB:
        word2id[word] = idx
        id2word[idx] = word
        idx += 1

    # go through glove vecs
    with open(glove_path, 'r') as fh:
        for line in fh:
            line = line.lstrip().rstrip().split(" ")
            word = line[0]
            vector = list(map(float, line[1:]))
            if glove_dim != len(vector):
                raise Exception("You set --glove_path=%s but --embedding_size=%i. If you set --glove_path yourself then make sure that --embedding_size matches!" % (glove_path, glove_dim))
            emb_matrix[idx, :] = vector
            word2id[word] = idx
            id2word[idx] = word
            idx += 1

    final_vocab_size = vocab_size + len(_START_VOCAB)
    assert len(word2id) == final_vocab_size
    assert len(id2word) == final_vocab_size
    assert idx == final_vocab_size

    return emb_matrix, word2id, id2word

emb,word2id,id2word = get_glove(glove_path, glove_dim)

Loading GLoVE vectors from file: /home/martnzjulio_a/SPJ/glove.6B.100d.txt


In [44]:

def split_by_whitespace(sentence):
    words = []
    for space_separated_fragment in sentence.strip().split():
        words.extend(re.split(" ", space_separated_fragment))
    return [w for w in words if w]


def intstr_to_intlist(string):
    """Given a string e.g. '311 9 1334 635 6192 56 639', returns as a list of integers"""
    return [int(s) for s in string.split()]


def sentence_to_token_ids(sentence, word2id):
    """Turns an already-tokenized sentence string into word indices
    e.g. "i do n't know" -> [9, 32, 16, 96]
    Note any token that isn't in the word2id mapping gets mapped to the id for UNK
    """
    tokens = split_by_whitespace(sentence) # list of strings
    ids = [word2id.get(w, UNK_ID) for w in tokens]
    return tokens, ids


def padded(token_batch, batch_pad=0):
    """
    Inputs:
      token_batch: List (length batch size) of lists of ints.
      batch_pad: Int. Length to pad to. If 0, pad to maximum length sequence in token_batch.
    Returns:
      List (length batch_size) of padded of lists of ints.
        All are same length - batch_pad if batch_pad!=0, otherwise the maximum length in token_batch
    """
    maxlen = max(lambda x: len(x), token_batch) if batch_pad == 0 else batch_pad
    res = token_batch + [PAD_ID] * (maxlen - len(token_batch))

    return res


import re
#padding for sentences
steps_len = 50
all_padded_sentences = np.zeros((99,steps_len,30))

for v,video_id in enumerate(train_ids):
    temp = train_data[train_data['id']==video_id].reset_index()
    for i in range(temp.shape[0]):
        words,ids = sentence_to_token_ids(temp['sentences'][i][:-1],word2id)
        ids_pad = padded(ids,steps_len)
        for iid,id in enumerate(ids_pad):
            all_padded_sentences[v,iid,i] = id

all_padded_sentences_id = np.array(all_padded_sentences[1,:,0:10]).astype(int)
        
all_padded_sentences_id

array([[    1,     1,     1,     1,     1,     1,     1,     1,     1,
            1],
       [  302,   302,   335,  2928,   335,   335,   335,   302,   335,
          335],
       [   16,    16,  1751,  7434,  1751,  1751,  1751,  3665,  1751,
         1751],
       [  877,   286,  6493,     2,  6493,  2385,  2278,     2,  1434,
          228],
       [   28,     2,     7,  1502,   206,     6,     6,   335,     6,
           24],
       [  139,   335,   228,     7,    73,  5717,   284,  1751,     2,
            2],
       [ 1890,  1751,   983,     2,  5691,    62,    19,   244,  4925,
         3536],
       [    6,     0,     2,   335,     0,     2,     2,   137,     7,
            0],
       [  888,     0,  3536,  1751,     0,  5691,  4879,     2,     2,
            0],
       [    9,     0,     0, 11072,     0,     7,     7,  5691,   302,
            0],
       [  335,     0,     0,    19,     0,     2,  1668,     0,  3665,
            0],
       [ 1751,     0,     0,  9813,     0, 

# Training Model

In [None]:
# Test Case

H_temp1 = np.transpose(np.array(train_data.iloc[0:5,8:]))   # 5 x 500
H_temp2 = np.transpose(np.array(train_data.iloc[5:10,8:]))   # 5 x 500
H_temp3 = np.transpose(np.array(train_data.iloc[10:15,8:]))  # 5 x 500

H_temp = np.stack((H_temp1,H_temp2,H_temp3),axis=0)

H_temp.shape

Ipast_temp = (np.random.randn(3,5,5) < 0.5).astype(float)
Ifuture_temp = (np.random.randn(3,5,5) < 0.5).astype(float)

print(Ipast_temp.shape)

In [None]:
def initialize_parameters(N):
    """
    Initializes parameters to build a neural network with tensorflow. The shapes are:
                        Wa : [N, N]
                        ba : [N, 1]
    
    Returns:
    parameters -- a dictionary of tensors containing Wa, ba
    """
    Wa = tf.get_variable("Wa", [N,N], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    ba = tf.get_variable("ba", [N,1], initializer = tf.zeros_initializer())
    
    parameters = {"Wa": Wa,
                  "ba": ba}
    return parameters

In [None]:
def create_placeholders(N,K):
    """
    Creates the placeholders for the tensorflow session.
    
    Arguments:
    N -- scalar, size of a C3D vector (i.e. 500)
    K -- scalar, number of proposals
    
    Returns:
    H -- placeholder for the C3D features, shape = [None, N, k] and dtype "float"
    Ipast -- placeholder for the indicators of past, shape = [None, K, k] and dtype "float"
    Ifuture == placeholder for the indicators of future, shape = [None, K, k] and dtype "float"
    
    Tips:
    - You will use None because it let's us be flexible on the number of examples you will for the placeholders.
      In fact, the number of examples during test/train is different.
    """
    
    #placeholders
    H = tf.placeholder(tf.float32,shape=[None, N, K], name="H")
    Ipast = tf.placeholder(tf.float32, shape=[None, K, K], name="Ipast")
    Ifuture = tf.placeholder(tf.float32, shape=[None, K, K], name="Ifuture")
    return H, Ipast, Ifuture

In [None]:
def attention_module(H,Ipast,Ifuture,parameters):
    """
    Implements the attention module: see https://cs.stanford.edu/people/ranjaykrishna/densevid/
    
    Arguments:
    H -- input dataset placeholder, of shape = [None, N, K] and dtype "float"
    Ipast -- placeholder for the indicators of past, shape = [None, K, k] and dtype "float"
    Ifuture == placeholder for the indicators of future, shape = [None, K, K] and dtype "float"
    parameters -- python dictionary containing your parameters "Wa", "ba", sapes [N,N] and [N,1] respectively

    Returns:
    Hout -- concatenated output (hpast, h, hfuture), shape = [None, 500, K]
    """
    
    # Retrieve the parameters from the dictionary "parameters" 
    Wa = parameters['Wa']
    ba = parameters['ba']

    #forward pass
    W = tf.transpose(tf.tensordot(Wa,tf.transpose(H,perm=[1,2,0]),axes=[[1], [0]]),perm=[2,0,1]) + ba # shape: [None,K,K]
    A = tf.matmul(tf.transpose(W,perm=[0,2,1]),H) # shape: [None,K,K]
    A_flat = tf.reshape(A, [-1, K*K]) # shape: [None,K*K]

    #future features
    Ifuture_flat = tf.reshape(Ifuture, [-1, K*K]) # shape: [None,K*K]
    Afuture = tf.reshape(tf.multiply(Ifuture_flat,A_flat),[-1,K,K]) # shape: [None,K,K]
    Zfuture = tf.reduce_sum(Ifuture,axis=2) # shape: [None,K]
    Hfuture = tf.transpose(tf.transpose(tf.matmul(H,tf.transpose(Afuture,perm=[0,2,1])),perm=[1,0,2])/Zfuture,perm=[1,0,2]) # shape: [None,N,K]

    #past features
    Ipast_flat = tf.reshape(Ipast, [-1, K*K]) # shape: [None,K*K]
    Apast = tf.reshape(tf.multiply(Ipast_flat,A_flat),[-1,K,K]) # shape: [None,K,K]
    Zpast = tf.reduce_sum(Ipast,axis=2) # shape: [None,K]
    Hpast = tf.transpose(tf.transpose(tf.matmul(H,tf.transpose(Apast,perm=[0,2,1])),perm=[1,0,2])/Zfuture,perm=[1,0,2]) # shape: [None,N,K]

    #stacked features
    Hout = tf.concat([Hpast, H, Hfuture], 1)

    print("Hfuture shape: ", Hfuture.get_shape().as_list())
    print("W shape: ", W.get_shape().as_list())
    print("A shape: ", A.get_shape().as_list())
    print("A_flat shape: ", A_flat.get_shape().as_list())
    print("Ifuture_flat shape: ", Ifuture_flat.get_shape().as_list())
    print("Zfuture: ", Zfuture.get_shape().as_list())
    print("Hfuture: ", Hfuture.get_shape().as_list())
    print("Hpast: ", Hfuture.get_shape().as_list())
    print("Hout: ", Hout.get_shape().as_list())
    
    return Hout




In [None]:
tf.reset_default_graph()

K = 5
N = 500

with tf.Session() as sess:
    H, Ipast, Ifuture = create_placeholders(N, K)
    parameters = initialize_parameters(N)
    Hout = attention_module(H,Ipast,Ifuture,parameters)
    
    print("Hout: ", Hout)
    
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run([Hout], feed_dict={H: H_temp, Ipast: Ipast_temp, Ifuture: Ifuture_temp})

In [None]:
def add_embedding_layer(sentence_ids,emb_matrix):
    """
    Adds word embedding layer to the graph.

    Inputs:
      emb_matrix: shape (400002, embedding_size).
        The GloVe vectors, plus vectors for PAD and UNK.
    """
    with vs.variable_scope("embeddings"):

        # Note: the embedding matrix is a tf.constant which means it's not a trainable parameter
        embedding_matrix = tf.constant(emb_matrix, dtype=tf.float32, name="emb_matrix") # shape (400002, embedding_size)

        # Get the word embeddings for the caption
        # using the placeholders caption
        cap_embs = embedding_ops.embedding_lookup(embedding_matrix, sentence_ids) # shape (batch_size, context_len, embedding_size)
       
    return cap_embs

In [None]:
def language_model(Hout,Sout,embeddings,word2id):
    state_size = 512
    batch_size = Hout.shape[0]
    num_steps = Sout[1]
    num_classes = embeddings[1]
    num_layers = 2
    initializer = Hout
    
    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, word2id)
    
    cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    init_state = get_initial_cell_state(cell, initializer, batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    
    return rnn_outputs,x,y




def loss(Hout,Sout,embeddings,word2id,rnn_outputs,x,y):
    state_size = 512
    batch_size = Hout.shape[0]
    num_steps = Sout[1]
    num_classes = embeddings[1]
    num_layers = 2
    initializer = Hout
    
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    #reshape rnn_outputs and y so we can get the logits in a single matmul
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])
    logits = tf.matmul(rnn_outputs, W) + b
    losses = tf.reshape(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y_reshaped),[batch_size, num_steps])
    loss_by_timestep = tf.reduce_mean(losses, reduction_indices=0)


In [None]:
def captioning_module(Hin, lstm_size, batch_size):
    # Hin shape = [batch_size, 1500, K] # NOTE: K != num_steps necessarily
    
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    for i in range(num_proposals):
    # Initial state of the LSTM memory.
    state = H[:, :, i]

        for j in range(num_steps):
            # The value of state is updated after processing each batch of words.
            output, state = lstm(output, state)

            # The rest of the code.
            # ...

    final_state = state

In [None]:
def prop_cost():

In [None]:
def total_cost():
    

In [None]:
def add_loss(self):
       """
       Add loss computation to the graph.

       Uses:
         self.logits_start: shape (batch_size, context_len)
           IMPORTANT: Assumes that self.logits_start is masked (i.e. has -large in masked locations).
           That's because the tf.nn.sparse_softmax_cross_entropy_with_logits
           function applies softmax and then computes cross-entropy loss.
           So you need to apply masking to the logits (by subtracting large
           number in the padding location) BEFORE you pass to the
           sparse_softmax_cross_entropy_with_logits function.

         self.ans_span: shape (batch_size, 2)
           Contains the gold start and end locations

       Defines:
         self.loss_start, self.loss_end, self.loss: all scalar tensors
       """
       with vs.variable_scope("loss"):

           # Calculate loss for prediction of start position
           loss_start = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits_start, labels=self.ans_span[:, 0]) # loss_start has shape (batch_size)
           self.loss_start = tf.reduce_mean(loss_start) # scalar. avg across batch
           tf.summary.scalar('loss_start', self.loss_start) # log to tensorboard

           # Calculate loss for prediction of end position
           loss_end = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits_end, labels=self.ans_span[:, 1])
           self.loss_end = tf.reduce_mean(loss_end)
           tf.summary.scalar('loss_end', self.loss_end)

           # Add the two losses
           self.loss = self.loss_start + self.loss_end
           tf.summary.scalar('loss', self.loss)

In [None]:
def compute_cost(Z3, Y):
    """
    Computes the cost
    
    Arguments:
    Z3 -- output of forward propagation (output of the last LINEAR unit), of shape (6, number of examples)
    Y -- "true" labels vector placeholder, same shape as Z3
    
    Returns:
    cost - Tensor of the cost function
    """
    
    # to fit the tensorflow requirement for tf.nn.softmax_cross_entropy_with_logits(...,...)
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    
    ### START CODE HERE ### (1 line of code)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels))
    ### END CODE HERE ###
    
    return cost

In [None]:
tf.reset_default_graph()

with tf.Session() as sess:
    X, Y = create_placeholders(12288, 6)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    print("cost = " + str(cost))

In [None]:
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001,
          num_epochs = 1500, minibatch_size = 32, print_cost = True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    
    Arguments:
    X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- test set, of shape (output size = 6, number of training examples = 1080)
    X_test -- training set, of shape (input size = 12288, number of training examples = 120)
    Y_test -- test set, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    (n_x, m) = X_train.shape                          # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                        # To keep track of the cost
    
    # Create Placeholders of shape (n_x, n_y)
    ### START CODE HERE ### (1 line)
    X, Y = create_placeholders(n_x, n_y)
    ### END CODE HERE ###

    # Initialize parameters
    ### START CODE HERE ### (1 line)
    parameters = initialize_parameters()
    ### END CODE HERE ###
    
    # Forward propagation: Build the forward propagation in the tensorflow graph
    ### START CODE HERE ### (1 line)
    Z3 = forward_propagation(X, parameters)
    ### END CODE HERE ###
    
    # Cost function: Add cost function to tensorflow graph
    ### START CODE HERE ### (1 line)
    cost = compute_cost(Z3, Y)
    ### END CODE HERE ###
    
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    ### START CODE HERE ### (1 line)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    ### END CODE HERE ###
    
    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                
                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
                ### START CODE HERE ### (1 line)
                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                ### END CODE HERE ###
                
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
                
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
        
        return parameters

In [None]:
parameters = model(X_train, Y_train, X_test, Y_test)

# Test Data (incomplete)

In [None]:
#train_ids = [line.strip() for line in open("id_data/train_ids.csv", 'r')]
filename = "c3d_data/sub_activitynet_v1-3.c3d.hdf5"
video_feature_representation = h5py.File(filename, 'r')
train_ids = ["v_--0edUL8zmA","v_hHiPEAiYKv0","v_u2uoYvo8J5s","v_c_NlYvL96y0","v_sJFgo9H6zNo"]
video_data = dict()
for videoid in train_ids:
    print(videoid)
    proposals_df = pd.read_csv('prop_data/' + videoid + '.csv',sep=' ')
    c3d_features = video_feature_representation[videoid]['c3d_features'].value
    max_frames = c3d_features.shape[0]
    print(max_frames)
    
    for i in range(proposals_df.shape[0]):
        f_init = proposals_df["f-init"][i]
        f_end =  proposals_df["f-end"][i]
        if (f_init < max_frames) and (f_end > 0):
            if f_init < 0:
                f_init = 0
            if f_end > max_frames:
                f_end = max_frames
        
            #print((f_init,f_end))
            #print(c3d_features[f_init:f_end,:].shape)
            h = temporal_pooling(c3d_features[f_init:f_end,:], mode="max")
            if i == 0:
                H = h
            else:
                H = np.column_stack((H,h))
        else:
            proposals_df.drop(proposals_df.index[i])
    video_data[videoid] = H
    