Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.

## train.ipynb
- Training and evaluation on the Chimp&See dataset (DrivenData Pri-matrix Factorization competition, 2017)
- All convolutional layers (everything before the final classification layer) are initialized from the RGB-I3D model (Carreira and Zisserman, 2017), which was pretrained on ImageNet and then trained on the Kinetics dataset
- Code was designed in a way that extension to multi-gpu training should be possible

In [1]:
from __future__ import absolute_import
from __future__ import division

#source: https://github.com/deepmind/kinetics-i3d
from kinetics_i3d import i3d
#modified from https://gist.github.com/drivendata/70638e8a9e6a10fa020623f143259df3
import primatrix_dataset_utils

import numpy as np
import tensorflow as tf
import sonnet as snt
import threading
import os
import time
import tqdm

In [13]:
# hyperparameters
# Factor applied to the summed L2 penalty built in inference().
WEIGHT_DECAY = 1e-7
# Learning rate handed to the MomentumOptimizer.
LEARNING_RATE = 1e-1
# Dropout keep probability (0.5 is also the value fed during training steps).
DROPOUT_KEEP_PROB = 0.5
# Fixed batch size: it is baked into the placeholder and queue shapes and
# passed to the Dataset helper.
BATCH_SIZE = 6
# Passed to the Dataset helper as val_size; presumably the validation
# fraction -- confirm in primatrix_dataset_utils.
VAL_SIZE = 0.3

# NOTE(review): these two switches are not referenced by the code visible in
# this notebook; presumably they select an oversampling strategy -- confirm.
FREQUENCIES_OVERSAMPLING = True
ANIMAL_OVERSAMPLING = False
# 24 values passed to data.batches_with_oversampling_get_indices() when
# training starts; presumably one entry per class -- confirm.
OVERSAMPLING_FREQUENCIES = [ 0.50082119,  0.21659901,  0.63500282,  0.44675957,  0.5577173 ,
  0.90370611,  0.68986291,  0.68944916,  0.34719383,  0.89609911,
  0.67162594,  0.67663023,  1.0123    ,  0.51791462,  0.34569939,
  0.76321204,  0.60431893,  0.91221001,  0.48646224,  0.65734299,
  0.74687231,  0.84253148,  0.34175657,  0.45264604]

# Arguments of the train() call at the end of the notebook: number of
# training steps, number of validation steps, and whether test-set
# predictions are produced.
TRAIN_STEPS = 10
VAL_STEPS = 0
CREATE_PREDICTIONS = False

In [3]:
# constants
# Height and width used in the shape of the video input placeholder/queue.
IMAGE_SIZE = 224
# Number of output channels of the classification layer and width of the
# label placeholder.
NUM_CLASSES = 24
# Size of the frame dimension of the video input placeholder/queue.
SAMPLE_VIDEO_FRAMES = 90
# Used to derive how many test steps are run when predictions are created.
N_TEST_IMAGES = 87485

In [4]:
# paths
# Checkpoint with the pretrained RGB-I3D weights (loaded when no own model exists).
CHECKPOINT_PATH_KINETICS_IMAGENET_RGB = 'kinetics_i3d/rgb_imagenet/model.ckpt'
# Directory that receives the TensorBoard event files.
TENSORBOARD_PATH = 'training_results'
# Checkpoint prefix this notebook restores from / saves to.
MODEL_LOAD_PATH = 'models/model_final.ckpt'
MODEL_SAVE_PATH = 'models/model_final.ckpt'
# Passed to the Dataset helper as datapath.
DATASET_PATH = 'data/'
# CSV file the test-set predictions are written to.
TEST_SET_PREDICTIONS_FILE = 'predictions_test.csv'

# make sure tensorboard path exists
# exist_ok=True makes this a single race-free call instead of check-then-create.
os.makedirs(TENSORBOARD_PATH, exist_ok=True)

In [5]:
# build the model: take convolutional layers from i3d rgb model and create fully connected layers
def get_logits(inputs, is_training, dropout_keep_prob):
    """Build the network graph and return the logits.

    The I3D network is created under the 'RGB' variable scope and evaluated
    up to its 'Mixed_5c' endpoint. A new head under the 'Logits' scope
    (3D average pooling, dropout, 1x1x1 convolution without batch norm)
    maps those features to NUM_CLASSES channels; dimensions 2 and 3 are
    squeezed away and the result is averaged over axis 1.

    Args:
        inputs: input tensor passed to the I3D network.
        is_training: forwarded to the I3D network and to the final Unit3D.
        dropout_keep_prob: forwarded to the I3D network and used as keep
            probability of the head's dropout.

    Returns:
        The logits tensor after the mean over axis 1.
    """
    with tf.variable_scope('RGB'):
        backbone = i3d.InceptionI3d(
            NUM_CLASSES, spatial_squeeze=True, final_endpoint='Mixed_5c')
        features, _ = backbone(
            inputs, is_training=is_training, dropout_keep_prob=dropout_keep_prob)

    with tf.variable_scope('Logits'):
        pooled = tf.nn.avg_pool3d(
            features,
            ksize=[1, 2, 7, 7, 1],
            strides=[1, 1, 1, 1, 1],
            padding=snt.VALID)
        dropped = tf.nn.dropout(pooled, dropout_keep_prob)
        classifier = i3d.Unit3D(
            output_channels=NUM_CLASSES,
            kernel_shape=[1, 1, 1],
            activation_fn=None,
            use_batch_norm=False,
            use_bias=True,
            name='Conv3d_0c_1x1')
        unit_out = classifier(dropped, is_training=is_training)
        squeezed = tf.squeeze(unit_out, [2, 3], name='SpatialSqueeze')
        return tf.reduce_mean(squeezed, axis=1)

In [6]:
# average gradients (would be necessary for multi gpu training)
def average_gradients(tower_grads):
    """Average the gradient of every variable across all towers.

    Args:
        tower_grads: list with one entry per tower; each entry is a list of
            (gradient, variable) pairs in the same variable order.

    Returns:
        A list of (mean_gradient, variable) pairs, one per variable, where
        the variable is taken from the first tower.
    """
    averaged = []
    # zip(*tower_grads) regroups the pairs per variable:
    # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
    for per_variable in zip(*tower_grads):
        # Give each gradient a leading tower axis, join them, then average it away.
        stacked = tf.concat(
            [tf.expand_dims(grad, 0) for grad, _ in per_variable], axis=0)
        mean_grad = tf.reduce_mean(stacked, axis=0)
        first_tower_var = per_variable[0][1]
        averaged.append((mean_grad, first_tower_var))
    return averaged

In [7]:
# restore the pretrained weights, except for the last layer
def restore():
    """Create a Saver for loading the pretrained 'RGB' variables.

    Every global variable whose first name component is 'RGB' is mapped
    from its name without the ':0' suffix, except variables whose name
    contains 'Logits', 'batch_norm' or 'Momentum'.

    Returns:
        A tf.train.Saver restricted to that variable map (reshape=True).
    """
    skipped_tokens = ('Logits', 'batch_norm', 'Momentum')
    rgb_variable_map = {
        var.name.replace(':0', ''): var
        for var in tf.global_variables()
        if var.name.split('/')[0] == 'RGB'
        and not any(token in var.name for token in skipped_tokens)
    }
    return tf.train.Saver(var_list=rgb_variable_map, reshape=True)

In [8]:
def inference(inputs, labels, is_training, dropout_keep_prob):
    """Build logits plus the plain and the L2-regularised loss.

    Args:
        inputs: input tensor forwarded to get_logits().
        labels: target tensor for the sigmoid cross-entropy.
        is_training: forwarded to get_logits().
        dropout_keep_prob: forwarded to get_logits().

    Returns:
        A tuple (loss, regularised_loss, logits): the mean sigmoid
        cross-entropy, that value plus WEIGHT_DECAY times the summed L2 loss
        of all trainable variables without 'bias' in their name, and the
        logits.
    """
    logits = get_logits(inputs, is_training, dropout_keep_prob)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=labels)
    data_loss = tf.reduce_mean(cross_entropy)

    l2_terms = [tf.nn.l2_loss(var)
                for var in tf.trainable_variables()
                if 'bias' not in var.name]
    weight_penalty = tf.add_n(l2_terms) * WEIGHT_DECAY
    return data_loss, data_loss + weight_penalty, logits

In [9]:
#load and preprocess videos for training/testing
def load_and_enqueue(sess,
                     dequeue_data_indices_op,
                     enqueue_op,
                     data_input,
                     labels_input,
                     data_filenames_input,
                     train_steps,
                     val_steps,
                     test_steps):
    """Load batches for dequeued indices and push them onto the data queue.

    Runs train_steps training batches, then val_steps validation batches,
    then test_steps test batches. Every batch is obtained from the global
    `data` object for the indices returned by dequeue_data_indices_op and is
    fed to enqueue_op through the three placeholders.

    Args:
        sess: session used to run the dequeue and enqueue ops.
        dequeue_data_indices_op: op yielding the next batch of indices.
        enqueue_op: op that enqueues one (data, labels, filenames) batch.
        data_input: placeholder for the video data.
        labels_input: placeholder for the labels.
        data_filenames_input: placeholder for the file names.
        train_steps: number of training batches to enqueue.
        val_steps: number of validation batches to enqueue.
        test_steps: number of test batches to enqueue.
    """
    def _push(videos, targets, filenames):
        # One enqueue with all three placeholders populated.
        sess.run(enqueue_op, feed_dict={
            data_input: videos,
            labels_input: targets,
            data_filenames_input: filenames,
        })

    for _ in range(train_steps):
        indices = sess.run(dequeue_data_indices_op)
        train_batch = data.batches_by_indices(indices)
        # File names are not used for training; feed empty strings.
        _push(train_batch[0], train_batch[1],
              np.empty([BATCH_SIZE], dtype = str))

    for _ in range(val_steps):
        indices = sess.run(dequeue_data_indices_op)
        val_batch = data.val_batches_by_indices(indices)
        _push(val_batch[0], val_batch[1],
              np.empty([BATCH_SIZE], dtype = str))

    for _ in range(test_steps):
        indices = sess.run(dequeue_data_indices_op)
        test_batch = data.test_batches_by_indices(indices)
        # Test data has no labels, so NaN placeholders are fed instead.
        _push(test_batch,
              np.ones([BATCH_SIZE, NUM_CLASSES])*np.nan,
              np.array(data.X_test_ids[indices].values))

In [10]:
#load indices of videos for training/testing
def enqueue_indices(indices_generator_train, indices_generator_val, indices_generator_test,
                    sess, indices_loader_worker_ops,
                    train_steps, val_steps, test_steps):
    """Feed index batches from the three generators into the index queues.

    Training indices are enqueued first, then validation, then test. For
    every step one batch is drawn per (placeholder, enqueue_op) pair in
    indices_loader_worker_ops.

    Args:
        indices_generator_train: iterator yielding training index batches.
        indices_generator_val: iterator yielding validation index batches.
        indices_generator_test: iterator yielding test index batches.
        sess: session used to run the enqueue ops.
        indices_loader_worker_ops: iterable of (placeholder, enqueue_op) pairs.
        train_steps: number of training steps to enqueue indices for.
        val_steps: number of validation steps to enqueue indices for.
        test_steps: number of test steps to enqueue indices for.
    """
    phases = (
        (indices_generator_train, train_steps),
        (indices_generator_val, val_steps),
        (indices_generator_test, test_steps),
    )
    # A single dict is reused for every run call, so entries written for
    # other placeholders stay in it.
    shared_feed = {}
    for generator, n_steps in phases:
        for _ in range(n_steps):
            for placeholder, enqueue_indices_op in indices_loader_worker_ops:
                shared_feed[placeholder] = next(generator)
                sess.run(enqueue_indices_op, feed_dict=shared_feed)

In [11]:
# load dataset
# Build the project's Dataset helper once; load_and_enqueue() and train()
# access it through the global name `data`.
# NOTE(review): the meaning of dataset_type='small', reduce_frames and
# test=False is defined in primatrix_dataset_utils -- confirm there.
data = primatrix_dataset_utils.Dataset(datapath=DATASET_PATH,
               dataset_type = 'small',
               reduce_frames=True, 
               batch_size=BATCH_SIZE, 
               test=False, 
               val_size = VAL_SIZE)

In [14]:
tf.reset_default_graph()

# Enable INFO logging up front so the checkpoint-restore messages below are emitted.
tf.logging.set_verbosity(tf.logging.INFO)

# Wall-clock start of the run. time.time() replaces time.clock(), which is
# deprecated since Python 3.3, removed in 3.8 and reports CPU time on Unix.
time_start = time.time()

tower_grads = []               #gradients
tower_scores = []              #scores (loss without L2 regularization)
tower_losses = []              #loss + L2 regularization
tower_inference_test = []      #indices and sigmoid probabilities for inference on test set

data_loader_worker_ops = []    #ops that are required by the data loading process
indices_loader_worker_ops = [] #ops that are required by the indices loading process

with tf.Session(config=tf.ConfigProto(allow_soft_placement=False,
                                      log_device_placement=False,
                                      intra_op_parallelism_threads=24)) as sess:

    with tf.variable_scope(tf.get_variable_scope()):
        with tf.name_scope('tower'):
            with tf.device('/cpu:0'):
                # Run-mode inputs fed on every sess.run of the model.
                is_training = tf.placeholder(tf.bool)
                dropout_keep_prob = tf.placeholder(tf.float32)
                opt = tf.train.MomentumOptimizer(
                    learning_rate=LEARNING_RATE, momentum=0.9)

                # First queue: batches of sample indices, filled by enqueue_indices().
                data_indices = tf.placeholder(
                    tf.int32, shape=(BATCH_SIZE))
                queue_data_indices = tf.FIFOQueue(capacity=300, dtypes=[tf.int32],
                                                  shapes=[[BATCH_SIZE]])
                enqueue_data_indices_op = queue_data_indices.enqueue([data_indices])
                dequeue_data_indices_op = queue_data_indices.dequeue()

                # Second queue: loaded (video, labels, file names) batches,
                # filled by load_and_enqueue().
                labels_input = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_CLASSES))
                data_filenames_input = tf.placeholder(tf.string, shape=(BATCH_SIZE))
                data_input = tf.placeholder(
                    tf.float32,
                    shape=(BATCH_SIZE, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3))
                queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32, tf.float32, tf.string],
                                     shapes=[[BATCH_SIZE, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3],
                                             [BATCH_SIZE, NUM_CLASSES],
                                             [BATCH_SIZE]])
                enqueue_op = queue.enqueue([data_input, labels_input, data_filenames_input])
                data_loader_worker_ops.append([data_input,
                                               labels_input,
                                               data_filenames_input,
                                               dequeue_data_indices_op,
                                               enqueue_op])
                indices_loader_worker_ops.append([data_indices, enqueue_data_indices_op])

            with tf.device('/gpu:0'):
                # From here on data_input refers to the dequeued tensor; the
                # placeholder was already stored in data_loader_worker_ops.
                [data_input, labels, data_labels] = queue.dequeue()
                score, loss, logits = inference(data_input, labels, is_training, dropout_keep_prob)
                grads = opt.compute_gradients(loss)
                tower_grads.append(grads)
                tower_losses.append(loss)
                tower_scores.append(score)

            with tf.device('/cpu:0'):
                sigmoid_probabilities = tf.nn.sigmoid(logits)
                tower_inference_test.append([data_labels, sigmoid_probabilities])

    with tf.device('/cpu:0'):
        global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
        avg_loss = tf.reduce_mean(tower_losses)
        avg_score = tf.reduce_mean(tower_scores)
        grads = average_gradients(tower_grads)
        train_op = opt.apply_gradients(grads, global_step_tensor)

        saver = tf.train.Saver()

        if os.path.exists(MODEL_LOAD_PATH + ".index"):
            #Restore from existing model
            tf.logging.info('Restoring from: %s', MODEL_LOAD_PATH)
            saver.restore(sess, MODEL_LOAD_PATH)
        else:
            #Create new model with pretrained weights from i3d model trained on kinetics dataset
            pretrained_saver = restore()
            # Initialise everything first; the pretrained restore then
            # overwrites the variables selected by restore().
            sess.run(tf.global_variables_initializer())
            tf.logging.info('No checkpoint file found, restoring pretrained weights...')
            pretrained_saver.restore(sess, CHECKPOINT_PATH_KINETICS_IMAGENET_RGB)
            tf.logging.info('Restore Complete.')

        summary_writer_train = tf.summary.FileWriter(TENSORBOARD_PATH + "/plot_train", sess.graph)
        summary_writer_val = tf.summary.FileWriter(TENSORBOARD_PATH + "/plot_val", sess.graph)


        def train(train_steps, val_steps=0, save_model=True, create_predictions=False):
            """Run training, validation and (optionally) test-set prediction.

            Starts one daemon thread that enqueues index batches and one
            daemon thread per entry of data_loader_worker_ops that loads and
            enqueues the data, then consumes the data queue: train_steps
            optimisation steps, val_steps scoring steps and, if requested,
            the prediction steps for the test set.

            Args:
                train_steps: number of optimisation steps to run.
                val_steps: number of validation steps to run.
                save_model: if True, save a checkpoint to MODEL_SAVE_PATH at the end.
                create_predictions: if True, predict the test set and write
                    the result to TEST_SET_PREDICTIONS_FILE.
            """
            if create_predictions:
                # NOTE(review): this requests one extra step even when
                # N_TEST_IMAGES is a multiple of BATCH_SIZE -- confirm
                # against data.test_batches_get_indices().
                test_steps = N_TEST_IMAGES // (BATCH_SIZE) + 1
            else:
                test_steps = 0

            # Producer of index batches (train, then val, then test).
            t = threading.Thread(target=enqueue_indices,
                                 args=(data.batches_with_oversampling_get_indices(OVERSAMPLING_FREQUENCIES),
                                       data.val_batches_get_indices(),
                                       data.test_batches_get_indices(),
                                       sess, indices_loader_worker_ops,
                                       train_steps,
                                       val_steps,
                                       test_steps,),
                                 daemon=True)
            t.start()

            # One loader thread per set of data-loading ops.
            for (data_input,
                 labels_input,
                 data_filenames_input,
                 dequeue_data_indices_op,
                 enqueue_op) in data_loader_worker_ops:

                t = threading.Thread(target=load_and_enqueue,
                                     args=(sess,
                                           dequeue_data_indices_op,
                                           enqueue_op, data_input,
                                           labels_input, data_filenames_input,
                                           train_steps,
                                           val_steps,
                                           test_steps,),
                                     daemon=True)
                t.start()

            if train_steps!=0:
                score_train_sum = 0
                for _ in tqdm.tqdm(range(train_steps),
                                   total=train_steps,
                                   desc="train"):
                    # Use the configured keep probability instead of a literal.
                    _, n_steps, loss_train, score_train = sess.run([train_op,
                                                                    global_step_tensor,
                                                                    avg_loss,
                                                                    avg_score],
                                                                   {is_training: True,
                                                                    dropout_keep_prob: DROPOUT_KEEP_PROB})
                    summary = tf.Summary(value=[tf.Summary.Value(
                        tag="loss", simple_value=loss_train)])
                    summary_writer_train.add_summary(summary, n_steps)
                    score_train_sum += score_train
                # Mean unregularised loss over this call's training steps.
                score_train_avg = score_train_sum/train_steps
                summary = tf.Summary(value=[tf.Summary.Value(
                    tag="score", simple_value=score_train_avg)])
                summary_writer_train.add_summary(summary, n_steps)


            if val_steps!=0:
                if train_steps==0:
                    # No training step ran, so fetch the step for the summary explicitly.
                    n_steps = sess.run(global_step_tensor)
                score_val_sum = 0
                for _ in tqdm.tqdm(range(val_steps), total=val_steps, desc="val"):
                    # NOTE(review): is_training is fed as True although this
                    # is evaluation -- confirm that this is intended.
                    score_val = sess.run([avg_score],
                                         {is_training: True, dropout_keep_prob: 1.0})
                    score_val = score_val[0]
                    score_val_sum += score_val
                score_val_avg = score_val_sum/val_steps
                summary = tf.Summary(value=[tf.Summary.Value(
                    tag="score", simple_value=score_val_avg)])
                summary_writer_val.add_summary(summary, n_steps)

            if create_predictions:
                for _ in tqdm.tqdm(range(test_steps),
                                   total=test_steps,
                                   desc="test"):
                    # NOTE(review): is_training is fed as True here as well -- confirm.
                    tower_inference_test_out = sess.run([tower_inference_test],
                                                        {is_training: True, dropout_keep_prob: 1.0})
                    tower_inference_test_out = tower_inference_test_out[0]
                    for indices, probabilities in tower_inference_test_out:
                        # File names come back from the string tensor as bytes.
                        indices = [i.decode('utf-8') for i in indices]
                        data.update_predictions_at(indices, probabilities)

                #save predictions to file
                data.predictions.to_csv(TEST_SET_PREDICTIONS_FILE)


            if save_model:
                saver.save(sess, MODEL_SAVE_PATH)
                tf.logging.info('Model saved as ' + MODEL_SAVE_PATH)

        try:
            train(train_steps = TRAIN_STEPS,
                  val_steps = VAL_STEPS,
                  save_model=False,
                  create_predictions=CREATE_PREDICTIONS)
        finally:
            # Close the event files even if training raised.
            summary_writer_train.close()
            summary_writer_val.close()

time_stop = time.time()
tf.logging.info("total_time: %ds", time_stop-time_start)

INFO:tensorflow:Restoring from: ../aws_nvirginia/efs/models/model_tb_8GPU_oversampling01_no_2.ckpt
INFO:tensorflow:Restoring parameters from ../aws_nvirginia/efs/models/model_tb_8GPU_oversampling01_no_2.ckpt


train: 100%|██████████| 10/10 [01:46<00:00, 10.68s/it]

INFO:tensorflow:total_time: 122s



