**Setup**

In [0]:
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

%tensorflow_version 1.x
import tensorflow as tf
import numpy as np
import os
import sys
from tqdm.notebook import tqdm

slim = tf.contrib.slim

Mounting Google Drive

In [0]:
# Mount Google Drive under /content/drive so the project files can be
# addressed as ordinary filesystem paths by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Define the location of the project files in Google Drive

In [0]:
# Root directory of the project inside the mounted Drive.
drive_affwild_path = '/content/drive/My Drive/aff_wild'
# CSV listing the images to evaluate; used as the default of the `input_file` flag.
inputcsv_path = os.path.join(drive_affwild_path, 'input.csv')

Append the project directory on Google Drive to `sys.path` so that the project's own Python modules can be imported

In [0]:
# Make the project's Python modules on Drive importable.
# list.append returns None, so the call must not be nested inside another
# append; the membership guard keeps the cell idempotent when it is re-run.
if drive_affwild_path not in sys.path:
    sys.path.append(drive_affwild_path)

**Pretrained model locations**

Model | CCC A | CCC V | MSE A | MSE V
--- | --- | --- | --- | ---
affwildnet_vggface_gru | 0.7251 | 0.8107 | 0.0344 | 0.0600
affwildnet_resnet_gru | 0.7009 | 0.7760 | 0.0346 | 0.0615
vggface_4096 | **0.8013** | **0.8486** | **0.0266** | **0.0454**
vggface_2000 | 0.7881 | 0.8410 | 0.0299 | 0.0497

In [0]:
# Checkpoint prefixes of the pretrained models, all located below the project
# directory on Drive. The checkpoint passed via the
# `pretrained_model_checkpoint_path` flag is restored into the graph built for
# the `network` flag, so the two must refer to the same model.
affwildnet_vggface_path = os.path.join(drive_affwild_path, 'affwildnet', 'affwildnet-vggface-gru', 'model.ckpt-0')
affwildnet_resnet_path  = os.path.join(drive_affwild_path, 'affwildnet', 'affwildnet-resnet-gru', 'model.ckpt-0')
vggface_2000_path       = os.path.join(drive_affwild_path, 'vggface', '4096x2000x2', 'model.ckpt-0')
vggface_4096_path       = os.path.join(drive_affwild_path, 'vggface', '4096x4096x2', 'model.ckpt-975')

**Defining tf cli flags**

In [0]:
# Shared flag registry; the cells below define the individual flags on it and
# evaluate() reads its configuration from it.
FLAGS = tf.app.flags.FLAGS

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'batch_size' in FLAGS.__flags:
    del FLAGS.batch_size
tf.app.flags.DEFINE_integer(
    'batch_size',
    1,
    'The batch size to use.')

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'seq_length' in FLAGS.__flags:
    del FLAGS.seq_length
tf.app.flags.DEFINE_integer(
    'seq_length',
    80,
    'the sequence length: how many consecutive frames to use for the RNN; if the network is only CNN then put here any number you want : total_batch_size = batch_size * seq_length')

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'size' in FLAGS.__flags:
    del FLAGS.size
tf.app.flags.DEFINE_integer(
    'size',
    96,
    'dimensions of input images, e.g. 96x96')

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'network' in FLAGS.__flags:
    del FLAGS.network
tf.app.flags.DEFINE_string(
    'network',
    'affwildnet_resnet',
    'which network architecture we want to use,  pick between : vggface_4096, vggface_2000, affwildnet_vggface, affwildnet_resnet')

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'input_file' in FLAGS.__flags:
    del FLAGS.input_file
tf.app.flags.DEFINE_string(
    'input_file',
    inputcsv_path,
    'the input file : it should be in the format: image_file_location,valence_value,arousal_value  and images should be jpgs')

In [0]:
# Remove a previous definition first so this cell can be re-run without
# defining the same flag twice.
if 'pretrained_model_checkpoint_path' in FLAGS.__flags:
    del FLAGS.pretrained_model_checkpoint_path
tf.app.flags.DEFINE_string(
    'pretrained_model_checkpoint_path',
    affwildnet_resnet_path,
    '''the pretrained model checkpoint path to restore,if there exists one  ''')

In [0]:
# Works around an error that occurs when tf.app.flags is used inside a Jupyter
# notebook: replace the kernel's command line with a single empty argument.
sys.argv = [""]

**data_process.py**

In [0]:
def read_labeled_image_list(image_list_file):
    """Read a .csv file of image paths and their valence/arousal labels.

    Each non-empty line must have the format
      image_file_location,valence_value,arousal_value
    and the images should be jpgs. Blank lines (for example a trailing
    newline at the end of the file) are skipped.

    Args:
      image_list_file: path of the .csv file to read.

    Returns:
      A tuple ``(filenames, labels)``: ``filenames`` is the list of image file
      locations in file order and ``labels`` is a list of
      ``[valence, arousal]`` float pairs, one per filename.

    Raises:
      IndexError: if a non-empty line has fewer than three comma-separated fields.
      ValueError: if a valence or arousal field is not a valid float.
    """
    filenames = []
    labels = []

    # The context manager closes the file even when a row fails to parse.
    with open(image_list_file, 'r') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Nothing to parse on an empty / whitespace-only line.
                continue
            inputs = line.split(',')
            filenames.append(inputs[0])
            labels.append([float(inputs[1]), float(inputs[2])])

    return filenames, labels

In [0]:
def decodeRGB(input_queue,seq_length,size=96):
    """Build the ops that read, decode and resize one sequence of JPEG images.

    Args:
      input_queue: indexable with three entries; entry 0 holds the image file
        locations of one sequence (indexed 0..seq_length-1), entry 1 the
        corresponding labels and entry 2 the image file locations that are
        passed through unchanged.
      seq_length: number of images to decode from entry 0.
      size: side length the decoded images are resized to (size x size).

    Returns:
      A tuple ``(images, labels, images_locations)``: a Python list with
      ``seq_length`` decoded and resized 3-channel image tensors, followed by
      entries 1 and 2 of ``input_queue`` as received.
    """
    labels = input_queue[1]
    images_locations = input_queue[2]

    def _load_frame(index):
        # Read the raw bytes, decode them as a 3-channel JPEG and resize.
        raw_bytes = tf.read_file(input_queue[0][index])
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        return tf.image.resize_images(decoded, tf.convert_to_tensor([size,size]))

    images = [_load_frame(index) for index in range(seq_length)]

    return images,labels,images_locations

In [0]:
def make_rnn_input_per_seq_length_size(images,labels,seq_length):
    """Split images and labels into consecutive chunks of ``seq_length`` items.

    Only full chunks are produced: with N images, ``int(N / seq_length)``
    chunks are returned and any remaining trailing items are dropped.

    Args:
      images: sequence with the N image file locations.
      labels: sequence with the N corresponding labels.
      seq_length: the number of items per chunk.

    Returns:
      A tuple ``(ims, labs)`` of two lists: the chunks of ``images`` and the
      chunks of ``labels`` taken at the same positions.
    """
    num_sequences = int(len(images)/seq_length)
    starts = [index * seq_length for index in range(num_sequences)]

    ims = [images[start:start + seq_length] for start in starts]
    labs = [labels[start:start + seq_length] for start in starts]

    return ims,labs

**Concordance calculation function**

In [0]:
def concordance_cc2(r1, r2):
    """Return the concordance correlation coefficient of two value series.

    Computed as ``2 * cov(r1, r2) / (var(r1) + var(r2) + (mean(r1) - mean(r2)) ** 2)``
    using the ``mean`` / ``var`` methods of the inputs (e.g. numpy arrays).
    """
    mean_1 = r1.mean()
    mean_2 = r2.mean()
    covariance = ((r1 - mean_1) * (r2 - mean_2)).mean()
    denominator = r1.var() + r2.var() + (mean_1 - mean_2) ** 2
    return (2 * covariance) / denominator

**Evaluation function**

In [0]:
def evaluate():
    """Evaluate the network selected by ``FLAGS.network`` on ``FLAGS.input_file``.

    Builds a fresh graph, restores all global variables from
    ``FLAGS.pretrained_model_checkpoint_path``, runs the network over every
    full sequence of ``FLAGS.seq_length`` images and prints the concordance
    correlation coefficient (CCC) and the mean squared error (MSE) for
    valence and arousal.

    Returns:
        A tuple ``(conc_valence, conc_arousal, mean_ccc, mse_arousal, mse_valence)``
        where ``mean_ccc`` is the average of the two CCC values.

    Raises:
        ValueError: if ``FLAGS.network`` is not one of the supported names, or
            if the input file does not contain at least one full sequence.
    """
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = read_labeled_image_list(FLAGS.input_file)
        # split into sequences, note: in the cnn models case this is splitting into batches of length: seq_length ;
        #                             for the cnn-rnn models case, I do not check whether the images in a sequence are consecutive or the images are from the same video/the images are displaying the same person
        image_list, label_list = make_rnn_input_per_seq_length_size(image_list,label_list,FLAGS.seq_length)
        if not image_list:
            raise ValueError(
                'input file {} does not contain a full sequence of {} images'.format(
                    FLAGS.input_file, FLAGS.seq_length))

        image_paths = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue that yields one sequence (paths, labels, paths) per step.
        # NOTE(review): since one step is exactly one sequence, the rest of this
        # function appears to assume FLAGS.batch_size == 1 - confirm before raising it.
        input_queue = tf.train.slice_input_producer([image_paths, labels, image_paths],num_epochs=None, shuffle=False, seed=None,capacity=1000, shared_name=None, name=None)
        images_batch, labels_batch, _ = decodeRGB(input_queue,FLAGS.seq_length,FLAGS.size)
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values in range: [-1,1]

        # Use the configured image size; decodeRGB resized the images to FLAGS.size x FLAGS.size.
        images_batch = tf.reshape(images_batch,[-1,FLAGS.size,FLAGS.size,3])
        labels_batch = tf.reshape(labels_batch,[-1,2])

        # The network modules live in the project directory and are imported
        # lazily so that only the selected architecture has to be importable.
        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net,_  = resnet_v1.resnet_v1_50(inputs=images_batch,is_training=False,num_classes=None)

            with tf.variable_scope('rnn') as scope:
                # CNN features per frame -> 2-layer GRU -> linear layer with 2 outputs.
                cnn = tf.reshape(net,[FLAGS.batch_size,FLAGS.seq_length,-1])
                cell= tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                outputs = tf.reshape(outputs, (FLAGS.batch_size * FLAGS.seq_length, 128))

                weights_initializer = tf.truncated_normal_initializer(
                    stddev=0.01)
                weights = tf.get_variable('weights_output',
                                        shape=[128, 2],
                                        initializer=weights_initializer,
                                        trainable = True)
                biases = tf.get_variable('biases_output',
                                        shape=[2],
                                        initializer=tf.zeros_initializer,trainable = True)

                prediction = tf.nn.xw_plus_b(outputs, weights, biases)

        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        else:
            # Fail with a clear message instead of a NameError on `prediction` below.
            raise ValueError(
                'unknown network {!r}; pick between: vggface_4096, vggface_2000, '
                'affwildnet_vggface, affwildnet_resnet'.format(FLAGS.network))

        num_batches = len(image_list) // FLAGS.batch_size

        variables_to_restore =  tf.global_variables()

        with tf.Session() as sess:

            init_fn = slim.assign_from_checkpoint_fn(
                            FLAGS.pretrained_model_checkpoint_path, variables_to_restore,
                            ignore_missing_vars=False)

            init_fn(sess)

            # The coordinator must be handed to the queue runners so that it can
            # actually stop their threads once the evaluation is finished.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            evaluated_predictions = []
            evaluated_labels = []

            try:
                for _ in tqdm(range(num_batches), desc = "Batch num"):
                    if coord.should_stop():
                        break

                    pr, l = sess.run([prediction, labels_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
            finally:
                # Always shut the input threads down; an error raised in the loop
                # propagates instead of yielding metrics over partial data.
                coord.request_stop()
                coord.join(threads)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))

            # Column 0 holds valence and column 1 arousal (order of the csv file).
            conc_arousal = concordance_cc2(predictions[:,1], labels[:,1])
            conc_valence = concordance_cc2(predictions[:,0], labels[:,0])

            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))

            mse_arousal = np.mean((predictions[:,1] - labels[:,1])**2)
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = np.mean((predictions[:,0] - labels[:,0])**2)
            print('MSE Valence : {}'.format(mse_valence))

    return conc_valence, conc_arousal, (conc_arousal+conc_valence)/2, mse_arousal, mse_valence

In [0]:
# Run the evaluation. The returned metrics are discarded here because
# evaluate() already prints the CCC and MSE values.
_ = evaluate()