In [1]:
import os
# Expose only GPU 0 to CUDA. This is set here, before TensorFlow is imported in a later cell.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
DATA_RAW_PATH = "../data/RAW/RT-BENE.zip" 
DATA_INTER_PATH = "../data/intermediate/"
DATA_PATH = f"{DATA_INTER_PATH}/RT-BENE"
if not os.path.exists(DATA_PATH):
    !unzip -q $DATA_RAW_PATH $DATA_INTER_PATH

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [4]:
# Annotation table read from blinks.csv inside the extracted dataset directory.
data = pd.read_csv(filepath_or_buffer=f"{DATA_PATH}/blinks.csv")
data

Unnamed: 0,blink_id,left_eye,right_eye,video,blink
0,0,0_left_000001_rgb.png,0_right_000001_rgb.png,0,0
1,1,0_left_000002_rgb.png,0_right_000002_rgb.png,0,0
2,2,0_left_000003_rgb.png,0_right_000003_rgb.png,0,0
3,3,0_left_000004_rgb.png,0_right_000004_rgb.png,0,0
4,4,0_left_000005_rgb.png,0_right_000005_rgb.png,0,0
...,...,...,...,...,...
107345,107345,16_left_009059_rgb.png,16_right_009059_rgb.png,16,0
107346,107346,16_left_009060_rgb.png,16_right_009060_rgb.png,16,0
107347,107347,16_left_009061_rgb.png,16_right_009061_rgb.png,16,0
107348,107348,16_left_009062_rgb.png,16_right_009062_rgb.png,16,0


### How many videos do we have?

In [5]:
# Sorted array of the distinct values found in the "video" column.
video_ids = np.unique(data["video"].to_numpy())
video_ids

array([ 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])

### Total images

In [6]:
# Number of non-null entries in the "blink_id" column.
data.loc[:, "blink_id"].count()

107350

### How many images per video?

In [7]:
# Non-null row counts per video, over all frames and over blink frames only.
frames_by_video = data.groupby(by=["video"]).count()
blink_frames_by_video = data.loc[data["blink"] == 1].groupby(by="video").count()

# Share of blink frames in each video, expressed as a percentage.
blinks_per_video = blink_frames_by_video.div(frames_by_video, level="video") * 100

# Summary table: frame count and blink percentage side by side.
data_per_video = pd.concat(
    [frames_by_video["blink_id"], blinks_per_video["blink"]],
    axis=1,
    keys=["Images in video","% blink frames"],
)
data_per_video

Unnamed: 0_level_0,Images in video,% blink frames
video,Unnamed: 1_level_1,Unnamed: 2_level_1
0,12865,7.236689
1,8671,1.476185
2,8702,9.066881
3,3205,5.210608
4,4750,2.736842
5,5355,2.054155
7,1857,8.023694
8,6108,7.514735
9,4210,1.068884
10,16559,2.131771


The table above shows that blink frames are a small minority of each video (roughly 1–9% of its frames), so a rebalancing method will have to be used.

### Create Dataset
To have a proper test partition we are going to split the dataset by video rather than by frame: one set of videos will be used for training, one for validation and one for testing.
We will select the videos with ids 13, 14 and 16 as the testing videos because their images represent around 20% of the total dataset and their percentage of blinks is similar to the rest of the dataset. Video 8 is held out for validation.

#### Split train and test sets

In [8]:
# Videos held out for testing and for validation.
testing_ids = [13,14,16]
validation_ids = [8]

# Training videos are every remaining id. The set difference is taken by VALUE:
# the previous np.delete(video_ids, ids - 1) treated ids as array positions and
# only gave the right answer because one id (6) is absent from video_ids and
# every held-out id is larger than it.
training_ids = np.setdiff1d(video_ids, testing_ids + validation_ids).tolist()
print(f"train: {training_ids}\nvalidation: {validation_ids}\ntesting: {testing_ids}")

train: [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 15]
validation: [8]
testing: [13, 14, 16]


In [9]:
# Partition the annotation rows according to the video each frame comes from,
# reporting the size of every partition as it is built.
train_data = data[data["video"].isin(training_ids)]
print(f"train_data: {train_data.shape[0]}")

val_data = data[data["video"].isin(validation_ids)]
print(f"val_data: {val_data.shape[0]}")

test_data = data[data["video"].isin(testing_ids)]
print(f"test_data: {test_data.shape[0]}")

train_data: 81736
val_data: 6108
test_data: 19506


#### Generic generator class

In [10]:
import cv2
class RTBeneDataset:
    """Callable source of (eye filenames, blink label) pairs for ``tf.data``.

    Calling an instance returns a generator that walks the annotation table once
    and yields ``([left_eye, right_eye], blink)`` per row. Image loading is done
    separately by :meth:`load_row` (OpenCV) or :meth:`tf_load_row` (TensorFlow).

    Args:
        phase: ``"train"`` shuffles the rows on construction and again after
            every full pass. Any other value keeps the rows in their given order.
        data: Table with at least ``left_eye``, ``right_eye`` and ``blink`` columns.
        mean: Stored on the instance; not used by any method of this class.
        std: Stored on the instance; not used by any method of this class.
        transforms: Stored on the instance; not applied by any method of this class.
    """

    def __init__(self, phase: str, data: pd.DataFrame, mean: float, std: float, transforms = None):
        self.phase = phase
        self.data = data
        self.mean = mean
        self.std = std
        self.transforms = transforms

        if self.phase == "train":
            # Shuffle the data
            self.data = self.data.sample(frac=1).reset_index(drop=True)
        else:
            # Keep the original order but make the index positional (0..n-1).
            self.data = self.data.reset_index(drop=True)

    def __getitem(self, idx):
        """Return ``([left_eye, right_eye], blink)`` for the row at position ``idx``."""
        record = self.data.loc[idx]
        return [record["left_eye"], record["right_eye"]], int(record["blink"])

    def __call__(self):
        """Yield every row once; reshuffle afterwards in the training phase only."""
        for i in range(self.data.shape[0]):
            yield self.__getitem(i)

        # Once the whole dataset has been read, reshuffle for the next pass.
        # Restricted to training so validation/test keep the fixed order that
        # __init__ sets up for them.
        if self.phase == "train":
            self.data = self.data.sample(frac=1).reset_index(drop=True)

    @staticmethod
    def load_row(x, y):
        """Load both eye images with OpenCV and scale pixel values by 1/255.

        Args:
            x: Sequence whose first two items are the left and right image filenames.
            y: Label, returned unchanged.

        Returns:
            ``((left_image, right_image), y)``.

        Raises:
            FileNotFoundError: If either image cannot be read.
        """
        images = []
        for filename in (x[0], x[1]):
            path = f"{DATA_PATH}/images/{filename}"
            image = cv2.imread(path)
            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                raise FileNotFoundError(f"Could not read image: {path}")
            images.append(image / 255)
        return (images[0], images[1]), y

    @staticmethod
    @tf.function
    def tf_load_row(x, y):
        """TensorFlow counterpart of :meth:`load_row`, usable in ``Dataset.map``.

        Reads the two PNG files named by ``x[0]`` and ``x[1]``, decodes them with
        3 channels and divides the pixel values by 255.

        Returns:
            ``((left_image, right_image), y)``.
        """
        image_l = tf.io.read_file(tf.strings.join([f"{DATA_PATH}/images/", x[0]]))
        image_r = tf.io.read_file(tf.strings.join([f"{DATA_PATH}/images/", x[1]]))
        image_l = tf.image.decode_png(image_l, channels=3)
        image_r = tf.image.decode_png(image_r, channels=3)
        return (image_l/255, image_r/255), y

In [11]:
# One generator source per split. Only the "train" phase shuffles, so the test
# split is built with the "val" phase as well.
train_RTB = RTBeneDataset(phase="train", data=train_data, mean=127.5, std=1)
val_RTB = RTBeneDataset(phase="val", data=val_data, mean=127.5, std=1)
test_RTB = RTBeneDataset(phase="val", data=test_data, mean=127.5, std=1)

In [12]:
batch_size = 128

# Structure of one generator element: two filenames and a scalar integer label.
_ELEMENT_SPEC = (
    tf.TensorSpec(shape=(2,), dtype=tf.string),
    tf.TensorSpec(shape=(), dtype=tf.int32),
)


def _make_dataset(source, repeat=False):
    """Build the batched input pipeline for one RTBeneDataset generator.

    Args:
        source: Callable returning a generator of ``([left, right], blink)`` items.
        repeat: When True the dataset restarts indefinitely (used for training).

    Returns:
        A ``tf.data.Dataset`` of ``((left_images, right_images), labels)`` batches.
    """
    dataset = tf.data.Dataset.from_generator(source, output_signature=_ELEMENT_SPEC)
    dataset = dataset.map(RTBeneDataset.tf_load_row, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    if repeat:
        dataset = dataset.repeat()
    # Prefetch last so that whole batches are prepared ahead of the consumer.
    return dataset.prefetch(tf.data.AUTOTUNE)


train_dataset = _make_dataset(train_RTB, repeat=True)
val_dataset = _make_dataset(val_RTB)
test_dataset = _make_dataset(test_RTB)

2022-02-05 12:54:11.354113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 12:54:11.385436: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 12:54:11.385780: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 12:54:11.386932: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

### Create model

In [13]:
from tensorflow import keras
from tensorflow.keras.applications.vgg16 import VGG16

In [14]:
# Two image inputs, one per eye, each of shape (36, 60, 3).
left_eye = keras.Input(shape=(36, 60, 3))
right_eye = keras.Input(shape=(36, 60, 3))
# A separate VGG16 backbone per eye: ImageNet weights, no classification head.
# Two instances are created, so the branches do not share weights.
vgg_left = VGG16(weights="imagenet", include_top=False, input_shape=(36, 60, 3))
# Overwrites the private `_name` attribute to label this backbone.
# NOTE(review): presumably needed so the two backbones have distinct names in the outer model — confirm.
vgg_left._name = "vgg_left"
vgg_right = VGG16(weights="imagenet", include_top=False, input_shape=(36, 60, 3))
vgg_right._name = "vgg_right"
# Feature maps for each eye.
left_feat_extractor = vgg_left(left_eye)
right_feat_extractor = vgg_right(right_eye)
# Join both feature maps, flatten, then classify with a small dense head.
concat = keras.layers.Concatenate()([left_feat_extractor, right_feat_extractor])
flat = keras.layers.Flatten()(concat)
dense_1 = keras.layers.Dense(128, activation="relu")(flat)
# Single sigmoid unit: one value in [0, 1] per input pair.
out = keras.layers.Dense(1, activation="sigmoid")(dense_1)

model = keras.Model(inputs=([left_eye, right_eye]), outputs=out)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 36, 60, 3)]  0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 36, 60, 3)]  0           []                               
                                                                                                  
 vgg_left (Functional)          (None, 1, 1, 512)    14714688    ['input_1[0][0]']                
                                                                                                  
 vgg_right (Functional)         (None, 1, 1, 512)    14714688    ['input_2[0][0]']                
                                                                                              

In [15]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / actual positives.

    Both terms are clipped to [0, 1] and rounded before being summed;
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / predicted positives.

    Both terms are clipped to [0, 1] and rounded before being summed;
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def F1_score(y_true, y_pred):
    """F1 over the given tensors, built from ``precision_m`` and ``recall_m``.

    Computed as ``2 * p * r / (p + r + K.epsilon())``.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half
    
# Metrics reported during fit/evaluate: Keras built-ins plus the custom F1.
tracked_metrics = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    F1_score,
]

# Binary classification setup: Adam with default settings and binary cross-entropy.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=tracked_metrics,
)

In [16]:
from datetime import datetime

# Identifier shared by the checkpoint and TensorBoard directories of this run.
# The full date and time are used so runs started on different days cannot
# collide (the previous "%m-%H%M%S" pattern omitted the day and the year).
run_id = f'{model.name}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'

# Save weights only when the validation F1 improves; the file name records the
# epoch and the score.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=f"models/{run_id}/" + "{epoch:02d}-{val_F1_score:.2f}",
    save_weights_only=True,
    monitor='val_F1_score',
    mode='max',
    save_best_only=True)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f"./logs/{run_id}", update_freq=100,)

# The training dataset repeats forever, so an epoch is bounded explicitly by the
# number of full batches. Validation uses ceiling division so the final partial
# batch is evaluated too instead of being dropped.
train_steps = train_data.shape[0] // batch_size
val_steps = -(-val_data.shape[0] // batch_size)

history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=[model_checkpoint_callback, tensorboard_callback],
)

Epoch 1/10


2022-02-05 12:54:20.313724: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8204
2022-02-05 12:54:21.090801: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-02-05 12:54:22.706547: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Checkpoint written by one specific earlier run (epoch 09, val F1 0.64).
# NOTE(review): the path is hard-coded, so a fresh run produces a different
# directory and this cell needs updating; the execution count also jumps to 19.
BEST_WEIGHTS_PATH = "models/model-02-125418/09-0.64"

model.load_weights(BEST_WEIGHTS_PATH)
results = model.evaluate(test_dataset)

