In [1]:
import os
# Expose only the CUDA device with index 1 to this process. The variable is set
# before TensorFlow is imported further down in the notebook.
# NOTE(review): the device index is hard-coded; adjust it on machines with a
# different GPU layout.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
# Location of the raw archive and of the directory it is extracted into.
# NOTE(review): DATA_INTER_PATH is an absolute, machine-specific path; change it
# when running the notebook elsewhere.
DATA_RAW_PATH = "../data/RAW/RT-BENE.zip"
DATA_INTER_PATH = "/opt/data/Ubuntu/projects/RT-BENE/data/intermediate"
DATA_PATH = f"{DATA_INTER_PATH}/RT-BENE"

# Extract the archive only when the target directory does not exist yet.
# The standard-library zipfile module is used instead of a shell `unzip` call so
# the cell does not depend on an external binary or on unquoted shell
# interpolation of the paths (which breaks on paths containing spaces).
if not os.path.exists(DATA_PATH):
    import zipfile

    with zipfile.ZipFile(DATA_RAW_PATH) as archive:
        archive.extractall(DATA_INTER_PATH)

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [4]:
# Load the per-frame annotation table (one row per pair of eye images) from the
# extracted dataset directory and display it.
blinks_csv = f"{DATA_PATH}/blinks.csv"
data = pd.read_csv(blinks_csv)
data

Unnamed: 0,blink_id,left_eye,right_eye,video,blink
0,0,0_left_000001_rgb.png,0_right_000001_rgb.png,0,0
1,1,0_left_000002_rgb.png,0_right_000002_rgb.png,0,0
2,2,0_left_000003_rgb.png,0_right_000003_rgb.png,0,0
3,3,0_left_000004_rgb.png,0_right_000004_rgb.png,0,0
4,4,0_left_000005_rgb.png,0_right_000005_rgb.png,0,0
...,...,...,...,...,...
107345,107345,16_left_009059_rgb.png,16_right_009059_rgb.png,16,0
107346,107346,16_left_009060_rgb.png,16_right_009060_rgb.png,16,0
107347,107347,16_left_009061_rgb.png,16_right_009061_rgb.png,16,0
107348,107348,16_left_009062_rgb.png,16_right_009062_rgb.png,16,0


### How many videos do we have?

In [5]:
# Sorted array with the distinct values of the "video" column.
video_column = data["video"].to_numpy()
video_ids = np.unique(video_column)
video_ids

array([ 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])

### Total images

In [6]:
# Number of non-null "blink_id" entries, used here as the total number of images.
data.loc[:, "blink_id"].count()

107350

### How many images per video?

In [7]:
# Per-video summary: number of frames and percentage of frames labelled as a
# blink (blink == 1).
# The percentage is the mean of a boolean mask grouped by video, so a video that
# contains no blink frame is reported as 0 % rather than NaN, and
# `data_per_video` is bound exactly once to its final meaning.
is_blink = data["blink"].eq(1)
data_per_video = pd.DataFrame({
    "Images in video": data.groupby("video")["blink_id"].count(),
    "% blink frames": is_blink.groupby(data["video"]).mean() * 100,
})
data_per_video

Unnamed: 0_level_0,Images in video,% blink frames
video,Unnamed: 1_level_1,Unnamed: 2_level_1
0,12865,7.236689
1,8671,1.476185
2,8702,9.066881
3,3205,5.210608
4,4750,2.736842
5,5355,2.054155
7,1857,8.023694
8,6108,7.514735
9,4210,1.068884
10,16559,2.131771


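The table above shows that blink frames are a small minority in every video (roughly 1–9% of the frames), so the classes are heavily imbalanced and a rebalancing method will have to be used.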

### Create Dataset
To have a proper test partition we are going to split the dataset by video, so that no video contributes frames to more than one partition. One set of videos will be used for training, one for validation and one for testing.
We will select the videos with ids 13, 14 and 16 as the testing videos because their images represent around 20% of the total dataset and their percentage of blinks is similar to the rest of the dataset. Video 8 is held out for validation.

#### Split train, validation and test sets

In [8]:
# Videos held out for testing and for validation.
testing_ids = [13,14,16]
validation_ids = [8]

# Select the training videos by value.
# Removing entries by position (id - 1) only gives the right result while the
# position of every held-out id inside video_ids happens to equal id - 1; for any
# other id it silently drops the wrong video.
held_out_ids = set(testing_ids) | set(validation_ids)
unknown_ids = held_out_ids - set(video_ids.tolist())
assert not unknown_ids, f"held-out ids not present in the data: {sorted(unknown_ids)}"
training_ids = [vid for vid in video_ids.tolist() if vid not in held_out_ids]
print(f"train: {training_ids}\nvalidation: {validation_ids}\ntesting: {testing_ids}")

train: [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 15]
validation: [8]
testing: [13, 14, 16]


In [9]:
# Build one frame per partition by keeping the rows whose video id belongs to
# that partition, then report the number of rows in each.
train_data, val_data, test_data = (
    data.loc[data["video"].isin(partition_ids)]
    for partition_ids in (training_ids, validation_ids, testing_ids)
)
print(f"train_data: {train_data.shape[0]}")
print(f"val_data: {val_data.shape[0]}")
print(f"test_data: {test_data.shape[0]}")

train_data: 81736
val_data: 6108
test_data: 19506


#### Generic generator class

In [11]:
import cv2
class RTBeneDataset:
    """Python generator over RT-BENE annotation rows, for use with tf.data.

    Calling an instance yields one ``([left_eye, right_eye], blink)`` pair per
    row of ``data``: the two image file names and the blink label. The images
    themselves are loaded afterwards by ``load_row`` or ``tf_load_row``.

    Args:
        phase: ``"train"`` shuffles the rows on construction and again after
            every complete pass; any other value keeps the rows in the order in
            which they were given.
        data: frame with ``left_eye``, ``right_eye`` and ``blink`` columns.
        mean: stored on the instance; not used by the methods of this class.
        std: stored on the instance; not used by the methods of this class.
        transforms: stored on the instance; not used by the methods of this class.
    """

    def __init__(self, phase: str, data: pd.DataFrame, mean: float, std: float, transforms = None):
        self.phase = phase
        self.data = data
        self.mean = mean
        self.std = std
        self.transforms = transforms

        if self.phase == "train":
            # Shuffle the rows; sample() returns a new frame, so the frame passed
            # in by the caller is left untouched.
            self.data = self.data.sample(frac=1).reset_index(drop=True)
        else:
            self.data = self.data.reset_index(drop=True)

    def __getitem(self, idx):
        """Return ``([left_eye, right_eye], blink)`` for the row at position ``idx``."""
        eye_files = self.data.loc[idx, ["left_eye", "right_eye"]].to_list()
        label = self.data.loc[idx, ["blink"]].to_list()[0]
        return eye_files, label

    def __call__(self):
        """Yield every row once, in the current order of ``self.data``."""
        for i in range(self.data.shape[0]):
            yield self.__getitem(i)

        # After a complete pass, reshuffle for the next one. This is restricted
        # to the training phase so that validation and test rows keep a stable
        # order across passes, consistent with the constructor.
        if self.phase == "train":
            self.data = self.data.sample(frac=1).reset_index(drop=True)

    @staticmethod
    def load_row(x, y):
        """Load one pair of eye images with OpenCV.

        Args:
            x: sequence holding the left and right image file names.
            y: label, returned unchanged.

        Returns:
            ``((left_image, right_image), y)`` with both images divided by 255
            and in RGB channel order, matching ``tf_load_row``.

        Raises:
            FileNotFoundError: if OpenCV cannot read one of the image files.
        """
        images = []
        for file_name in (x[0], x[1]):
            path = f"{DATA_PATH}/images/{file_name}"
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                raise FileNotFoundError(f"could not read image: {path}")
            # cv2.imread returns BGR; convert so both loaders agree on RGB.
            images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255)
        return (images[0], images[1]), y

    @staticmethod
    @tf.function
    def tf_load_row(x, y):
        """Load one pair of eye images with TensorFlow ops.

        Args:
            x: string tensor holding the left and right image file names.
            y: label, returned unchanged.

        Returns:
            ``((left_image, right_image), y)`` where each image is decoded as a
            3-channel PNG and divided by 255.
        """
        image_l = tf.io.read_file(tf.strings.join([f"{DATA_PATH}/images/", x[0]]))
        image_r = tf.io.read_file(tf.strings.join([f"{DATA_PATH}/images/", x[1]]))
        image_l = tf.image.decode_png(image_l, channels=3)
        image_r = tf.image.decode_png(image_r, channels=3)
        return (image_l/255, image_r/255), y

## Resampling to get better distribution of the data

In [39]:
# Split the training frames by label so that both classes get their own
# generator and can be sampled independently.
blink_rows = train_data.loc[train_data["blink"] == 1]
no_blink_rows = train_data.loc[train_data["blink"] == 0]

train_blink_RTB = RTBeneDataset("train", data=blink_rows, mean=127.5, std=1)
train_no_blink_RTB = RTBeneDataset("train", data=no_blink_rows, mean=127.5, std=1)

# Validation and test generators both use a non-"train" phase, so their rows are
# not shuffled on construction.
val_RTB = RTBeneDataset("val", data=val_data, mean=127.5, std=1)
test_RTB = RTBeneDataset("val", data=test_data, mean=127.5, std=1)

In [45]:
batch_size = 128

# Every generator yields ([left_file, right_file], blink): a pair of file names
# and an integer label.
row_signature = (
    tf.TensorSpec(shape=(2,), dtype=tf.string),
    tf.TensorSpec(shape=(), dtype=tf.int32),
)


def make_row_dataset(rows):
    """Wrap an RTBeneDataset generator in a tf.data.Dataset of file-name rows."""
    return tf.data.Dataset.from_generator(rows, output_signature=row_signature)


def make_eval_dataset(rows):
    """Build a finite, batched image dataset for validation or testing.

    Prefetching is the last step so that whole batches are prepared in the
    background while the previous batch is being consumed.
    """
    return (
        make_row_dataset(rows)
        .map(RTBeneDataset.tf_load_row, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )


# Both class-specific datasets repeat forever, so sampling from them with equal
# weights yields an endless, class-balanced stream; no further repeat is needed.
blink_dataset = make_row_dataset(train_blink_RTB).repeat()
no_blink_dataset = make_row_dataset(train_no_blink_RTB).repeat()

train_dataset = (
    tf.data.Dataset.sample_from_datasets([blink_dataset, no_blink_dataset], [0.5, 0.5])
    .map(RTBeneDataset.tf_load_row, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

val_dataset = make_eval_dataset(val_RTB)
test_dataset = make_eval_dataset(test_RTB)

### Create model

In [114]:
from tensorflow import keras
from tensorflow.keras import regularizers, initializers, layers
from tensorflow.keras.applications import DenseNet121

In [115]:
def _frozen_densenet(name):
    """Create an ImageNet-initialised DenseNet121 without its top, renamed to
    ``name`` and with every layer marked as non-trainable."""
    backbone = DenseNet121(weights="imagenet", include_top=False, input_shape=(36, 60, 3))
    # presumably renamed so the two backbones do not share a name inside the
    # outer model; verify
    backbone._name = name
    for backbone_layer in backbone.layers:
        backbone_layer.trainable=False
    return backbone


def _dense_block(units, inputs):
    """Dense(relu, L1-regularised, Glorot-normal) followed by Dropout(0.2).

    Returns both the dense output and the dropout output.
    """
    hidden = layers.Dense(units, activation="relu",
                          kernel_regularizer=regularizers.l1(1e-5),
                          kernel_initializer=initializers.GlorotNormal)(inputs)
    return hidden, layers.Dropout(0.2)(hidden)


# One input and one frozen feature extractor per eye.
left_eye = keras.Input(shape=(36, 60, 3))
right_eye = keras.Input(shape=(36, 60, 3))
feature_extractor_left = _frozen_densenet("feat_left")
feature_extractor_right = _frozen_densenet("feat_right")
left_feat_extractor = feature_extractor_left(left_eye)
right_feat_extractor = feature_extractor_right(right_eye)

# Classification head on the concatenated features of both eyes.
concat = layers.Concatenate()([left_feat_extractor, right_feat_extractor])
flat = layers.Flatten()(concat)
dense_1, do_1 = _dense_block(256, flat)
dense_2, do_2 = _dense_block(128, do_1)
out = layers.Dense(1, activation="sigmoid")(do_2)

# NOTE(review): the model name says "resnet" although the backbone is
# DenseNet121; it is left unchanged because it becomes part of the run id.
model = keras.Model(inputs=([left_eye, right_eye]), outputs=out, name="resnet_all_data_rebalanced_dropout")
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "resnet_all_data_rebalanced_dropout"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_81 (InputLayer)          [(None, 36, 60, 3)]  0           []                               
                                                                                                  
 input_82 (InputLayer)          [(None, 36, 60, 3)]  0           []                               
                                                                                                  
 feat_left (Functional)         (None, 1, 1, 1024)   7037504     ['input_81[0][0]']               
                                                                                                  
 feat_right (Fun

In [116]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall computed on the given tensors: true positives / actual positives.

    The product ``y_true * y_pred`` and ``y_true`` are each clipped to [0, 1]
    and rounded before being summed. ``K.epsilon()`` is added to the denominator
    so the ratio stays finite when there are no positives.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed on the given tensors: true positives / predicted positives.

    The product ``y_true * y_pred`` and ``y_pred`` are each clipped to [0, 1]
    and rounded before being summed. ``K.epsilon()`` is added to the denominator
    so the ratio stays finite when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def F1_score(y_true, y_pred):
    """Harmonic mean of ``precision_m`` and ``recall_m`` for the given tensors.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    NOTE(review): when passed as a plain function metric this is presumably
    evaluated per batch and averaged, which can differ from the F1 of the whole
    dataset - confirm before comparing against other reports.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    numerator = p * r
    denominator = p + r + K.epsilon()
    return 2 * (numerator / denominator)
    
# Binary classification setup: Adam optimiser, binary cross-entropy loss, and
# accuracy / precision / recall / F1 reported during training.
adam_optimizer = keras.optimizers.Adam(learning_rate=1e-3)
bce_loss = keras.losses.BinaryCrossentropy()
tracked_metrics = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    F1_score,
]
model.compile(optimizer=adam_optimizer, loss=bce_loss, metrics=tracked_metrics)

In [117]:
from datetime import datetime

# Identifier of this run, used for the checkpoint and TensorBoard directories.
# The timestamp contains year, month and day as well as the time of day, so runs
# started on different days never share a directory and sort chronologically.
run_id = f'{model.name}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'

# Keep only the weights of the epochs that improve the validation F1 score.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=f"models/{run_id}/" + "{epoch:02d}-{val_F1_score:.2f}",
    save_weights_only=True,
    monitor='val_F1_score',
    mode='max',
    save_best_only=True)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f"./logs/{run_id}", update_freq=100)

# Multiply the learning rate by 0.2 when the validation loss has not improved
# for 5 epochs.
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=5)

# The training dataset is endless, so an epoch is defined as roughly one pass
# over the training frames. The validation dataset is finite: validation_steps is
# left unset so each validation run consumes it completely, including the final
# partial batch that a truncated step count would skip.
history = model.fit(train_dataset,
          validation_data=val_dataset,
          epochs=20,
          steps_per_epoch=train_data.shape[0] // batch_size,
          callbacks=[model_checkpoint_callback, tensorboard_callback, reduce_lr_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [86]:
# Restore the weights saved by the ModelCheckpoint callback of the current run
# instead of a hard-coded checkpoint from a run with a different model name.
# The callback is configured with save_best_only=True, so the most recently
# written checkpoint in the run directory is the best one by val_F1_score.
best_checkpoint = tf.train.latest_checkpoint(f"models/{run_id}")
assert best_checkpoint is not None, f"no checkpoint found in models/{run_id}"
model.load_weights(best_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f45483d6a90>

In [83]:
# Evaluate the model on the dataset built from the held-out test videos.
# presumably `results` holds the loss followed by the compiled metrics, in
# that order - confirm against the Keras version in use
results = model.evaluate(test_dataset)

