In [1]:
import tensorflow as tf
from tensorflow.keras import Model, layers

In [2]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
import numpy as np 
import matplotlib.pyplot as plt
import json
import os 
from PIL import Image

In [3]:
AUTO = tf.data.experimental.AUTOTUNE # used in tf.data.Dataset API

TRAINING_FILENAMES = '../datasets/gazetrack_tfrec/train.tfrec'
VALID_FILENAMES = '../datasets/gazetrack_tfrec/val.tfrec'
TEST_FILENAMES = '../datasets/gazetrack_tfrec/test.tfrec'
BATCH_SIZE = 256

SEED = tf.Variable(256)

In [4]:
def parse_tfrecord_fn(example):
    feature_description = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "path": tf.io.FixedLenFeature([], tf.string),
        "device": tf.io.FixedLenFeature([], tf.string),
        "screen_h": tf.io.FixedLenFeature([], tf.int64),
        "screen_w": tf.io.FixedLenFeature([], tf.int64),
        "face_valid": tf.io.FixedLenFeature([], tf.int64),
        "face_x": tf.io.FixedLenFeature([], tf.int64),
        "face_y": tf.io.FixedLenFeature([], tf.int64),
        "face_w": tf.io.FixedLenFeature([], tf.int64),
        "face_h": tf.io.FixedLenFeature([], tf.int64),
        "leye_x": tf.io.FixedLenFeature([], tf.int64),
        "leye_y": tf.io.FixedLenFeature([], tf.int64),
        "leye_w": tf.io.FixedLenFeature([], tf.int64),
        "leye_h": tf.io.FixedLenFeature([], tf.int64),
        "reye_x": tf.io.FixedLenFeature([], tf.int64),
        "reye_y": tf.io.FixedLenFeature([], tf.int64),
        "reye_w": tf.io.FixedLenFeature([], tf.int64),
        "reye_h": tf.io.FixedLenFeature([], tf.int64),
        "dot_xcam": tf.io.FixedLenFeature([], tf.float32),
        "dot_y_cam": tf.io.FixedLenFeature([], tf.float32),
        "dot_x_pix": tf.io.FixedLenFeature([], tf.float32),
        "dot_y_pix": tf.io.FixedLenFeature([], tf.float32),
        "reye_x1": tf.io.FixedLenFeature([], tf.int64),
        "reye_y1": tf.io.FixedLenFeature([], tf.int64),
        "reye_x2": tf.io.FixedLenFeature([], tf.int64),
        "reye_y2": tf.io.FixedLenFeature([], tf.int64),
        "leye_x1": tf.io.FixedLenFeature([], tf.int64),
        "leye_y1": tf.io.FixedLenFeature([], tf.int64),
        "leye_x2": tf.io.FixedLenFeature([], tf.int64),
        "leye_y2": tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(example, feature_description)
    example["image"] = tf.io.decode_jpeg(example["image"], channels=3)
    return example


def augmentation(image, training = True):
    if training:
        aug = tf.keras.Sequential([
                layers.Resizing(128+10, 128+10),
                layers.RandomCrop(128, 128, 256),
                layers.Rescaling(1./255),
                layers.Normalization(mean = (0.3741, 0.4076, 0.5425), variance = (0.0004, 0.0004, 0.0004))
                ])
        
    else:
        aug = tf.keras.Sequential([
                layers.Resizing(128+10, 128+10),
                layers.Rescaling(1./255),
                layers.Normalization(mean = (0.3741, 0.4076, 0.5425), variance = (0.0004, 0.0004, 0.0004))
                ])
    
    image = aug(image)
    
    return image

def prepare_sample(features):
    image = features['image']
    w = tf.shape(image)[0]
    h = tf.shape(image)[1]
    
    w = tf.cast(w, tf.int64)
    h = tf.cast(h, tf.int64)
    
    screen_w, screen_h = features['screen_w'], features['screen_h']
    
    kps = [features['leye_x1']/w, features['leye_y1']/h, features['leye_x2']/w, features['leye_y2']/h,
           features['reye_x1']/w, features['reye_y1']/h, features['reye_x2']/w, features['reye_y2']/h]
    # kps has type float64
    

    lx, ly, lw, lh = features['leye_x'], features['leye_y'], features['leye_w'], features['leye_h']
    rx, ry, rw, rh = features['reye_x'], features['reye_y'], features['reye_w'], features['reye_h']
    
    # lx, ly, lw, lh = tf.cast((lx, ly, lw, lh), tf.int32)
    # rx, ry, rw, rh = tf.cast((rx, ry, rw, rh), tf.int32)
    
    lx = tf.cast(lx, tf.int32)
    ly = tf.cast(ly, tf.int32)
    lw = tf.cast(lw, tf.int32)
    lh = tf.cast(lh, tf.int32)
    
    rx = tf.cast(rx, tf.int32)
    ry = tf.cast(ry, tf.int32)
    rw = tf.cast(rw, tf.int32)
    rh = tf.cast(rh, tf.int32)
    
    # l_eye = tf.image.crop_to_bounding_box(image, max(0, ly), max(0, lx), max(0, lh), max(0, lw))  
    # r_eye = tf.image.crop_to_bounding_box(image, max(0, ry), max(0, rx), max(0, rh), max(0, rw))
    
    l_eye = tf.image.crop_to_bounding_box(image, ly, lx, lh, lw)  
    r_eye = tf.image.crop_to_bounding_box(image, ry, rx, rh, rw)
    
    l_eye = tf.image.flip_left_right(l_eye)
    
    out = [features['dot_xcam'], features['dot_y_cam']]
    # out has type float32
    
    l_eye = augmentation(l_eye)
    r_eye = augmentation(r_eye)
    
    # model_in = (l_eye, r_eye, kps)
    # return model_in, out
    return l_eye, r_eye, kps, out
    # return l_eye, r_eye, kps, out, screen_w, screen_h

def get_batched_dataset(filenames, batch_size):
    option_no_order = tf.data.Options()
    option_no_order.deterministic = False  # disable order, increase speed
    
    dataset = (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
        .with_options(option_no_order)
        .map(parse_tfrecord_fn, num_parallel_calls=AUTO)
        .map(prepare_sample, num_parallel_calls=AUTO)
        .shuffle(batch_size*10)
        .batch(batch_size)
        .prefetch(buffer_size=AUTO)
    )
    
    dataset_len = sum(1 for _ in tf.data.TFRecordDataset(filenames))
    print(f"No. of train samples: {dataset_len}")
    
    return dataset

In [5]:
# train_dataset = get_batched_dataset(TRAINING_FILENAMES, BATCH_SIZE)
valid_dataset = get_batched_dataset(VALID_FILENAMES, BATCH_SIZE)
# test_dataset = get_batched_dataset(TEST_FILENAMES, BATCH_SIZE)

No. of train samples: 43458


In [6]:
# iterator = iter(train_dataset)
# iterator.get_next()
sample = next(iter(valid_dataset))

In [7]:
limg, rimg, lms, out = sample

limg[0].shape

TensorShape([128, 128, 3])

In [8]:
lms[0]

<tf.Tensor: shape=(8,), dtype=float64, numpy=
array([0.43125   , 0.79791667, 0.5203125 , 0.78958333, 0.2265625 ,
       0.78125   , 0.309375  , 0.79375   ])>

In [9]:
img_array = tf.expand_dims(limg[0], axis=0)

In [10]:
img_array.shape

TensorShape([1, 128, 128, 3])

In [11]:
# for features in train_dataset.take(1):
#     print(len(features[1]))
#     print(len(features[0]))
#     print(len(features[0][0]))
    
    
    
#     image = features['image']
    
    
#     print(f"lefteye shape: {l_eye.shape}")
#     print(f"righteye shape: {l_eye.shape}")
#     plt.figure(figsize=(7, 7))
#     plt.imshow(l_eye)
#     plt.show()
    
#     plt.figure(figsize=(7, 7))
#     plt.imshow(r_eye)
#     plt.show()

### Model

In [12]:
class eye_model(Model):
  def __init__(self):
    super(eye_model, self).__init__(name='')

    self.conv1 = layers.Conv2D(32, kernel_size=7, strides=2, padding='valid', name='em-conv2d1')
    self.conv2 = layers.Conv2D(64, kernel_size=5, strides=2, padding='valid', name='em-conv2d2')
    self.conv3 = layers.Conv2D(128, kernel_size=3, strides=1, padding='valid', name='em-conv2d3')
    self.bn = layers.BatchNormalization(axis = 1, momentum=0.9, name='em-bn')
    self.leakyrelu = layers.LeakyReLU(alpha=0.01, name='em-leaky-relu') 
    self.avgpool = layers.AveragePooling2D(pool_size=2, name='em-relu')
    self.dropout = layers.Dropout(rate=0.02, name='em-dropout')
    

  def call(self, input_tensor):
    x = self.conv1(input_tensor)
    x = self.bn(x)
    x = self.leakyrelu(x)
    x = self.avgpool(x)
    x = self.dropout(x)
    
    x = self.conv2(x)
    x = self.bn(x)
    x = self.leakyrelu(x)
    x = self.avgpool(x)
    x = self.dropout(x)
    
    x = self.conv3(x)
    x = self.bn(x)
    x = self.leakyrelu(x)
    x = self.avgpool(x)
    x = self.dropout(x)
    
    return x

class landmark_model(Model):
  def __init__(self):
    super(landmark_model, self).__init__(name='')

    self.dense1 = layers.Dense(128, name='lm-dense1')
    self.dense2 = layers.Dense(16, name='lm-dense2')
    self.dense3 = layers.Dense(16, name='lm-dense3')
    self.bn = layers.BatchNormalization(momentum=0.9, name='lm-bn')
    self.relu = layers.ReLU(name='lm-relu')

  def call(self, input_tensor):
    x = self.dense1(input_tensor)
    x = self.bn(x)
    x = self.relu(x)
    
    x = self.dense2(x)
    x = self.bn(x)
    x = self.relu(x)
    
    x = self.dense3(x)
    x = self.bn(x)
    x = self.relu(x)   
    
    return x

class gazetrack_model(Model):
  def __init__(self):
    super(gazetrack_model, self).__init__(name='')

    self.eye_model = eye_model()
    self.lmModel = landmark_model()
    
    self.dense1 = layers.Dense(8, name='gm-dense1')
    self.dense2 = layers.Dense(4, name='gm-dense2')
    self.dense3 = layers.Dense(2, name='gm-dense3')
    
    self.bn = layers.BatchNormalization(momentum=0.9, name='gm-bn')
    self.dropout = layers.Dropout(rate=0.12, name='gm-dropout')
    self.relu = layers.ReLU(name='gm-relu')

    

  def call(self, model_in):
    leftEye, rightEye, lms = model_in
    l_eye_feat = tf.reshape(self.eye_model(leftEye), (3, 128*128))
    r_eye_feat = tf.reshape(self.eye_model(rightEye), (3, 128*128))
    
    lm_feat = self.lmModel(lms)
    
    combined_feat = tf.concat((l_eye_feat, r_eye_feat, lm_feat),1)
    
    x = self.dense1(combined_feat)
    x = self.bn(x)
    x = self.dropout(x)
    x = self.relu(x)
    
    x = self.dense2(x)
    x = self.bn(x)
    x = self.relu(x)
    
    x = self.dense3(x)
    
    return x

In [13]:
lr = 0.016
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr,beta_1=0.9, beta_2=0.999, epsilon=1e-07)
scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', verbose=1, mode='min')

In [14]:
batch_size = 256

In [15]:
# strategy = tf.distribute.MirroredStrategy()
# print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# with strategy.scope():

model = gazetrack_model()

# # input_shape = (256, 32, 32, 3)
# model.build((leftEye, rightEye, lms))
# print(model.summary())

### Train

In [16]:
model.compile(optimizer=optimizer, loss=loss, metrics=[tf.keras.metrics.mean_squared_error])

In [17]:
# model_predict = Model.predict()
# model_predict_batch = Model.predict_on_batch()

model.fit(
    x=train_dataset,   
    batch_size=batch_size,  
    epochs=1,  
    verbose='auto',   #auto=1, 1=progress bar, 2=one line per epoch( maybe use 2 if running job)
    callbacks=[scheduler],
    validation_data=valid_dataset,
    shuffle=True,    #probably will not work as our dataset is a tf.data object
    initial_epoch=0,     #epoch at which to resume training
    workers=1,
    use_multiprocessing=False
)


NameError: name 'train_dataset' is not defined

In [None]:
# SAVE
model.save(args.model_output + '/1')

In [None]:
# EXTRA
validation_array = np.array(list(validation_dataset.unbatch().take(-1).as_numpy_iterator()))
test_x = np.stack(validation_array[:,0])
test_y = np.stack(validation_array[:,1])

# Use the model to predict the labels
test_predictions = model.predict(test_x)
test_y_pred = np.argmax(test_predictions, axis=1)
test_y_true = np.argmax(test_y, axis=1)

# Evaluating model accuracy and logging it as a scalar for TensorBoard hyperparameter visualization.
accuracy = sklearn.metrics.accuracy_score(test_y_true, test_y_pred)
tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
logging.info('Test accuracy:{}'.format(accuracy))