In [9]:
from scipy.io import loadmat
import numpy as np
import pandas as pd
from PIL import Image
import glob
from utils import *
from tqdm import tqdm
from sklearn.model_selection import train_test_split

If there is no dataset and utils:

In [None]:
!wget https://raw.githubusercontent.com/rostyslavb/GazeEstimator/master/model/utils.py
!wget http://datasets.d2.mpi-inf.mpg.de/MPIIGaze/MPIIGaze.tar.gz
!tar -xzfv MPIIGaze.tar.gz

## Prepare data

### Gather Data from Structure

In [10]:
index, image, pose, gaze = gather_all_data('./MPIIGaze/Data/Normalized')

100%|██████████| 521/521 [00:15<00:00, 34.21it/s]


In [45]:
gaze = gaze3Dto2D(gaze)
pose = pose3Dto2D(pose)

In [46]:
def print_shapes(titles, items):
    for title, item in zip(titles, items):
        print((title+':').ljust(15) + str(item.shape))

print_shapes(['Indices', 'Images', 'Poses', 'Gazes'], (index, image, pose, gaze))

Indices:       (427316, 4)
Images:        (427316, 36, 60, 1)
Poses:         (427316, 2)
Gazes:         (427316, 2)


### Train/test split

Train/test split stratified by particapants and eyes (left, right).

In [47]:
random_state = 42
index_train, index_test = train_test_split(pd.DataFrame(index),
                                           stratify=index[:, [0, -1]],
                                           test_size=0.2,
                                           random_state=random_state)

index_train = index_train.index
index_test = index_test.index

**Train:**

In [48]:
print_shapes(['Indices', 'Images', 'Poses', 'Gazes'],
             (index[index_train], image[index_train], pose[index_train], gaze[index_train]))

Indices:       (341852, 4)
Images:        (341852, 36, 60, 1)
Poses:         (341852, 2)
Gazes:         (341852, 2)


**Test:**

In [49]:
print_shapes(['Indices', 'Images', 'Poses', 'Gazes'],
             (index[index_test], image[index_test], pose[index_test], gaze[index_test]))

Indices:       (85464, 4)
Images:        (85464, 36, 60, 1)
Poses:         (85464, 2)
Gazes:         (85464, 2)


## Create NN

In [6]:
from keras.layers import Input, Conv2D, MaxPool2D, Dense, Concatenate, Flatten, Dropout
from keras.initializers import RandomNormal, glorot_normal
from keras.models import Model
from keras import backend as K
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.models import Model

import tensorflow as tf

### Model

In [2]:
def calc_angle(vector1, vector2):
    
    def to_vector(array):

        x = (-1)*K.cos(array[:, 0]) * K.sin(array[:, 1])
        y = (-1)*K.sin(array[:, 0])
        z = (-1)*K.cos(array[:, 0]) * K.cos(array[:, 1])

        return tf.stack((x, y, z), axis=1)

    def calc_norm(array):
        return tf.norm(array, axis=1)
    
    v1, v2 = to_vector(vector1), to_vector(vector2)
    norm1, norm2 = calc_norm(vector1), calc_norm(vector2)

    angle_value = tf.divide(tf.reduce_sum(tf.multiply(v1, v2), axis=1),
                            tf.multiply(norm1, norm2))
    
    return tf.where(tf.abs(angle_value) >= 1.0, tf.pow(angle_value, -1), angle_value)

def angle_loss(target, predicted):
    return K.mean(1 - calc_angle(target, predicted))

def angle_accuracy(target, predicted):
    return K.mean(tf.acos(calc_angle(target, predicted)) * 180 / 3.14159265)


In [72]:
### LAYERS ###

# input
input_img = Input(shape=(36, 60, 1), name='InputNormalizedImage')
input_pose = Input(shape=(2,), name='InputHeadPose')

# convolutional
conv1 = Conv2D(filters=20,
               kernel_size=(5, 5),
               strides=(1, 1),
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.01, seed=42),
               bias_initializer='zeros',
               name='conv1'
              )(input_img)
pool1 = MaxPool2D(pool_size=(2, 2),
                  strides=(2, 2),
                  padding='valid',
                  name='maxpool1'
                 )(conv1)
conv2 = Conv2D(filters=50,
               kernel_size=(5, 5),
               strides=(1, 1),
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.001, seed=42),
               bias_initializer='zeros',
               name='conv2'
              )(pool1)
pool2 = MaxPool2D(pool_size=(2, 2),
                  strides=(2, 2),
                  padding='valid',
                  name='maxpool2'
                 )(conv2)

flatt = Flatten(name='flatt')(pool2)

# inner product 1
dense1 = Dense(units=500,
              activation='relu',
              kernel_initializer=glorot_normal(seed=42),
              bias_initializer='zeros',
              name='ip1'
             )(flatt)

# concatanate with head pose
cat = Concatenate(axis=-1, name='concat')([dense1, input_pose])

dropout = Dropout(rate=0.1)(cat)

# inner product 2
dense2 = Dense(units=2,
              kernel_initializer=glorot_normal(seed=42),
              bias_initializer='zeros',
              name='ip2'
             )(dropout)

### OPTIMIZER ###
adam = Adam(lr=1e-5)

### CALLBACKS ###
tbCallBack = TensorBoard(log_dir='./log',
                     histogram_freq=0,
                     write_graph=True,
                     write_images=True)
checkpoint = ModelCheckpoint('./checkpoints/', monitor='val_loss', period=10)
earlystop = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=10)

### COMPILE MODEL ###
model = Model([input_img, input_pose], dense2)
model.compile(optimizer=adam, loss=angle_loss, metrics=[angle_accuracy])

### Train

In [73]:
model.fit(x=[image[index_train], pose[index_train]], y=gaze[index_train],
          batch_size=500,
          verbose=1,
          epochs=1,
          validation_data=([image[index_test], pose[index_test]], gaze[index_test]),
          callbacks=[tbCallBack])

Train on 341852 samples, validate on 85464 samples
Epoch 1/1


<keras.callbacks.History at 0x12a485240>

In [91]:
model.save('./model_epoch1.h5')

In [None]:
from keras.models import load_model

scope_dict = {'angle_loss': angle_loss, 'angle_accuracy': angle_accuracy}

model = load_model('./model_epoch1.h5', custom_objects=scope_dict, compile=True)

# TODO estimator class or simple function?

In [48]:
def estimate_gaze(eye_image, head_pose):
    eye_image = eye_image.reshape((-1, 36, 60, 1))
    head_pose = head_pose.reshape((-1, 3))
    print(head_pose.shape)
    head_pose = pose3Dto2D(head_pose)
    gaze = model.predict([eye_image, head_pose])
    return gaze2Dto3D(gaze)

In [49]:
eye_image = image[0]
head_pose = pose[0]

In [51]:
estimate_gaze(eye_image, head_pose)

(1, 3)


array([[-0.04846469,  0.0699095 , -0.99637532]], dtype=float32)