In [1]:
import numpy as np
from utils import mpigaze

In [2]:
normalized_data_path = '../data/MPIIGaze/Data/Normalized/'

# load_dataset returns the train / validation / test parts, each indexed by
# string keys such as 'img' below.
train_data, validation_data, test_data = mpigaze.load_dataset(normalized_data_path)

# The 'img' array is 4-D; its four dimensions are unpacked as
# (samples, height, width, channels).
n_train_samples, image_height, image_width, n_channels = train_data['img'].shape

# Draw the reduced training subset from a dedicated, seeded generator so the
# same indices are selected on every "Restart & Run All".
mini_train_seed = 0
# Sampling without replacement cannot draw more indices than there are
# samples, so cap the requested subset size at the dataset size.
n_mini_train = min(20000, n_train_samples)
mini_train_subset = np.random.RandomState(mini_train_seed).choice(
    n_train_samples, n_mini_train, replace=False)

loading data for person 0
loading data for person 1
loading data for person 2
loading data for person 3
loading data for person 4
loading data for person 5
loading data for person 6
loading data for person 7
loading data for person 8
loading data for person 9
loading data for person 10
loading data for person 11
loading data for person 12
loading data for person 13
loading data for person 14


In [13]:
import tensorflow as tf
import keras
import numpy as np

from keras import backend as K
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Flatten, Activation, Lambda
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.layers.merge import concatenate

from models import vgg

In [145]:
def gaze_bivgg(image_height, image_width, n_channels, name='gaze_bivgg'):
    """Build and compile a two-input gaze model (eye image + head pose).

    The eye image is passed through ``vgg.vgg_model(final_layer=False, ...)``,
    the result is concatenated with the 3-element head-pose input, fed through
    two 512-unit ReLU dense layers, projected to 2 units and L2-normalised
    along axis 1.

    Args:
        image_height: height of the eye image input.
        image_width: width of the eye image input.
        n_channels: number of channels of the eye image input.
        name: name given to the returned Keras ``Model``.

    Returns:
        A Keras ``Model`` taking ``[eye_img, head_pose]`` and producing the
        normalised 2-vector, compiled with the Adam optimizer and MSE loss.
    """
    eye_img = Input(shape=[image_height, image_width, n_channels], name='eye_img')
    head_pose = Input(shape=[3], name='head_pose')

    vgg_img = vgg.vgg_model(final_layer=False,
                            image_height=image_height,
                            image_width=image_width,
                            n_channels=n_channels)(eye_img)

    img_hpose = concatenate([vgg_img, head_pose])

    final_seq = Dense(512, activation='relu')(Dense(512, activation='relu')(img_hpose))

    # NOTE: the output layer is called 'yaw_angle' regardless of which angle
    # the model is later trained on; the layer name is kept unchanged here.
    angle = Lambda(lambda x: K.l2_normalize(x, axis=1), name='yaw_angle')(Dense(2)(final_seq))

    # Honour the caller-supplied name instead of a hard-coded literal.
    model = Model([eye_img, head_pose], angle, name=name)

    model.compile(optimizer='Adam', loss='mse')

    return model

In [162]:
# Two separately-initialised networks sharing one architecture: the first is
# used for yaw, the second is reserved for pitch.
model_dims = dict(image_height=image_height,
                  image_width=image_width,
                  n_channels=n_channels)
yaw_model = gaze_bivgg(**model_dims)
pitch_model = gaze_bivgg(**model_dims)

In [164]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

ckpt_path = '../logs/eye_yaw_model.ckpt'

# Stop once val_loss has not improved for 5 consecutive epochs.
early_stop_cb = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1, mode='auto')
# Keep only the weights of the best epoch (lowest val_loss) on disk.
model_ckpt = ModelCheckpoint(ckpt_path, monitor='val_loss', verbose=1,
                             save_best_only=True, save_weights_only=True)

train_inputs = [train_data['img'][mini_train_subset], train_data['hpose'][mini_train_subset]]
train_targets = train_data['yaw_bit'][mini_train_subset]

val_inputs = [validation_data['img'], validation_data['hpose']]
val_targets = validation_data['yaw_bit']

# validation_data is passed as an (inputs, targets) tuple, the documented
# form; a plain list can be mistaken for a list of inputs only.
yaw_model.fit(train_inputs, train_targets,
              validation_data=(val_inputs, val_targets),
              epochs=10,
              callbacks=[early_stop_cb, model_ckpt])

# Restore the best checkpoint rather than keeping the last-epoch weights.
yaw_model.load_weights(ckpt_path)

Train on 10000 samples, validate on 1498 samples
Epoch 1/10
  512/10000 [>.............................] - ETA: 71s - loss: 0.0633

KeyboardInterrupt: 

In [149]:
# Restore the yaw model from the checkpoint file written during training.
yaw_model.load_weights(filepath=ckpt_path)

In [155]:
# Imported here because this cell is the first to use bit2deg; without it a
# fresh kernel run top-to-bottom fails with NameError.
from utils.angles import bit2deg

# Raw network output for the validation set, then converted with bit2deg.
val_yaw_preds = yaw_model.predict([validation_data['img'], validation_data['hpose']])
val_yaw_preds_degs = bit2deg(val_yaw_preds)

In [157]:
# Display the validation yaw predictions after the bit2deg conversion.
val_yaw_preds_degs

array([ 109.7618103 ,   58.22323608,  110.92883301, ...,  142.97433472,
        133.8727417 ,  152.17456055], dtype=float32)

In [158]:
# Display the validation set's 'yaw_deg' entry for comparison with the
# predictions (presumably the ground-truth yaw in degrees -- confirm in mpigaze).
validation_data['yaw_deg']

array([ 162.60485937,   51.45169118,  166.75862016, ...,  140.93112824,
        126.81633207,  154.52741986])

In [159]:
from utils.angles import bit2deg
from utils.losses import maad_from_deg

In [111]:
# Constant baselines: every validation sample is "predicted" as the mean of
# the corresponding validation angles.
n_val_pitch = validation_data['pitch_deg'].shape[0]
n_val_yaw = validation_data['yaw_deg'].shape[0]

mean_pitch_preds = np.full(n_val_pitch, np.mean(validation_data['pitch_deg']), dtype=np.float64)
mean_yaw_preds = np.full(n_val_yaw, np.mean(validation_data['yaw_deg']), dtype=np.float64)

In [160]:
# Display the constant baseline array filled with the mean of validation 'yaw_deg'.
mean_yaw_preds

array([ 93.90927926,  93.90927926,  93.90927926, ...,  93.90927926,
        93.90927926,  93.90927926])

In [161]:
# Per-sample error between predicted and reference yaw, averaged over the
# validation set.
val_yaw_errors = maad_from_deg(val_yaw_preds_degs, validation_data['yaw_deg'])
np.mean(val_yaw_errors)

24.444834145743176

In [144]:
# Mean of maad_from_deg between pitch predictions and validation 'pitch_deg'.
# NOTE(review): val_pitch_preds_degs is not assigned in any visible cell, and
# pitch_model is never fitted in the visible cells; this cell appears to rely
# on leftover kernel state and will likely fail on a fresh run -- confirm.
np.mean(maad_from_deg(val_pitch_preds_degs, validation_data['pitch_deg']))

3.7269034109068873