In [1]:
from scipy.io import loadmat
import numpy as np
import pandas as pd
from PIL import Image
import glob
from utils import *
from tqdm import tqdm
from sklearn.model_selection import train_test_split

## Prepare data

### Gather Data from Structure

In [2]:
index, image, pose, gaze = gather_all_data('./dataset/Data/Normalized')

100%|██████████| 521/521 [00:15<00:00, 33.74it/s]


In [3]:
image = image / 255
gaze = gaze3Dto2D(gaze)
pose = pose3Dto2D(pose)

In [4]:
def print_shapes(titles, items):
    for title, item in zip(titles, items):
        print((title+':').ljust(15) + str(item.shape))

print_shapes(['Indices', 'Images', 'Poses', 'Gazes'], (index, image, pose, gaze))

Indices:       (427316, 4)
Images:        (427316, 36, 60, 1)
Poses:         (427316, 2)
Gazes:         (427316, 2)


### Train/test split

In [None]:
random_state = 42
index_train, index_test, image_train, image_test, pose_train, pose_test, gaze_train, gaze_test = train_test_split(index, image, pose, gaze,
                                            stratify=index[:, [0, -1]],
                                            test_size=0.2,
                                            random_state=random_state)

**Train:**

In [None]:
print_shapes(['Indices', 'Images', 'Poses', 'Gazes'], (index_train, image_train, pose_train, gaze_train))

**Test:**

In [None]:
print_shapes(['Indices', 'Images', 'Poses', 'Gazes'], (index_test, image_test, pose_test, gaze_test))

## Create NN

In [5]:
from keras.layers import Input, Conv2D, MaxPool2D, Dense, Concatenate, Flatten, Dropout
from keras.initializers import RandomNormal
from keras.models import Model
from keras import backend as K
from keras.callbacks import TensorBoard
from keras.optimizers import Adam
from keras.models import Model

import tensorflow as tf

Using TensorFlow backend.


### Layers

In [6]:
# input
input_img = Input(shape=(36, 60, 1), name='InputNormalizedImage')
input_pose = Input(shape=(2,), name='InputHeadPose')

# convolutional
conv1 = Conv2D(filters=20,
               kernel_size=(5, 5),
               strides=(1, 1),
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.01, seed=None),
               bias_initializer='zeros',
               name='conv1'
              )(input_img)
pool1 = MaxPool2D(pool_size=(2, 2),
                  strides=(2, 2),
                  padding='valid',
                  name='maxpool1'
                 )(conv1)
conv2 = Conv2D(filters=50,
               kernel_size=(5, 5),
               strides=(1, 1),
               kernel_initializer=RandomNormal(mean=0.0, stddev=0.01, seed=None),
               bias_initializer='zeros',
               name='conv2'
              )(pool1)
pool2 = MaxPool2D(pool_size=(2, 2),
                  strides=(2, 2),
                  padding='valid',
                  name='maxpool2'
                 )(conv2)

flatt = Flatten(name='flatt')(pool2)

# inner product 1
dense1 = Dense(units=500,
              activation='relu',
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros',
              name='ip1'
             )(flatt)

dropout = Dropout(rate=0.15)(dense1)

# concatanate with head pose
cat = Concatenate(axis=-1, name='concat')([dropout, input_pose])

# inner product 2
dense2 = Dense(units=2,
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros',
              name='ip2'
             )(cat)

### Loss function

In [7]:
def loss(target, predicted):
#         // Accuracy
#     float data_x = (-1)*cos(bottom_data[i * 2 + 0])*sin(bottom_data[i * 2 + 1]);
#     float data_y = (-1)*sin(bottom_data[i * 2 + 0]);
#     float data_z = (-1)*cos(bottom_data[i * 2 + 0])*cos(bottom_data[i * 2 + 1]);
#     float norm_data = sqrt(data_x*data_x + data_y*data_y + data_z*data_z);
    
#     float label_x = (-1)*cos(bottom_label[i * 2 + 0])*sin(bottom_label[i * 2 + 1]);
#     float label_y = (-1)*sin(bottom_label[i * 2 + 0]);
#     float label_z = (-1)*cos(bottom_label[i * 2 + 0])*cos(bottom_label[i * 2 + 1]);
#     float norm_label = sqrt(label_x*label_x + label_y*label_y + label_z*label_z);

#     float angle_value = (data_x*label_x+data_y*label_y+data_z*label_z) / (norm_data*norm_label);
#     accuracy += (acos(angle_value)*180)/3.1415926;
    
#   // Accuracy
    data_x = K.cos(predicted[:, 0]) * K.sin(predicted[:, 1])
    data_y = K.sin(predicted[:, 0])
    data_z = K.cos(predicted[:, 0]) * K.cos(predicted[:, 1])
    norm_data = K.sqrt(data_x**2 + data_y**2 + data_z**2)
    
    label_x = K.cos(target[:, 0]) * K.sin(target[:, 1])
    label_y = K.sin(target[:, 0])
    label_z = K.cos(target[:, 0]) * K.cos(target[:, 1])
    norm_label = K.sqrt(label_x**2 + label_y**2 + label_z**2)

    angle_value = (data_x*label_x + data_y*label_y + data_z*label_z) / (norm_data*norm_label)
    
    return K.mean(tf.acos(angle_value) * 180 / 3.1415926)
    
    # return K.mean(K.sqrt(K.sum((target - predicted) ** 2, axis=-1)))

### Optimizer

In [8]:
adam = Adam(lr=0.00001, decay=1e-6)

### Compile

In [9]:
model = Model([input_img, input_pose], dense2)
model.compile(optimizer=adam, loss=loss)

### Callbacks

In [10]:
tbCallBack = TensorBoard(log_dir='./log',
                         histogram_freq=0,
                         write_graph=True,
                         write_images=True)

### Train

In [12]:
model.fit(x=[image, pose], y=gaze,
          batch_size=1000,
          shuffle=True,
          epochs=10,
          validation_split=0.2,
          callbacks=[tbCallBack])

Train on 341852 samples, validate on 85464 samples
Epoch 1/10
 15000/341852 [>.............................] - ETA: 26:26 - loss: nan 

KeyboardInterrupt: 