### Deep Learning in the Eye Tracking World 
#### the tutorial presented during ETRA 2021 (https://etra.acm.org/2021/acceptedtutorials.html)
#### the code downloaded from: https://github.com/kasprowski/etra2021
@author: pawel@kasprowski.pl


# Find gaze coordinates from eye images

In [3]:
import os
import cv2
import numpy as np

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, MaxPooling2D, Conv2D
from tensorflow.keras.models import Sequential

## Download the dataset (eye images)

In [1]:
import requests
import zipfile

# Fetch the archive with the eye images. The timeout keeps the cell from hanging
# forever on a dead connection, and raise_for_status() stops us from saving an
# HTTP error page under a .zip name.
r = requests.get("http://www.kasprowski.pl/tutorial/eye_left.zip", allow_redirects=True, timeout=60)
r.raise_for_status()

# The context manager guarantees the archive is flushed and closed before it is
# reopened for extraction below.
with open('eye_left.zip', 'wb') as zip_file:
    zip_file.write(r.content)
print("Downloaded eye_left.zip")

# Unpack everything into the local "eye_left" directory (relative to the CWD).
with zipfile.ZipFile("eye_left.zip","r") as zip_ref:
    zip_ref.extractall("eye_left")
print("Uzipped to /eye_left directory")

Downloaded eye_left.zip
Uzipped to /eye_left directory


# Load all images from /eye_left
### The gaze coordinates (labels) are parsed from each file name, which has the form `X_Y.<extension>`

In [4]:
def load_images(indir):
    """Load every readable image in `indir` together with the label encoded in its name.

    File names are expected to look like ``<X>_<Y>.<ext>``: the text before the
    first underscore becomes label[0] and the text between that underscore and
    the file extension becomes label[1].

    Args:
        indir: path of the directory holding the image files.

    Returns:
        (samples, labels): `samples` is a float array of the preprocessed
        images, `labels` is an array with one (X, Y) row per loaded image.

    Raises:
        ValueError: if a readable image has a name whose parts cannot be
            converted to float.
    """
    samples = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) the same on every run.
    for imagePath in sorted(os.listdir(indir)):
        image = cv2.imread(os.path.join(indir, imagePath))
        if image is None:
            # cv2.imread() yields None for anything it cannot decode; skip it
            # instead of crashing later inside preprocess().
            continue
        # Strip only the extension (last dot) so fractional coordinates such as
        # "10.5_20.5.png" are still parsed correctly.
        stem = os.path.splitext(imagePath)[0]
        x_text, _, y_text = stem.partition("_")
        label = np.zeros((2))
        label[0] = float(x_text)
        label[1] = float(y_text)
        samples.append(preprocess(image))
        labels.append(label)
    samples = np.array(samples, dtype="float")
    labels = np.array(labels)
    return samples,labels

def preprocess(image):
    """Resize `image` to 64x64, keep only a centred elliptical region, then crop.

    Pixels outside a filled ellipse with semi-axes (28, 14) are zeroed, and the
    14 top and 14 bottom rows are dropped, leaving rows 14..49 of the masked
    image.
    """
    resized = cv2.resize(image, (64, 64))
    # NumPy shape order is (height, width, channels); OpenCV's centre is (x, y).
    height, width, _ = resized.shape
    ellipse_mask = cv2.ellipse(
        np.zeros_like(resized),
        center=(width // 2, height // 2),
        axes=(28, 14),
        angle=0,
        startAngle=0,
        endAngle=360,
        color=(255, 255, 255),
        thickness=-1,  # negative thickness draws a filled ellipse
    )
    masked = resized & ellipse_mask
    return masked[14:50, :]

# Read the extracted dataset and report its shape and size.
print("Loading images...")
samples, labels = load_images("eye_left")
print(samples.shape)
print(f"Loaded {len(samples)} images")

Loading images...
(200, 36, 64, 3)
Loaded 200 images


## Build the model

In [5]:
def build_cnn_model():
    """Assemble the (uncompiled) CNN that maps a 36x64x3 eye image to two values.

    The stack is: one 7x7 convolution, pooling and dropout; two 5x5
    convolutions, pooling and dropout; then a 512-unit dense layer and a
    2-unit output (X, Y) with no activation.
    """
    layers = [
        # First convolutional stage
        Conv2D(32, (7, 7), padding="same", input_shape=(36, 64, 3)),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Second convolutional stage (two stacked convolutions)
        Conv2D(64, (5, 5), padding="same"),
        Activation("relu"),
        Conv2D(64, (5, 5), padding="same"),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Regression head; a Dropout(0.5) after the dense layer is left disabled
        Flatten(),
        Dense(512),
        Activation("relu"),
        Dense(2),  # two values X,Y
    ]
    return Sequential(layers)

# Create the network and configure training: mean absolute error is both the
# optimised loss and the reported metric ("mae").
model = build_cnn_model()
model.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=["mae"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 36, 64, 32)        4736      
_________________________________________________________________
activation (Activation)      (None, 36, 64, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 18, 32, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 18, 32, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 18, 32, 64)        51264     
_________________________________________________________________
activation_1 (Activation)    (None, 18, 32, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 18, 32, 64)        1

In [6]:
def _draw_predictions(true_points, predicted_points, err_x, err_y):
    """Render ground-truth and predicted gaze points on a black 800x1500 canvas.

    Args:
        true_points: sequence of (x, y) ground-truth coordinates.
        predicted_points: sequence of (x, y) predictions, same order as `true_points`.
        err_x, err_y: mean absolute errors written in the top-left corner.

    Returns:
        The annotated image as a uint8 array of shape (800, 1500, 3).
    """
    # uint8 is the native 8-bit image type expected by the JPEG writer.
    canvas = np.zeros((800, 1500, 3), dtype=np.uint8)
    for truth, guess in zip(true_points, predicted_points):
        truth_xy = (int(truth[0]), int(truth[1]))
        guess_xy = (int(guess[0]), int(guess[1]))
        cv2.circle(canvas, truth_xy, 10, (0, 255, 0), 3)  # true gaze point
        cv2.circle(canvas, guess_xy, 10, (255, 0, 0), 3)  # predicted gaze point
        cv2.line(canvas, guess_xy, truth_xy, (255, 0, 0), 3)  # line connecting ground truth with prediction
    cv2.putText(canvas, "ErrX: {0:.0f}".format(err_x), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
    cv2.putText(canvas, "ErrY: {0:.0f}".format(err_y), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
    return canvas


# Output folders for checkpoints and per-epoch visualisations.
os.makedirs("eye_models", exist_ok=True)
os.makedirs("eye_images", exist_ok=True)

(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels, test_size=0.15, random_state=42)
checkpt1 = ModelCheckpoint(filepath='eye_models/model.{epoch:02d}-{val_loss:.2f}.h5', save_best_only=True)

# train
EPOCHS = 100
for e in range(EPOCHS):
    print("=" * 50)
    print("Iteration: {}".format(e))
    # Train exactly one epoch per call, but tell Keras which epoch it is:
    # without initial_epoch every call restarts counting at 0 and every
    # checkpoint file would be named "model.01-...".
    H = model.fit(trainSamples, trainLabels, validation_data=(testSamples, testLabels),
                  initial_epoch=e, epochs=e + 1, batch_size=32, verbose=0,
                  callbacks=[checkpt1])

    print("Error so far: {}".format(H.history["val_mae"]))

    predictions = model.predict(testSamples, batch_size=32)

    # Mean absolute error along each axis (X, Y) over the held-out samples.
    dx, dy = np.mean(np.abs(testLabels - predictions), axis=0)

    # create an image with current predictions
    results = _draw_predictions(testLabels, predictions, dx, dy)
    cv2.imwrite("eye_images/test_{:02d}.jpg".format(e), results)

# Evaluate once and reuse the value for both the report and the file name.
final_mae = mean_absolute_error(testLabels, predictions)
print("Final MAE: {}".format(final_mae))

model.save("final_model_{0:.2f}.h5".format(final_mae))

Iteration: 0
Error so far: [322.9339294433594]
Iteration: 1
Error so far: [295.0889587402344]
Iteration: 2
Error so far: [301.2847595214844]
Iteration: 3
Error so far: [270.3288879394531]
Iteration: 4
Error so far: [283.8215637207031]
Iteration: 5
Error so far: [262.9568786621094]
Iteration: 6
Error so far: [247.29052734375]
Iteration: 7
Error so far: [223.92144775390625]
Iteration: 8
Error so far: [192.7815399169922]
Iteration: 9
Error so far: [212.13674926757812]
Iteration: 10
Error so far: [236.05064392089844]
Iteration: 11
Error so far: [176.7724151611328]
Iteration: 12
Error so far: [170.10020446777344]
Iteration: 13
Error so far: [149.5454864501953]
Iteration: 14
Error so far: [108.79581451416016]
Iteration: 15
Error so far: [121.14583587646484]
Iteration: 16
Error so far: [130.9051971435547]
Iteration: 17
Error so far: [109.44792938232422]
Iteration: 18
Error so far: [98.08070373535156]
Iteration: 19
Error so far: [94.60517120361328]
Iteration: 20
Error so far: [77.5677261352539