# Head Position Detector (AI6 Session 3)

The objective is to detect where a person is looking from an image of their face.

The network is trained on the [Head Pose Image Dataset](http://www-prima.inrialpes.fr/perso/Gourier/Faces/HPDatabase.html).

In [1]:
# Dependencies and imports

import cv2  # OpenCV 2 for capturing frames from the video
import os  # For managing paths and directories in the project
import shutil  # High level file operations
import numpy as np  # Arrays
import keras  # High level NN API
from PIL import Image, ImageOps # For image processing
from pathlib import Path  # For easily managing paths
from IPython import display  # For displaying images inline with the notebook
from sklearn.model_selection import train_test_split  # For train-test splitting
from tqdm import tqdm
import re
import requests
import pandas as pd
import glob

Using TensorFlow backend.


In [2]:
# Download the dataset archive (skipped when it is already on disk, so re-running
# the notebook does not fetch the ~28 MB file again)
url = 'http://www-prima.inrialpes.fr/perso/Gourier/Faces/HeadPoseImageDatabase.tar.gz'
name = 'HeadPoseImageDatabase.tar.gz'
if not os.path.exists(name):
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on an HTTP error instead of saving the error page as the archive
    r.raise_for_status()
    # The context manager guarantees the file is flushed and closed before tar reads it
    with open(name, 'wb') as archive:
        archive.write(r.content)

28512828

In [0]:
# Extract the gzip-compressed tar archive into the current working directory
# (presumably this yields the Person*/ folders that are globbed further down - verify)
!tar xfz HeadPoseImageDatabase.tar.gz

In [0]:
def img_df(image_path, shape):
    """Load an image file and return it, resized, as a NumPy array.

    Args:
        image_path: Path of the image file to open.
        shape: Target size handed to ``Image.resize``, i.e. ``(width, height)``.

    Returns:
        numpy.ndarray with the pixel data of the resized image.
    """
    # Open inside a context manager so the file handle is released immediately;
    # this function is called once per dataset image.
    with Image.open(image_path) as image:
        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS
        # (the alias was removed in Pillow 10).
        image_resized = image.resize(shape, Image.LANCZOS)
    # resize() returns a new in-memory image, so it stays valid after the file is closed
    return np.asarray(image_resized)

In [0]:
# Build the labelled DataFrame by walking the person folders and their annotation files
regex = r'(\W\d+)(\W\d+)'  # two consecutive signed numbers, e.g. "-15+30"

# Sort the paths so the row order is reproducible (glob order depends on the filesystem)
txt_paths = sorted(glob.glob("Person*/*txt"))
if not txt_paths:
    raise FileNotFoundError(
        "No annotation files match 'Person*/*txt'; extract the dataset archive first"
    )

# Each annotation file sits next to an image with the same base name
jpg_list = [txt_path[:-3] + "jpg" for txt_path in txt_paths]

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call;
# read each file into a one-row frame and concatenate them in a single pass.
df = pd.concat(
    [pd.read_csv(txt_path, header=None).T for txt_path in txt_paths],
    ignore_index=True,
)

# Column 1 of the annotation is not used
df = df.drop(1, axis=1)

# Column 0 holds a string containing the two signed numbers captured by `regex`
# (presumably the tilt and pan angles encoded in the image file name - verify against
# the dataset documentation). Parse each string once and split the pair.
angle_pairs = [re.findall(regex, cell)[0] for cell in df[0]]
df["T"] = [int(first) for first, _ in angle_pairs]
df["P"] = [int(second) for _, second in angle_pairs]

# Load every image resized to 224x224
df["Image_path"] = jpg_list
df["Image"] = df['Image_path'].apply(lambda img: img_df(img, (224, 224)))

df = df.drop([0, "Image_path"], axis=1)
df.columns = ["X", "Y", "H", "W", "T", "P", "Image"]
# The annotation values are read as text; convert the box columns to integers
df = df.astype({"X": int, "Y": int, "H": int, "W": int})

In [0]:
# Extract X and Y from df and normalise them
# Stack the images into one array and scale it once in float32. Dividing the object
# column element-wise produced a float64 tensor twice the size, and the network
# computes in float32 anyway.
X = np.stack(list(df["Image"])).astype(np.float32) / 255.
# All six targets are divided by 100 to bring them to a small numeric range
Y = np.array(df[["X", "Y", "H", "W", "T", "P"]])/100.

In [7]:
# Inspect the shape of the image tensor: (samples, height, width, channels)
X.shape

(2790, 224, 224, 3)

In [8]:
# Build the neural network
from keras.layers import Dense,GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.models import Model


n_classes = 6  # number of regression outputs, one per target column of Y
# MobileNet backbone with ImageNet weights and without its final 1000-neuron classifier
base_model = MobileNet(weights='imagenet', include_top=False)

# Collapse the backbone's feature maps to a vector, then stack three fully connected
# ReLU layers so the model can learn a more complex mapping to the targets
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)

# Linear output layer: the network performs regression, not classification
preds = Dense(n_classes, activation='linear')(x)

model = Model(inputs=base_model.input, outputs=preds)


















Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_224_tf_no_top.h5


In [0]:
# Split the data into training and held-out sets (33% held out).
# A fixed random_state makes the split, and therefore the reported metrics, reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

In [0]:
# Freeze the first 20 layers of the model and leave every later layer trainable
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 20

In [11]:
# Regression set-up: minimise mean squared error with Adam and track cosine proximity
model.compile(loss='mean_squared_error', optimizer='Adam', metrics=['cosine_proximity'])




In [12]:

# Train for 10 epochs, evaluating on the held-out split after each epoch.
# validation_data is documented as a tuple (x_val, y_val), so pass a tuple rather than a list.
# Keep the returned History object so the loss curves can be inspected afterwards.
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, verbose=1)



Train on 1869 samples, validate on 921 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fae504ea390>

In [18]:
from sklearn.metrics import r2_score

# Predict on the held-out images and print the R² score of each of the six outputs,
# in the column order of Y: X, Y, H, W, T, P
y_pred = model.predict(X_test)
for true_column, predicted_column in zip(Y_test.T, y_pred.T):
    print(r2_score(true_column, predicted_column))



0.7414875560542109
0.5520262966071228
-2.314769619165437
-2.7074629659794507
0.8711233010900373
0.8405459284545028


In [19]:
# Height and Width are not relevant for the intended application, so score only
# the other four outputs (columns X, Y, T and P)
relevant_columns = [0, 1, 4, 5]
r2_score(Y_test[:, relevant_columns], y_pred[:, relevant_columns])


0.7512957705514685