In [1]:
import cv2  # OpenCV 2 for capturing frames from the video
import os  # For managing paths and directories in the project
import shutil  # High level file operations
import numpy as np  # Arrays
import keras  # High level NN API
from PIL import Image, ImageOps # For image processing
from pathlib import Path  # For easily managing paths
from IPython import display  # For displaying images inline with the notebook
from sklearn.model_selection import train_test_split  # For train-test splitting
from tqdm import tqdm
import re
import requests
import pandas as pd
import glob

Using TensorFlow backend.


In [2]:
# Download the image dataset archive into the current working directory
url = 'http://www-prima.inrialpes.fr/perso/Gourier/Faces/HeadPoseImageDatabase.tar.gz'
file = 'HeadPoseImageDatabase.tar.gz'
# A timeout keeps the cell from hanging indefinitely on a stalled connection
r = requests.get(url, allow_redirects=True, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the archive
r.raise_for_status()
# The context manager flushes and closes the file handle once the bytes are written
with open(file, 'wb') as archive:
    archive.write(r.content)


28512828

In [0]:
# Uncompress the downloaded archive with the system `tar` (IPython shell escape).
# According to the original author, this creates several folders, one per person.
!tar xfz HeadPoseImageDatabase.tar.gz

In [0]:
# Regex with two capture groups, each a single non-word character followed by
# one or more digits, e.g. "+0" and "-60" in "person05242+0-60.jpg".
regex = r'(\W\d+)(\W\d+)'
jpg_list = []
annotation_rows = []
# Visit every txt file in every Person folder. The jpg path is derived from the
# txt path in the same iteration, so jpg_list[i] lines up with row i of df.
for txt_path in glob.glob("Person*/*txt"):
    jpg_path = txt_path[:-3] + "jpg"
    jpg_list.append(jpg_path)
    # Each txt file is read as a single column and transposed into one row
    annotation_rows.append(pd.read_csv(txt_path, header=None).T)
# Concatenate once at the end: DataFrame.append inside a loop copies the whole
# frame on every iteration and was removed in pandas 2.0.
df = pd.concat(annotation_rows) if annotation_rows else pd.DataFrame()

In [5]:
# Each row of the dataframe now holds the contents of one txt file. Per the original
# author, columns 0-5 are: file name, content label, Xcoord, Ycoord, Width, Height.
df.head()

Unnamed: 0,0,1,2,3,4,5
0,person05242+0-60.jpg,Face,204,130,92,100
0,person05225-30+75.jpg,Face,44,172,70,97
0,person05166+30-90.jpg,Face,186,88,85,102
0,person05160+15+15.jpg,Face,120,108,101,101
0,person05117-30-45.jpg,Face,223,130,96,101


In [6]:
# Remove column 1 (it holds the "Face" label in the rows previewed above)
df = df.drop(columns=[1])
# Display the file names so the naming pattern that encodes the angles is visible
df[0]

0     person05242+0-60.jpg
0    person05225-30+75.jpg
0    person05166+30-90.jpg
0    person05160+15+15.jpg
0    person05117-30-45.jpg
             ...          
0    person13153+15-90.jpg
0    person13132-15-15.jpg
0    person13213-60+90.jpg
0    person13290+60+75.jpg
0    person13283+60-30.jpg
Name: 0, Length: 2790, dtype: object

In [0]:
# Tilt is the first signed number in the file name and pan is the second one;
# both come from the two capture groups of `regex`.
def _signed_group(position):
    """Return a function that parses capture group `position` of the first regex match as an int."""
    def extract(name):
        return int(re.findall(regex, name)[0][position])
    return extract


file_names = df[0]
df["T"] = file_names.apply(_signed_group(0))
df["P"] = file_names.apply(_signed_group(1))

In [0]:
def img2array(image_path, shape):
    """Load an image from disk, resize it, and return its pixels as a NumPy array.

    Parameters
    ----------
    image_path : str or path-like
        Path of the image file to open.
    shape : tuple of int
        Target size handed to ``Image.resize``.

    Returns
    -------
    numpy.ndarray
        Pixel data of the resized image.
    """
    # The context manager releases the file handle deterministically; the resized
    # image is a separate, fully loaded copy, so it stays usable after the block.
    with Image.open(image_path) as image:
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10
        image_resized = image.resize(shape, Image.LANCZOS)
    img_array = np.asarray(image_resized)
    return img_array


# Attach the image path of each row, then load every image resized to 224x224
def _load_resized(path):
    """Read one image through img2array at the fixed 224x224 size."""
    return img2array(path, (224, 224))


df["imgpath"] = jpg_list
df["Image"] = df["imgpath"].apply(_load_resized)

In [0]:
# The file name and image path columns are dropped now that angles and pixels are extracted
df = df.drop(columns=[0, "imgpath"])
# NOTE(review): an earlier comment lists the last two raw columns as Width, Height,
# but they are named "H", "W" here — confirm which order is correct.
df.columns = ["X", "Y", "H", "W", "T", "P", "Image"]
# Cast the four box columns to integers (presumably still text from the txt files)
for box_column in ("X", "Y", "H", "W"):
    df[box_column] = df[box_column].astype(int)

In [10]:
# Preview the frame after renaming the columns and casting the box values
df.head()

Unnamed: 0,X,Y,H,W,T,P,Image
0,204,130,92,100,0,-60,"[[[22, 22, 24], [16, 16, 18], [53, 53, 55], [1..."
0,44,172,70,97,-30,75,"[[[21, 21, 21], [14, 14, 14], [57, 57, 57], [1..."
0,186,88,85,102,30,-90,"[[[21, 21, 21], [15, 15, 15], [55, 55, 55], [1..."
0,120,108,101,101,15,15,"[[[21, 21, 21], [16, 16, 16], [54, 54, 54], [1..."
0,223,130,96,101,-30,-45,"[[[22, 22, 22], [16, 16, 16], [55, 55, 55], [1..."


In [0]:
# Replace the repeated index values with a fresh 0..n-1 index, discarding the old one
df = df.reset_index(drop=True)

In [0]:
# Build the model inputs and targets.
# X: stack the per-image arrays into one array, then scale pixel values by 1/255.
# Stacking before dividing avoids holding a second, per-image float copy of the
# whole dataset in memory; the resulting values are unchanged.
X = np.stack(df["Image"].tolist()) / 255.
# y: the six target columns as one multidimensional array, scaled down by 100
y = np.array(df[["X", "Y", "H", "W", "T", "P"]]) / 100.

In [13]:
# Inspect the shape of X to know what input the CNN will receive
X.shape

(2790, 224, 224, 3)

In [0]:
# Split train/test dataset, holding out 33% of the samples.
# A fixed random_state makes the split identical on every run, so results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [17]:
from keras.layers import Dense, GlobalAveragePooling2D
from keras.applications import MobileNet
from keras.preprocessing import image
from keras.models import Model

# Number of values the network predicts (one per column of y)
n_classes = 6

# MobileNet with ImageNet weights and without its top layers
base_model = MobileNet(weights='imagenet', include_top=False)

# Head: global average pooling followed by three fully connected ReLU layers
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)

# Linear output layer, since the targets are continuous values
preds = Dense(n_classes, activation='linear')(x)

model = Model(inputs=base_model.input, outputs=preds)

# Train with Adam on mean squared error; cosine proximity is reported as a metric
model.compile(
    optimizer='Adam',
    loss='mean_squared_error',
    metrics=['cosine_proximity'],
)







Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_224_tf_no_top.h5


In [19]:
# Train the model, evaluating on the held-out split after every epoch.
# validation_data is passed as a tuple (x_val, y_val), the form documented for Model.fit.
# NOTE(review): the saved output below reports 15 epochs while this call requests 12 —
# confirm the intended value.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=12, verbose=1, batch_size=100)

Train on 1869 samples, validate on 921 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f29d36c2fd0>