<a href="https://colab.research.google.com/github/lauren-safwat/Head-Pose-Estimation/blob/main/Head_Pose_Estimation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Installing MediaPipe

In [26]:
%%capture
!pip install mediapipe

### Imports

In [27]:
import numpy as np
import pandas as pd
import mediapipe as mp
import scipy.io as sio
import cv2
import os
import math
import glob
import random
from pathlib import Path
from google.colab.patches import cv2_imshow

from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score,f1_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import warnings
warnings.filterwarnings('ignore')

### Downloading Data

In [28]:
# Loading AFLW2000 Data
%%capture
if os.path.isfile('/content/AFLW2000-3D.zip') == False:
  !gdown --id  1fP3zvSCYjll_o_m7S12nvQLZ9MnsEoap
  !unzip /content/AFLW2000-3D.zip

### **Data Preparation**

In [29]:
mp_faceModule = mp.solutions.face_mesh

# Load drawing_utils and drawing_styles
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles

In [30]:
def createRecord(image, mat_file=None):
    with mp_faceModule.FaceMesh(static_image_mode=True) as face_mesh:
        # Convert the BGR image to RGB before processing.
        results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        records = []
        if results.multi_face_landmarks != None: 
            # Looping over the faces in the image
            for face in results.multi_face_landmarks:
                record = {}

                # Extracting the landmark points (468 points)
                for i, landmark in enumerate(face.landmark):
                    record['x_' + str(i+1)] = landmark.x
                    record['y_' + str(i+1)] = landmark.y

                # Extracting the 3 angels of the face (pitch, yaw, roll)
                if mat_file:
                    pose_para = mat_file["Pose_Para"][0][:3]
                    record['pitch'] = pose_para[0]
                    record['yaw'] = pose_para[1]
                    record['roll'] = pose_para[2]

                records.append(record)
    
    return records
            

In [31]:
def prepareTrainData(path):
    data = []
    for filename in glob.glob(path + '/*.jpg'):
        # Loading the image and its corresponding mat file
        image = cv2.imread(filename)
        mat_file = sio.loadmat(filename.replace('.jpg', '.mat'))
        data.extend(createRecord(image, mat_file))

    return pd.DataFrame(data)                

In [32]:
path = '/content/AFLW2000'
train_data = prepareTrainData(path)

In [33]:
X = train_data.iloc[:, :-3]
y = train_data.iloc[:, -3:]

In [34]:
X.head()

Unnamed: 0,x_1,y_1,x_2,y_2,x_3,y_3,x_4,y_4,x_5,y_5,...,x_464,y_464,x_465,y_465,x_466,y_466,x_467,y_467,x_468,y_468
0,0.543208,0.699374,0.577407,0.63226,0.548907,0.647842,0.550986,0.556556,0.581944,0.611347,...,0.565472,0.488038,0.561025,0.494831,0.562877,0.500676,0.635171,0.476615,0.641297,0.471475
1,0.449318,0.623106,0.438217,0.555571,0.453033,0.584886,0.438084,0.511398,0.438144,0.537558,...,0.534395,0.490127,0.520145,0.492032,0.509019,0.49173,0.624954,0.484638,0.639041,0.475861
2,0.536798,0.673666,0.568343,0.605853,0.540946,0.627751,0.540447,0.540982,0.571973,0.585368,...,0.557607,0.499001,0.553109,0.502464,0.55437,0.504928,0.625219,0.498331,0.632393,0.489432
3,0.502775,0.68644,0.470461,0.625865,0.49116,0.642107,0.449308,0.557135,0.464042,0.604852,...,0.526728,0.481417,0.514784,0.489186,0.505943,0.494994,0.625982,0.443524,0.635505,0.430933
4,0.454915,0.643061,0.44174,0.598905,0.461118,0.613343,0.45583,0.551114,0.442383,0.58494,...,0.535493,0.526638,0.525722,0.528086,0.51688,0.529739,0.604313,0.539936,0.61572,0.534194


In [35]:
y.head()

Unnamed: 0,pitch,yaw,roll
0,-0.225239,-0.460198,0.147923
1,0.38893,0.319857,0.066105
2,-0.060874,-0.514415,0.110057
3,-0.128483,0.239956,-0.232144
4,-0.240427,0.718242,0.156163


### **Model Training**

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [37]:
svr = MultiOutputRegressor(SVR(kernel='rbf'))
svr.fit(X_train, y_train)

MultiOutputRegressor(estimator=SVR())

In [38]:
preds = svr.predict(X_test)

In [39]:
r2_score(y_test, preds)

0.8576040388686536