In [1]:
import mediapipe as mp
import cv2
import scipy.io as sio
import matplotlib.pyplot as plt
import os
import pandas as pd

In [2]:
dir_path = 'AFLW2000/'
paths = os.listdir(dir_path)
images_paths = [x for x in paths if x.split('.')[1] == 'jpg']
mats_paths = [x for x in paths if x.split('.')[1] == 'mat']

In [3]:
# img = cv2.imread('image00013.jpg')
# img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# sio.loadmat('image00013')
# plt.imshow(img)

In [4]:
# # FaceMesh has two real-time deep neural network models that work together: 
# # 1st --> Detector Model that operates on the full image and computes face locations
# # 2nd --> Face Landmark Model that operates on those locations and predicts the approximate 3D surface via regression
# faceModule = mp.solutions.face_mesh
# face = faceModule.FaceMesh(static_image_mode=True)
# # Load the image
# image = cv2.imread('image04355.jpg')
# # Process the image to extract the points (468 point) --> Landmarks ---- len=1
# # Which is a list of 1 item maybe a data structure contains all landmarks
# results = face.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# if results.multi_face_landmarks != None:
#     # Loop over faces in the image
#     for face in results.multi_face_landmarks:
#         # Loop over lanmarks in every face
#         for landmark in face.landmark:
#             # Every landmark has x and y
#             x = landmark.x
#             y = landmark.y
#             # Note: x & y values are scaled to the their width and height maybe face has its own coordinate system
#             # So we will transform to coordinate system in the whole image
#             shape = image.shape
#             rel_x = int(x * shape[1])
#             rel_y = int(y * shape[0])
#             # To draw a circle of points on the face for every landmark --> Note the thickness
#             cv2.circle(img=image, center=(rel_x, rel_y), radius=1, color=(0, 255, 0), thickness=2)
#     plt.imshow(image)

In [5]:
raw_df = pd.DataFrame(columns=list(range(1, (468*2)+1)) + ['Yaw', 'Pitch', 'Roll'])

In [40]:
# FaceMesh has two real-time deep neural network models that work together: 
# 1st --> Detector Model that operates on the full image and computes face locations
# 2nd --> Face Landmark Model that operates on those locations and predicts the approximate 3D surface via regression
i = 0
faceModule = mp.solutions.face_mesh
for img_path, mat_path in zip(images_paths, mats_paths):
    lst = []
    with faceModule.FaceMesh(static_image_mode=True) as face:
        # Load the image
        image = cv2.imread(dir_path + img_path)
        # Process the image to extract the points (468 point) --> Landmarks ---- len=1
        # Which is a list of 1 item maybe a data structure contains all landmarks
        results = face.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if results.multi_face_landmarks != None:
            # Loop over faces in the image
            for face in results.multi_face_landmarks:
                # Loop over lanmarks in every face
                for landmark in face.landmark:
                    # Every landmark has x and y
                    x = landmark.x
                    y = landmark.y
                    lst.append(x)
                    lst.append(y)

                # Load the mat file
                lst = lst + list(sio.loadmat(dir_path+mats_paths[0])['Pose_Para'][0][:3])

                raw_df.loc[i] = lst
                i += 1

In [41]:
# Chech loading data into DataFrame
print(raw_df.shape)
raw_df.head(3)

(1853, 939)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,930,931,932,933,934,935,936,Yaw,Pitch,Roll
0,0.485651,0.686891,0.489194,0.639722,0.489803,0.647257,0.47973,0.562529,0.490558,0.621214,...,0.492813,0.541429,0.500371,0.665669,0.471336,0.676091,0.464005,-0.399231,0.018227,0.085676
1,0.441502,0.642091,0.417273,0.592251,0.448839,0.610546,0.435994,0.545069,0.415757,0.576966,...,0.511724,0.500658,0.514913,0.584451,0.500244,0.59406,0.495387,-0.399231,0.018227,0.085676
2,0.317797,0.79919,0.321328,0.776498,0.318316,0.782066,0.311064,0.745973,0.321459,0.767816,...,0.720868,0.328639,0.723342,0.373404,0.713071,0.377643,0.709798,-0.399231,0.018227,0.085676


In [42]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split

In [43]:
features = raw_df.drop(['Yaw', 'Pitch', 'Roll'], axis=1)
labels = raw_df[['Yaw', 'Pitch', 'Roll']]

In [44]:
features_train, features_validation, labels_train, labels_validation = train_test_split(features, labels, test_size=0.15, random_state=42)

In [45]:
print(len(features_train))
print(len(features_validation))
print(len(labels_train))
print(len(labels_validation))

1575
278
1575
278


In [46]:
svr = MultiOutputRegressor(SVR(gamma=10)).fit(features_train, labels_train)

In [47]:
svr.score(features_validation, labels_validation)

1.0