In [95]:
# If mediapipe is not installed, uncomment the lines below
#%%capture
#!pip install mediapipe


In [None]:
#Data Link
#https://www.kaggle.com/datasets/kameo4189/aflw2000-300wlp?select=AFLW2000-3D

In [96]:
import numpy as np
import os,cv2,math,glob,random
import scipy.io as sio
from math import cos, sin
from pathlib import Path
import pandas as pd
import mediapipe
import warnings

In [97]:
# Function to show pitch, yaw and roll
def draw_axis(img, pitch, yaw, roll, tdx=None, tdy=None, size=50):
    """Draw the three head-pose axes on ``img`` and return the same image.

    Parameters
    ----------
    img : image array
        Image to draw on. ``img.shape[:2]`` is read as ``(height, width)``
        and the lines are drawn directly onto this array with ``cv2.line``.
    pitch, yaw, roll : float
        Pose angles, passed to ``math.cos`` / ``math.sin`` (i.e. radians).
    tdx, tdy : float, optional
        Pixel coordinates of the axes' origin. The image centre is used
        unless BOTH values are supplied.
    size : float, optional
        Length of each drawn axis in pixels.

    Returns
    -------
    The ``img`` object that was passed in.
    """
    # The yaw sign is flipped before the projection is computed.
    yaw = -yaw

    # Use the image centre as origin unless both coordinates were given.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis pointing down. drawn in green
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    # Colours are BGR tuples; the Z axis is drawn slightly thinner.
    origin = (int(tdx), int(tdy))
    cv2.line(img, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return img

In [98]:
# All images.
# Raw strings keep the Windows backslashes literal instead of relying on
# sequences such as "\I" or "\A" not being recognised as escapes.
all_images = glob.glob(r'D:\ITI\Machine Learning Project\AFLW2000-3D\AFLW2000\*.jpg')
# All .mat files (labels)
all_mats = glob.glob(r'D:\ITI\Machine Learning Project\AFLW2000-3D\AFLW2000\*.mat')
len(all_images),len(all_mats)

(2000, 2000)

In [99]:
# Extract face-mesh landmarks and pose labels for every image in the dataset
faceModule = mediapipe.solutions.face_mesh
x_all = []
y_all = []
noses = []
poses = []

# A single FaceMesh instance is reused for the whole dataset instead of being
# rebuilt for every image. With static_image_mode=True detection is run on each
# input image, so sharing the instance should not change the landmarks.
with faceModule.FaceMesh(static_image_mode=True) as faces:
    # loading each image and its corresponding mat file
    for image in all_images:
        img = cv2.imread(image)
        if img is None:
            # cv2.imread returns None (no exception) for unreadable files
            continue
        results = faces.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            # no face detected: skip the image and its label
            continue

        # Label file shares the image's name with a .mat extension
        mat_data = sio.loadmat(os.path.splitext(image)[0] + '.mat')
        poses.append(mat_data['Pose_Para'][0][:3])

        # Only the first detected face is used so every sample has the same
        # number of features (FaceMesh is created with its default face limit,
        # presumably one face per image - verify against the MediaPipe docs).
        face = results.multi_face_landmarks[0]
        x = [landmark.x for landmark in face.landmark]
        y = [landmark.y for landmark in face.landmark]
        # landmark index 1 is treated as the nose
        nose = [x[1], y[1]]

        # all x values
        x_all.append(x)
        # all y values
        y_all.append(y)
        # all nose values
        noses.append(nose)

if not x_all:
    raise RuntimeError('No face landmarks were extracted; check the dataset path used for all_images.')

# centering the x and y values around the nose (landmark index 1)
x_arr = np.array(x_all)
y_arr = np.array(y_all)
x_center = x_arr - x_arr[:, 1].reshape(-1, 1)
y_center = y_arr - y_arr[:, 1].reshape(-1, 1)
features_data = np.hstack([x_center, y_center])

In [100]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Lasso,Ridge
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score

In [101]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features_data,
    np.array(poses),
    test_size=0.2,
    random_state=42,
)

In [102]:
# Testing MultiOutputRegressor with an SVR estimator
svr = SVR()
mor = MultiOutputRegressor(svr)
mor.fit(X_train,y_train)
# Predict once and reuse the result for both metrics.
# Note: the scores below are computed on the held-out X_test / y_test split.
svr_test_pred = mor.predict(X_test)
print('validation_mse_svr: ', mean_squared_error(y_test, svr_test_pred))
print('validation_mae_svr: ', mean_absolute_error(y_test, svr_test_pred))

validation_mse_svr:  0.015434564981853352
validation_mae_svr:  0.06778964861293812


In [103]:
# Testing MultiOutputRegressor with a Ridge estimator
rd = Ridge()
mor_ridge = MultiOutputRegressor(rd)
mor_ridge.fit(X_train,y_train)
# Predict once and reuse the result for both metrics.
# Note: the scores below are computed on the held-out X_test / y_test split.
ridge_test_pred = mor_ridge.predict(X_test)
print('validation_mse: ', mean_squared_error(y_test, ridge_test_pred))
print('validation_mae: ', mean_absolute_error(y_test, ridge_test_pred))

validation_mse:  0.0289758680921532
validation_mae:  0.1145048376412614


In [104]:

import pickle
import mediapipe as mp
# Serialize the fitted `mor` model into an in-memory bytes object.
# Nothing is written to disk here, so the model does not outlive the kernel.
saved_model = pickle.dumps(mor)
  
# Rebuild a model object from those bytes; this copy is the one used by
# predict_image for inference.
mor_from_pickle = pickle.loads(saved_model)

In [105]:
#Extracting all xs and ys for image to be predicted
def extract_features(img):
    """Run MediaPipe FaceMesh on ``img`` and collect landmark coordinates.

    Parameters
    ----------
    img : image array
        Image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2RGB`` before being processed).

    Returns
    -------
    x_all : list of list of float
        One list per detected face holding every ``landmark.x`` value.
    y_all : list of list of float
        One list per detected face holding every ``landmark.y`` value.
    noses : list of [x, y]
        One entry per detected face with the coordinates of landmark
        index 1, which this notebook uses as the nose.

    All three lists are empty when no face is detected.
    """
    x_all = []
    y_all = []
    noses = []
    faceModule = mediapipe.solutions.face_mesh
    with faceModule.FaceMesh(static_image_mode=True) as faces:
        results = faces.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # multi_face_landmarks is None when nothing was detected, so fall
        # back to an empty list instead of iterating over None.
        for face in results.multi_face_landmarks or []:
            x = [landmark.x for landmark in face.landmark]
            y = [landmark.y for landmark in face.landmark]
            # Append per face so coordinates and nose stay aligned row by row.
            x_all.append(x)
            y_all.append(y)
            noses.append([x[1], y[1]])
    return x_all, y_all, noses

In [106]:
# Function that returns the image annotated with the predicted pitch, yaw and roll
def predict_image(image, show = False):
    """Predict the head pose for ``image`` and draw the pose axes on it.

    Parameters
    ----------
    image : image array
        Image passed to ``extract_features``; the axes are drawn onto this
        same array by ``draw_axis``.
    show : bool, optional
        When true, display the image in a window named "Test" for up to
        10 seconds (``cv2.waitKey(10000)``) and then close all windows.

    Returns
    -------
    The ``image`` object that was passed in. It is left undrawn when the
    extracted landmark lists are empty.
    """
    x, y , noses = extract_features(image)

    # Only index into the landmark data when there is something to index;
    # an empty list would otherwise fail on the [:, 1] column lookup.
    if x and noses and noses[0]:
        x_arr = np.array(x)
        y_arr = np.array(y)
        # Center the coordinates on landmark index 1, as done for training.
        x_center = x_arr - x_arr[:, 1].reshape(-1, 1)
        y_center = y_arr - y_arr[:, 1].reshape(-1, 1)
        features_data = np.hstack([x_center, y_center])
        prediction = mor_from_pickle.predict(features_data)
        # Scale the nose position by image width/height to get the pixel origin.
        draw_axis(image, prediction[0][0], prediction[0][1], prediction[0][2],
                  noses[0][0] * image.shape[1], noses[0][1] * image.shape[0])

    if show:
        cv2.imshow("Test",image)
        cv2.waitKey(10000)
        cv2.destroyAllWindows()
    return image

In [107]:
# Predicting an AFLW2000 image for testing.
# A raw string keeps the Windows backslashes literal.
test_image_path = r'D:\ITI\Machine Learning Project\AFLW2000-3D\AFLW2000\image00077.jpg'
img = cv2.imread(test_image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError('Could not read image: ' + test_image_path)
predicted = predict_image(img, True)

In [108]:
# Video prediction: annotate every frame of the input clip and write a new file.
# A raw string keeps the Windows backslashes literal.
cap = cv2.VideoCapture(r'D:\ITI\Machine Learning Project\Billie.mp4')
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Write at the source frame rate so playback speed matches the input;
# fall back to 30 fps when the capture reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv2.VideoWriter("new video.mp4", fourcc, fps, (width, height))
# NOTE(review): this window is created but no frame is ever shown in it.
cv2.namedWindow("new Video", cv2.WINDOW_NORMAL)
try:
    while cap.isOpened():
        # one dot per read attempt as a lightweight progress indicator
        print('.', end='')
        ret, frame = cap.read()
        if not ret:
            break
        frame = predict_image(frame)
        out.write(frame)
finally:
    # Release the capture and writer even if a frame fails, so the output
    # file is closed properly.
    cap.release()
    out.release()
    del out
    cv2.destroyAllWindows()

..........................................................................................................................