In [None]:
!pip install opencv-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install mediapipe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.8.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.8 MB)
[K     |████████████████████████████████| 32.8 MB 1.4 MB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.10


In [None]:
# Mount Google Drive at /content/drive so the dataset folder can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset folder on the mounted drive; the notebook reads <name>.jpg and <name>.mat pairs from it.
OutPATH="/content/drive/MyDrive/Machine Learning 1 project/AFLW2000"

## Essential Imports

In [None]:
import cv2
import mediapipe as mp
from google.colab.patches import cv2_imshow
import scipy.io as sio
from pathlib import Path
import glob
import pandas as pd
import numpy as np

## Function to get Landmarks

In [None]:
def GetLandmarksOfImage(image):
    """Detect a face with MediaPipe FaceMesh and return its landmarks in pixel coordinates.

    Args:
        image: BGR image array (as produced by ``cv2.imread`` or
            ``cv2.VideoCapture.read``), or ``None`` when the image could not be read.

    Returns:
        A tuple ``(xlist, ylist, first_point, second_point)`` where ``xlist`` and
        ``ylist`` hold the integer pixel x / y coordinate of every landmark of the
        first detected face, ``first_point`` is ``[x, y]`` of landmark 1 and
        ``second_point`` is ``[x, y]`` of landmark 27. All four lists are empty
        when ``image`` is ``None`` or no face was detected.
    """
    # Landmark indices used as reference points by Normalize_Landmarks.
    first_point_index = 1    # near the nose
    second_point_index = 27  # described by the original author as the top right of the face

    xlist, ylist, first_point, second_point = [], [], [], []

    # cv2.imread returns None for a missing/unreadable file; report "no landmarks"
    # instead of crashing inside cv2.cvtColor.
    if image is None:
        return xlist, ylist, first_point, second_point

    faceModule = mp.solutions.face_mesh

    # facemesh --> used to get the landmarks of each face
    with faceModule.FaceMesh(static_image_mode=True) as facemesh:

        # OpenCV images are BGR; the image is converted to RGB before processing.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # result --> resulting face mesh
        result = facemesh.process(rgb_image)

        # multi_face_landmarks is None (or empty) when no face was found.
        if not result.multi_face_landmarks:
            return xlist, ylist, first_point, second_point

        height, width = image.shape[:2]

        # Only the first face is used so the output always describes a single face.
        face_landmarks = result.multi_face_landmarks[0]

        for index, landmark in enumerate(face_landmarks.landmark):
            # landmark.x / landmark.y are scaled by the image width / height;
            # multiply back to get pixel positions.
            relative_x = int(landmark.x * width)
            relative_y = int(landmark.y * height)

            xlist.append(relative_x)
            ylist.append(relative_y)

            # Compare by index rather than by protobuf equality: it is cheaper and
            # cannot match a second landmark that happens to share the same values.
            if index == first_point_index:
                first_point.extend((relative_x, relative_y))
            elif index == second_point_index:
                second_point.extend((relative_x, relative_y))

    return xlist, ylist, first_point, second_point

## The function below is to center and normalize data points

In [None]:
def Normalize_Landmarks(Landmarks):
  """Centre the landmarks on the first reference point and scale them.

  Landmarks is indexed as (x values, y values, first point [x, y], second point [x, y]).
  Every coordinate is shifted so the first point becomes the origin and then
  divided by the Euclidean distance between the two reference points.

  Returns a pair of numpy arrays: (normalized x values, normalized y values).
  """
  # Reference points: [x, y] of the first and of the second point
  origin_x, origin_y = Landmarks[2][0], Landmarks[2][1]
  ref_x, ref_y = Landmarks[3][0], Landmarks[3][1]

  # Distance between the two reference points, used as the scale factor
  scale = np.linalg.norm(np.array([origin_x, origin_y]) - np.array([ref_x, ref_y]))

  normalized_x = (np.array(Landmarks[0]) - origin_x) / scale
  normalized_y = (np.array(Landmarks[1]) - origin_y) / scale
  return normalized_x, normalized_y

## 'Getpose' function to get yaw, pitch, roll of each image from its mat file

In [None]:
def Getpose(mat_file):
  """Return the pose labels [yaw, pitch, roll] read from a loaded .mat annotation.

  The first three entries of the first row of mat_file["Pose_Para"] are taken
  as pitch, yaw and roll (in that order) and returned reordered as
  [yaw, pitch, roll].
  """
  # first three entries of the first row: the three angles
  angles = mat_file["Pose_Para"][0][:3]
  return [angles[1], angles[0], angles[2]]

## Create Data Frame

In [None]:
# Column layout of the dataset: 468 x columns, 468 y columns, then the three pose angles.
xlistnames = ['x_' + str(idx) for idx in range(468)]
ylistnames = ['y_' + str(idx) for idx in range(468)]
yprlist = ['yaw', 'pitch', 'roll']

In [None]:
# Empty frame with one column per landmark coordinate followed by the pose angles.
df = pd.DataFrame(columns=[*xlistnames, *ylistnames, *yprlist])

## Iterate over all images and build the dataframe 

In [None]:
# Sample names come from the .mat files in OutPATH; each one is expected to have a
# same-named .jpg next to it. Sorting makes the row order (and therefore the seeded
# train/test split) independent of the order in which glob lists the files.
Images_Names = sorted(Path(f).stem for f in glob.glob(OutPATH + '/*.mat'))

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of growing the DataFrame one row at a time.
rows = []
for imagename in Images_Names:
  img = cv2.imread(OutPATH + '/' + imagename + '.jpg')

  # cv2.imread returns None when the image is missing or unreadable
  if img is None:
    continue

  landmarks = GetLandmarksOfImage(img)

  # Skip images where no face (and hence no landmark / reference point) was found
  if not all(landmarks):
    continue

  x_landmarks, y_landmarks = Normalize_Landmarks(landmarks)

  mat_file = sio.loadmat(OutPATH + '/' + imagename + '.mat')

  # One row per sample: normalized x values, normalized y values, then [yaw, pitch, roll]
  rows.append(list(x_landmarks) + list(y_landmarks) + Getpose(mat_file))

df = pd.DataFrame(rows, columns=xlistnames + ylistnames + yprlist)

## Split the data into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Features are the landmark columns, targets are the three pose angles; 20% is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    df[xlistnames + ylistnames],
    df[yprlist],
    test_size=0.2,
    random_state=42,
)

## Linear Regression model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

# Wrap a plain linear regressor so the three targets are fitted together, then predict on the test split.
multioutputregressor = MultiOutputRegressor(LinearRegression())
multioutputregressor.fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [None]:
# Evaluate the current predictions on the held-out test split.
from sklearn import metrics

mae, mse, r2 = (
    score(y_test, predictions)
    for score in (metrics.mean_absolute_error, metrics.mean_squared_error, metrics.r2_score)
)

for header_line in ("The model performance for testing set",
                    "--------------------------------------"):
    print(header_line)

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.5061045282244736
MSE is 0.5254191475162356
R2 score is -4.5642564370282335


## AdaBoost model

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost with 100 estimators and a fixed seed, wrapped for the three-target problem.
multioutputregressor = MultiOutputRegressor(
    AdaBoostRegressor(random_state=42, n_estimators=100)
)
multioutputregressor.fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [None]:
# Evaluate the current predictions on the held-out test split.
from sklearn import metrics

mae, mse, r2 = (
    score(y_test, predictions)
    for score in (metrics.mean_absolute_error, metrics.mean_squared_error, metrics.r2_score)
)

for header_line in ("The model performance for testing set",
                    "--------------------------------------"):
    print(header_line)

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.14249552110171915
MSE is 0.04845519343190121
R2 score is 0.5941049327577422


## XGBOOST model 

In [None]:
from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb

# XGBoost regressor with a squared-error objective, wrapped for the three-target problem.
multioutputregressor = MultiOutputRegressor(
    xgb.XGBRegressor(objective='reg:squarederror')
)
multioutputregressor.fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [None]:
# Evaluate the current predictions on the held-out test split.
from sklearn import metrics

mae, mse, r2 = (
    score(y_test, predictions)
    for score in (metrics.mean_absolute_error, metrics.mean_squared_error, metrics.r2_score)
)

for header_line in ("The model performance for testing set",
                    "--------------------------------------"):
    print(header_line)

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.08672444096333305
MSE is 0.03409495729153592
R2 score is 0.7082448095025625


## SVR model (best R2 score)

In [None]:
from sklearn.svm import SVR

# SVR with default settings, wrapped for the three-target problem.
# This is the last model fitted, so it is the one the video cell uses.
multioutputregressor = MultiOutputRegressor(SVR())
multioutputregressor.fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [None]:
# Evaluate the current predictions on the held-out test split.
from sklearn import metrics

mae, mse, r2 = (
    score(y_test, predictions)
    for score in (metrics.mean_absolute_error, metrics.mean_squared_error, metrics.r2_score)
)

for header_line in ("The model performance for testing set",
                    "--------------------------------------"):
    print(header_line)

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score of test set is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.07066728893183201
MSE is 0.022193704487171187
R2 score of test set is 0.8243518889876936


## Draw axis with angles (yaw, pitch, roll)

In [None]:
from math import cos, sin

def draw_axis(img, pitch, yaw, roll, tdx=None, tdy=None, size=100):
    """Draw the three pose axes on ``img`` and return it.

    Args:
        img: image array to draw on; the lines are drawn onto this array.
        pitch, yaw, roll: pose angles. They are passed straight to
            ``math.cos`` / ``math.sin``, so they must be in radians.
        tdx, tdy: pixel coordinates of the axes' origin. If either one is
            ``None`` the centre of the image is used instead.
        size: length of each drawn axis in pixels.

    Returns:
        The same ``img`` object that was passed in.
    """
    # The yaw sign is flipped before the axes are projected.
    yaw = -yaw

    # Fall back to the image centre unless both origin coordinates were supplied.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    origin = (int(tdx), int(tdy))

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis | drawn in green
    #        v
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    # Colours are given in OpenCV's BGR order.
    cv2.line(img, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return img

## Upload test video

In [None]:
# Prompt for a local file (the test video) and upload it into the Colab runtime.
from google.colab import files
uploaded = files.upload()

Saving WIN_20220617_00_50_58_Pro.mp4 to WIN_20220617_00_50_58_Pro.mp4


## Test the model on a video

In [None]:
import cv2

input_video_name = 'WIN_20220617_00_50_58_Pro.mp4'
video_name = 'Out_Video.avi'
feature_columns = xlistnames + ylistnames

cap = cv2.VideoCapture(input_video_name)

# The writer is sized from the input video: frames passed to write() must have
# exactly the size declared here, so a hard-coded size breaks on other resolutions.
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
video = cv2.VideoWriter(video_name, 0, 10, frame_size)

# Feature rows of every processed frame; turned into df_test once the loop is done.
test_rows = []

try:
  while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
      break

    landmarks = GetLandmarksOfImage(frame)

    # Frames without a detected face are skipped and not written to the output video
    if not all(landmarks):
      continue

    x_landmarks, y_landmarks = Normalize_Landmarks(landmarks)
    features = list(x_landmarks) + list(y_landmarks)
    test_rows.append(features)

    # Predict from a one-row frame carrying the same column names used for training;
    # the prediction columns follow yprlist: yaw, pitch, roll.
    yaw, pitch, roll = multioutputregressor.predict(
        pd.DataFrame([features], columns=feature_columns))[0]

    # Axes are anchored at the first reference point (landmark near the nose)
    out_img = draw_axis(frame, pitch, yaw, roll, landmarks[2][0], landmarks[2][1])

    video.write(out_img)
finally:
  # Release the reader and the writer even if a frame fails part-way through
  cap.release()
  video.release()
  cv2.destroyAllWindows()

df_test = pd.DataFrame(test_rows, columns=feature_columns)

                                                                                                   The end ✌