# Purpose
The goal of this program is to capture the facial landmarks of each video and save them to an NPY file.

For simplicity, we will only be converting the videos in the **MU3D** database.

In [1]:
from google.colab import drive
from google.colab.patches import cv2_imshow
drive.mount('/content/drive')

!pip3 install mediapipe

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import pandas as pd
import cv2
import glob

Mounted at /content/drive
Collecting mediapipe
  Downloading mediapipe-0.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.7/35.7 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the fo

# Obtaining the Truthfulness of Each Video
For the UMiami dataset (MU3D), the truthfulness of each video is saved in a separate Excel file. Therefore, we will first create a dictionary containing the truthfulness of each video. This will make it easier for later work because we won't have to search the Excel file each time we load a video.

In [2]:
# Locations of the MU3D package contents on the mounted Drive
FOLDER = '/content/drive/MyDrive/Deception_Detection/Datasets/MU3D-Package'
VIDEO_DATA_PATH = f"{FOLDER}/VideosMP4"
EXCEL_DATA_PATH = f"{FOLDER}/MU3D Codebook.xlsx"
SAVE_NPY_PATH = f"{FOLDER}/NPY_FILES(240frames)"

# Load the "Video-Level Data" sheet of the codebook
df = pd.read_excel(EXCEL_DATA_PATH, sheet_name="Video-Level Data")

# Map every VideoID to its Veracity value, one entry per row of the sheet
truthfulness = {
  record["VideoID"]: record["Veracity"]
  for _, record in df.iterrows()
}

# Finding the FPS and Duration for the videos in our dataset
Micro-expressions last between 1/25 and 1/5 of a second, so our video frame rate (FPS) must be high enough to capture them. Let's first find out the FPS and duration of the videos in our dataset.

In [3]:
# Collect the frame rate and duration (in seconds) of every .mp4 under
# VIDEO_DATA_PATH. The two lists stay index-aligned: a video contributes
# either to both lists or to neither.
fpses = []
durations = []

for file_path in glob.glob(VIDEO_DATA_PATH+"/**", recursive = True):
  # Skip non-videos
  if not file_path.endswith(".mp4"):
    continue

  # Create a Video Capture Object and read its metadata; release it even if
  # a property query fails so decoder handles do not pile up across files
  video = cv2.VideoCapture(file_path)
  try:
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
  finally:
    video.release()

  # A non-positive FPS would make the duration division below fail,
  # so report the file and leave it out of the statistics
  if fps <= 0:
    print(f"Skipping {file_path}: could not read a valid FPS")
    continue

  fpses.append(fps)

  # Calculate the duration of the video in seconds
  duration = frame_count / fps
  durations.append(duration)


In [4]:
from statistics import mean

# Summarise the frame rates gathered in `fpses`
fps_avg, fps_max, fps_min = mean(fpses), max(fpses), min(fpses)
print(f"Average FPS: {fps_avg:.2f}")
print(f"Max FPS: {fps_max:.2f}")
print(f"Min FPS: {fps_min:.2f}")

# Blank line between the two summaries
print()

# Summarise the durations gathered in `durations`
dur_avg, dur_max, dur_min = mean(durations), max(durations), min(durations)
print(f"Average Duration: {dur_avg:.2f}")
print(f"Max Duration: {dur_max:.2f}")
print(f"Min Duration: {dur_min:.2f}")

Average FPS: 29.98
Max FPS: 30.00
Min FPS: 29.97

Average Duration: 35.76
Max Duration: 57.79
Min Duration: 10.01


# Note
Based on the results, the MU3D videos generally have a higher FPS than those in the Trial dataset.<br>
We have decided to collect a frame every 0.10 seconds.<br>
For a video that has 25 frames per second, 0.10 seconds corresponds to 2.5 frames, which the code rounds to every 2nd frame.<br>
For a video that has 10 frames per second, we will collect every frame.

In [5]:
import sys

# Shared FaceLandmarker instance, created on first use by _get_face_landmarker
_FACE_LANDMARKER = None

def _get_face_landmarker():
  """Return the shared FaceLandmarker, creating it on the first call."""
  global _FACE_LANDMARKER
  if _FACE_LANDMARKER is None:
    # Building the detector loads the model file, so do it once instead of
    # once per frame
    base_options = python.BaseOptions(model_asset_path='/content/drive/MyDrive/Deception_Detection/Code/face_landmarker.task')
    options = vision.FaceLandmarkerOptions(base_options=base_options,
                                          output_face_blendshapes=True,
                                          output_facial_transformation_matrixes=True,
                                          num_faces=1)
    _FACE_LANDMARKER = vision.FaceLandmarker.create_from_options(options)
  return _FACE_LANDMARKER

def detectLandmarks(RGBinput):
  """Run the MediaPipe face landmarker on one image.

  Args:
    RGBinput: image data handed to ``mp.Image`` with the SRGB image format,
      so the caller is expected to supply RGB channel order.

  Returns:
    The result object returned by ``FaceLandmarker.detect``. The detector is
    configured for at most one face, with blendshapes and facial
    transformation matrices enabled.
  """
  # Wrap the raw pixel data in a MediaPipe image
  image = mp.Image(image_format=mp.ImageFormat.SRGB, data=RGBinput)
  # Detect face landmarks with the shared detector
  detection_result = _get_face_landmarker().detect(image)

  return detection_result

def normalized_landmarks_to_np_array(detection_result):
    """Copy the x/y values of the first detected face into a NumPy array.

    Reads ``detection_result.face_landmarks[0]``; that indexing raises
    IndexError when the list is empty. Returns a ``(478, 2)`` float array
    whose row ``i`` holds ``[x, y]`` of landmark ``i``. Rows beyond the
    number of landmarks supplied stay zero; only x and y are stored.
    """
    first_face = detection_result.face_landmarks[0]

    # 478 landmark slots, two columns: x and y
    coords = np.zeros((478, 2))

    for row, point in enumerate(first_face):
        coords[row, :] = (point.x, point.y)
    return coords

def video_to_numpy(video, veracity, max_frames=240, sample_interval=0.1):
  """Sample frames from a video and stack their face landmarks.

  Args:
    video: an object offering ``get(cv2.CAP_PROP_FPS)`` and ``read()``, as
      returned by ``cv2.VideoCapture``.
    veracity: accepted for call compatibility; not used by this function.
    max_frames: stop once this many frames have yielded landmarks.
    sample_interval: target spacing between sampled frames, in seconds.

  Returns:
    The per-frame arrays from ``normalized_landmarks_to_np_array`` stacked
    along a new first axis, in the order the frames were read.

  Raises:
    ValueError: if no sampled frame produced landmarks (including when the
      video yields no frames at all).
  """
  fps = video.get(cv2.CAP_PROP_FPS)
  # round() yields 0 for a low or unreported FPS, which would make the modulo
  # below divide by zero; fall back to sampling every frame
  every_nth_frame = max(1, round(fps * sample_interval))

  count = 0
  arr_list = [] # A list containing the arrays of multiple frames
  while len(arr_list) < max_frames:
    success, frame = video.read()
    if not success:
      break
    if count % every_nth_frame == 0:
      # OpenCV decodes frames as BGR, while detectLandmarks wraps its input
      # as an SRGB image, so swap the channel order first
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      detection_result = detectLandmarks(rgb_frame)
      try:
        nparray = normalized_landmarks_to_np_array(detection_result)
      except IndexError:
        # An index error indicates that there was a failure to capture the
        # face. The counter is not advanced here, so the very next frame is
        # tried in its place.
        continue
      arr_list.append(nparray)
    count = count + 1

  if not arr_list:
    raise ValueError("no face landmarks were captured from any sampled frame")
  return np.stack(arr_list)


In [9]:
def file_exists(file_path):
    """Return True when something exists at exactly ``file_path``.

    The path is taken literally: characters such as ``[``, ``*`` and ``?``
    are not treated as wildcards. A symbolic link counts as existing even
    when its target is missing.
    """
    # Local import keeps this helper self-contained within the cell
    import os

    return os.path.lexists(file_path)

import os

# Convert every not-yet-converted .mp4 under VIDEO_DATA_PATH into an NPY file
# of stacked landmarks inside SAVE_NPY_PATH.

# Make sure the output folder exists before np.save tries to write into it
os.makedirs(SAVE_NPY_PATH, exist_ok=True)

for file_path in glob.glob(VIDEO_DATA_PATH+"/**", recursive = True):
  # Skip non-videos
  if not file_path.endswith(".mp4"):
    continue

  # Skip videos that have already been converted to NPY format
  filename = os.path.splitext(os.path.basename(file_path))[0]
  newfilepath = SAVE_NPY_PATH + "/" + f'{filename}.npy'
  if file_exists(newfilepath):
    continue

  # Truthfulness of the speaker in the video; looked up before the capture
  # is opened so a missing VideoID cannot leave an open capture behind
  veracity = truthfulness[filename]

  # Create a Video Capture Object and always release it afterwards
  video = cv2.VideoCapture(file_path)
  try:
    stacked_array = video_to_numpy(video, veracity)
  except ValueError as err:
    # Raised when no landmark arrays could be stacked for this video;
    # report it and move on instead of aborting the whole batch
    print(f"Skipping {filename}: {err}")
    continue
  finally:
    video.release()

  print(stacked_array.shape)
  np.save(newfilepath, stacked_array)