Deepfake Manipulation Detection

#### Imports

In [1]:
import cv2
import os
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision



#### Video to frames

In [2]:
def vid_to_frames(vid_path, output_dir, fps=25):

    # create output dir if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # capture the video
    vid = cv2.VideoCapture(vid_path)

    # get original fps
    original_fps = vid.get(cv2.CAP_PROP_FPS)

    # calculate interval between frames
    interval = int(original_fps / fps)

    # initialize frame counters
    frame_count = 0 # counter for all frames
    saved_frame_count = 0 # counter for saved frames

    # loop through each frame of the video
    while True:
        ret, frame = vid.read() # read a frame from the video
        if not ret: # break the loop if no frame is returned
            break

        # check if the current frame should be saved based on the interval
        if frame_count % interval == 0:
            # create the filename for the saved frame
            frame_filename = os.path.join(output_dir, f"frame_{saved_frame_count:04d}.png")
            # save the frame as a png file
            cv2.imwrite(frame_filename, frame)
            saved_frame_count += 1
        
        frame_count += 1
    
    vid.release()

    print(f"Extracted {saved_frame_count} frames at {fps} FPS.")


In [11]:
video_path = "./test/sa1-video-fadg0.avi"
output_path = "./output"
vid_to_frames(video_path, output_path)


Extracted 117 frames at 25 FPS.


#### Frames to video

In [None]:
# def frame_to_vid(input_dir, output_dir, fps=25):


#### Face Detection using BlazeFace

In [6]:

# # STEP 2: Create an FaceDetector object.
# base_options = python.BaseOptions(model_asset_path='./models/detector.tflite')
# options = vision.FaceDetectorOptions(base_options=base_options)
# detector = vision.FaceDetector.create_from_options(options)

# # STEP 3: Load the input image.
# image = mp.Image.create_from_file('./test/frames/sa1-video-fadg0.avi/frame_0000.png')

# # STEP 4: Detect faces in the input image.
# detection_result = detector.detect(image)

I0000 00:00:1717429244.099445 1637111 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717429244.103497 2130708 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) UHD Graphics 730 (ADL-S GT1)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1717429244.135270 2130713 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [7]:
print(detection_result)

DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=173, origin_y=146, width=172, height=172), categories=[Category(index=0, score=0.9432362914085388, display_name=None, category_name=None)], keypoints=[NormalizedKeypoint(x=0.4428234100341797, y=0.5039209723472595, label='', score=0.0), NormalizedKeypoint(x=0.5787340402603149, y=0.5050773620605469, label='', score=0.0), NormalizedKeypoint(x=0.5128469467163086, y=0.6167497634887695, label='', score=0.0), NormalizedKeypoint(x=0.5107505321502686, y=0.7039387822151184, label='', score=0.0), NormalizedKeypoint(x=0.36317509412765503, y=0.5427756309509277, label='', score=0.0), NormalizedKeypoint(x=0.6494894623756409, y=0.5473453998565674, label='', score=0.0)])])


In [8]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import matplotlib.pyplot as plt


def draw_landmarks_on_image(rgb_image, detection_result):
  face_landmarks_list = detection_result.face_landmarks
  annotated_image = np.copy(rgb_image)

  # Loop through the detected faces to visualize.
  for idx in range(len(face_landmarks_list)):
    face_landmarks = face_landmarks_list[idx]

    # Draw the face landmarks.
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
    ])

    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_tesselation_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_contours_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp.solutions.drawing_styles
          .get_default_face_mesh_iris_connections_style())

  return annotated_image

def plot_face_blendshapes_bar_graph(face_blendshapes):
  # Extract the face blendshapes category names and scores.
  face_blendshapes_names = [face_blendshapes_category.category_name for face_blendshapes_category in face_blendshapes]
  face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in face_blendshapes]
  # The blendshapes are ordered in decreasing score value.
  face_blendshapes_ranks = range(len(face_blendshapes_names))

  fig, ax = plt.subplots(figsize=(12, 12))
  bar = ax.barh(face_blendshapes_ranks, face_blendshapes_scores, label=[str(x) for x in face_blendshapes_ranks])
  ax.set_yticks(face_blendshapes_ranks, face_blendshapes_names)
  ax.invert_yaxis()

  # Label each bar with values
  for score, patch in zip(face_blendshapes_scores, bar.patches):
    plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")

  ax.set_xlabel('Score')
  ax.set_title("Face Blendshapes")
  plt.tight_layout()
  plt.show()

### Extract Facial Landmarks

#### Create FaceLandmarker object.

In [4]:
base_options = python.BaseOptions(model_asset_path='./models/face_landmarker.task')

options = vision.FaceLandmarkerOptions(base_options=base_options,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)

detector = vision.FaceLandmarker.create_from_options(options)

I0000 00:00:1717577652.614964 4078610 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717577652.616016 4101307 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) UHD Graphics 730 (ADL-S GT1)
W0000 00:00:1717577652.616340 4078610 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1717577652.619832 4101312 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717577652.627681 4101311 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


#### Load input image.

In [13]:
image = mp.Image.create_from_file('./download.png')

#### Inference.

In [10]:
detection_result = detector.detect(image)



#### Extract Facial Landmarks from Video. 

In [14]:
BaseOptions = mp.tasks.BaseOptions
FaceLandmarker = mp.tasks.vision.FaceLandmarker
FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

In [21]:
# create a facelandmarker instance with the video mode:
options = FaceLandmarkerOptions(
    base_options = BaseOptions(model_asset_path='./models/face_landmarker.task'),
    running_mode=VisionRunningMode.VIDEO,
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
    num_faces=1
)

with FaceLandmarker.create_from_options(options) as landmarker:
    result1 = landmarker.detect("./data/vidtimit/deepfake/videos/sa1-video-fadg0.avi")




I0000 00:00:1717611449.709235 2173355 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717611449.710810 2267577 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) UHD Graphics 730 (ADL-S GT1)
W0000 00:00:1717611449.711157 2173355 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1717611449.714229 2267582 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717611449.718903 2267584 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


ValueError: Please provide 'image_format' with 'data'.

In [20]:
print(result1.facial_transformation_matrixes)

[array([[ 9.92205501e-01, -6.04903474e-02,  1.08946197e-01,
         1.18711746e+00],
       [ 9.10740122e-02,  9.48733926e-01, -3.02671373e-01,
         2.23701134e+01],
       [-8.50522369e-02,  3.10234487e-01,  9.46847558e-01,
        -6.80049210e+01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00]])]
