In [1]:
# Root folder that every dataset and output location below is resolved against
relative_base_path = './'

# Input videos: manipulated (fake) and original (real) sequences
fake_dataset_dir = relative_base_path + "dataset/manipulated_sequences"
real_dataset_dir = relative_base_path + "dataset/original_sequences"

# Output folders for the extracted features, named after the class label (0 = fake, 1 = real)
fake_output_dir = relative_base_path + "out/0/"
real_output_dir = relative_base_path + "out/1/"

In [2]:
import os
import glob
import cv2 as cv
import numpy as np
import mediapipe as mp
from models.blink_detection.DetectBlinking import DetectBlinking

In [3]:
def faces_count(path):
    """Count the faces MediaPipe detects in the first frame of a video.

    Only the first frame is inspected; the rest of the video is not read.

    Args:
        path: Path of the video file to open with OpenCV.

    Returns:
        The number of detected faces (0 when none is found), or ``None`` when
        the video cannot be opened, contains no frame, or detection fails.
        Failures are reported with a printed message instead of being raised.
    """
    cap = None
    try:
        cap = cv.VideoCapture(path)
        if not cap.isOpened():
            raise IOError(f"Failed to open video: {path}")

        ret, frame = cap.read()
        if not ret:
            return None

        # The context manager closes the MediaPipe graph once the frame is processed.
        with mp.solutions.face_detection.FaceDetection(
            model_selection=1, min_detection_confidence=0.5
        ) as face_detection:
            # OpenCV decodes frames as BGR, MediaPipe expects RGB.
            result = face_detection.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))

        # MediaPipe reports "no face" as None rather than an empty list.
        return len(result.detections) if result.detections else 0

    except Exception as e:
        print(f"An error occurred: {e}")
        return None

    finally:
        # Always free the decoder handle, including on early return or error.
        if cap is not None:
            cap.release()

In [4]:
def get_and_save_features(p, path, output_filename, idx):
    """Extract the blink features of one video and store them as a ``.npy`` file.

    The video is skipped (nothing is written) unless ``faces_count`` reports
    exactly one face for it.

    Args:
        p: Path of the video to process.
        path: Directory the feature file is written to; created when missing.
        output_filename: Name of the feature file inside ``path``.
        idx: Index of the video, only used in the progress message.
    """
    if faces_count(p) != 1:
        return

    # NOTE(review): 0.3 and 4 are passed positionally to DetectBlinking; their
    # meaning is defined by that class — confirm before tuning.
    blink_counter = DetectBlinking(p, 0.3, 4, return_features=True)
    video_features = np.array(blink_counter.process_video())
    print(f"{idx} Video Processed | Features: ", video_features.shape, len(video_features))

    # Create the folder unconditionally and write the array exactly once.
    os.makedirs(path, exist_ok=True)
    np.save(os.path.join(path, output_filename), video_features)

In [5]:
def get_features_and_save_npy(video_paths, output_dir, output_files):
    """Extract features for every video that has no feature file yet.

    Args:
        video_paths: Iterable of video file paths.
        output_dir: Output folder, expected to end with a path separator because
            the feature path is built by plain string concatenation.
        output_files: Iterable of feature-file paths that already exist; videos
            whose feature path is among them are skipped.
    """
    # Normalise once: set lookups are O(1), and the comparison no longer depends
    # on a "./" prefix or on the separator style of the paths passed in.
    already_extracted = {os.path.normpath(str(f)) for f in output_files}

    for idx, p in enumerate(video_paths):
        _, tail = os.path.split(p)
        # The stem is everything before the first dot of the file name.
        name = tail.split(".")[0]

        output_filename = f"{name}.npy"
        np_path = output_dir + output_filename

        if os.path.normpath(np_path) in already_extracted:
            print(f"{idx} => File missed: ", np_path)
        else:
            get_and_save_features(p, output_dir, output_filename, idx)
            print(f"{idx} => File processed: ", p)

In [6]:
# Feature files already present in the output folders
extracted_fake_paths_npy = np.array(glob.glob(f"{fake_output_dir}*.npy"))
extracted_real_paths_npy = np.array(glob.glob(f"{real_output_dir}*.npy"))
print("extracted_fake_paths_npy: ", extracted_fake_paths_npy.shape)
print("extracted_real_paths_npy: ", extracted_real_paths_npy.shape)

# Source videos, located three directory levels below each dataset root
fake_mp4_paths = glob.glob(f"{fake_dataset_dir}/*/*/*/*.mp4")
real_mp4_paths = glob.glob(f"{real_dataset_dir}/*/*/*/*.mp4")
print("fake_mp4_paths: ", len(fake_mp4_paths))
print("real_mp4_paths: ", len(real_mp4_paths))

extracted_fake_paths_npy:  (3174,)
extracted_real_paths_npy:  (1157,)
fake_mp4_paths:  8067
real_mp4_paths:  1363


In [None]:
# Run the extraction for both classes (fake first, then real); each video's
# features end up in its own .npy file inside the matching output folder.
for video_paths, output_dir, existing_files in (
    (fake_mp4_paths, fake_output_dir, extracted_fake_paths_npy),
    (real_mp4_paths, real_output_dir, extracted_real_paths_npy),
):
    get_features_and_save_npy(video_paths, output_dir, existing_files)

In [7]:
def pad_to_max_length(array, max_length, pad_value = 0):
    """Pad an array at the end of its first axis up to ``max_length`` entries.

    Only the first axis grows; all other axes keep their size, whatever the
    rank of the input.

    Args:
        array: Array (or array-like) with at least one dimension.
        max_length: Target length of the first axis.
        pad_value: Constant used to fill the added entries.

    Returns:
        A new array whose first axis has length ``max_length``.

    Raises:
        ValueError: If ``array`` is already longer than ``max_length``.
    """
    array = np.asarray(array)
    missing = max_length - len(array)
    if missing < 0:
        raise ValueError(
            f"Cannot pad array of length {len(array)} to shorter max_length {max_length}"
        )

    # One (before, after) pair per axis: grow axis 0 only, leave the rest alone.
    pad_width = [(0, missing)] + [(0, 0)] * (array.ndim - 1)
    return np.pad(array, pad_width, mode="constant", constant_values=pad_value)

In [8]:
# Scan the output folders again for the feature files to load.
# glob returns matches in arbitrary, filesystem-dependent order, so the paths
# are sorted to keep the row order of the assembled dataset reproducible.
extracted_fake_paths_npy = np.array(sorted(glob.glob(fake_output_dir + "*.npy")))
extracted_real_paths_npy = np.array(sorted(glob.glob(real_output_dir + "*.npy")))

print("extracted_fake_paths_npy: ", len(extracted_fake_paths_npy), extracted_fake_paths_npy.shape)
print("extracted_real_paths_npy: ", len(extracted_real_paths_npy), extracted_real_paths_npy.shape)

extracted_fake_paths_npy:  3174 (3174,)
extracted_real_paths_npy:  1157 (1157,)


In [9]:
# Read every extracted feature file into memory, one array per video
fake_features = [np.load(npy_path) for npy_path in extracted_fake_paths_npy]
real_features = [np.load(npy_path) for npy_path in extracted_real_paths_npy]

print("fake_features: ", len(fake_features))
print("real_features: ", len(real_features))

fake_features:  3174
real_features:  1157


In [10]:
# Longest feature sequence across both classes
max_length = max(max(map(len, fake_features)), max(map(len, real_features)))
print("Max length of features: ", max_length)

Max length of features:  1814


In [11]:
# Pad the features
fake_features_padded = []
real_features_padded = []


for idx, features in enumerate(fake_features):
    if len(features) > 200:
        padded_arr = pad_to_max_length(features, max_length)
        fake_features_padded.append(padded_arr)

fake_features_padded = np.array(fake_features_padded)

for idx, features in enumerate(real_features):
    if len(features) > 200:
        padded_arr = pad_to_max_length(features, max_length)
        real_features_padded.append(padded_arr)

real_features_padded = np.array(real_features_padded)

print("fake_features_padded: ", fake_features_padded.shape)
print("real_features_padded: ", real_features_padded.shape)

fake_features_padded:  (2622, 1814, 3)
real_features_padded:  (1126, 1814, 3)


In [12]:
# Label every sample (0 = fake, 1 = real) and stack both classes into one dataset
fake_targets = np.zeros(len(fake_features_padded))
real_targets = np.ones(len(real_features_padded))

all_features = np.concatenate([fake_features_padded, real_features_padded])
all_targets = np.concatenate([fake_targets, real_targets])

print("X_dataset: ", all_features.shape)
print("Y_dataset: ", all_targets.shape)

X_dataset:  (3748, 1814, 3)
Y_dataset:  (3748,)


In [13]:
# Store features and targets together in one compressed archive
# (NumPy appends the ".npz" extension to the given file name)
dataset_path = relative_base_path + "out/only_esn_features"
np.savez_compressed(dataset_path, X_dataset=all_features, Y_dataset=all_targets)