In [2]:
# Root folder that every dataset/output location below is resolved against.
relative_base_path = './'

# Input videos: manipulated (fake) and original (real) sequences.
fake_dataset_dir = relative_base_path + "dataset/manipulated_sequences"
real_dataset_dir = relative_base_path + "dataset/original_sequences"

# Destination folders for the per-video feature archives.
fake_output_dir = relative_base_path + "out/fake"
real_output_dir = relative_base_path + "out/real"

In [3]:
import os
import glob
import numpy as np
from tqdm import tqdm
from models.blink_detection.DetectBlinking import DetectBlinking
from sklearn.decomposition import PCA

In [5]:
def apply_pca(features, n_components=50):
    """Project ``features`` onto its first ``n_components`` principal components.

    A fresh ``PCA`` model is fitted on ``features`` on every call, so the
    returned coordinates are relative to the basis learned from this input only.

    Args:
        features: Data handed to ``PCA.fit_transform``.
        n_components: Number of components to keep (default 50).

    Returns:
        The transformed data returned by ``PCA.fit_transform``.
    """
    return PCA(n_components=n_components).fit_transform(features)

In [6]:
def save(path, output_filename, features):
    """Store one feature array as a compressed ``.npz`` archive.

    The array is written to ``path/output_filename`` under the key
    ``"features"``. The target directory is created when it does not exist.

    Args:
        path: Directory that receives the archive.
        output_filename: File name of the archive inside ``path``.
        features: Array to store; its ``shape`` is printed as progress output.
    """
    # Always make sure the directory exists. Checking for the file first is
    # unnecessary because the archive is (re)written unconditionally below.
    os.makedirs(path, exist_ok=True)

    print("Video Processed | Features: ", features.shape)
    np.savez_compressed(os.path.join(path, output_filename), features=features)

In [7]:
def extract_features_and_save(video_paths, output_dir, output_files):
    """Extract blink features for each video and save one archive per video.

    For every path in ``video_paths`` the function:

    * skips it when it is on the persisted skip-list
      (``./out/useless_files.npy``),
    * skips it when ``output_dir/<name>.npz`` is listed in ``output_files``,
    * otherwise runs ``DetectBlinking``, reduces the video features with
      ``apply_pca`` (fitted separately for each video), appends the EAR
      features column-wise and writes the result with ``save``.

    Videos that yield no features, or whose processing raises, are appended to
    the skip-list, which is written back to disk immediately so an interrupted
    run can be resumed.

    Args:
        video_paths: Iterable of video file paths to process.
        output_dir: Directory that receives the ``.npz`` archives.
        output_files: Collection of archive paths that already exist; compared
            as plain strings against ``output_dir + "/<name>.npz"``.

    Note:
        The output name is the part of the video's file name before the first
        dot, so videos in different folders that share a file name map to the
        same archive.
    """
    useless_files_path = "./out/useless_files.npy"

    # On a first run the skip-list does not exist yet: start with an empty one
    # instead of failing in np.load.
    if os.path.exists(useless_files_path):
        useless_files = np.load(useless_files_path).tolist()
    else:
        useless_files = []
    print("saved_useless_files: ", len(useless_files))

    # Snapshots used for membership tests; sets avoid re-scanning the whole
    # list/array for every video.
    known_useless = set(useless_files)
    already_extracted = {str(f) for f in output_files}

    def _mark_useless(idx, video_path):
        # Record the video on the skip-list and persist the list right away.
        useless_files.append(video_path)
        print(f"{idx} Video Skipped...", len(useless_files))
        os.makedirs(os.path.dirname(useless_files_path), exist_ok=True)
        np.save(useless_files_path, useless_files)

    for idx, p in enumerate(video_paths):
        _, tail = os.path.split(p)
        name = tail.split(".")[0]

        output_filename = f"{name}.npz"
        np_path = output_dir + f"/{output_filename}"

        if p in known_useless:
            print(f"File found in useless list: {p}")
            continue

        if np_path in already_extracted:
            print(f"{idx} => File already processed: ", np_path)
            continue

        try:
            # NOTE(review): 0.3 and 4 are presumably the EAR threshold and a
            # consecutive-frame count -- confirm against DetectBlinking.
            detect_blinking = DetectBlinking(
                p, 0.3, 4,
                crop_face=True,
                return_features=True,
                process=True,
                logs=False,
            )
            video_features, ear_features = detect_blinking.process_video()

            if video_features is not None and len(video_features) > 0:
                video_features = np.array(video_features)
                ear_features = np.array(ear_features)
                # NOTE(review): PCA presumably rejects inputs with fewer than
                # 50 rows/columns; such videos land in the except branch.
                reduced_features = apply_pca(video_features, n_components=50)
                final_features = np.concatenate((reduced_features, ear_features), axis=1)
                print(f"{idx} Final Features: ", final_features.shape)
                save(output_dir, output_filename, final_features)
            else:
                _mark_useless(idx, p)
        except Exception as e:
            # Best effort: one broken video must not abort the whole batch.
            print(f"Found error is path: {p}")
            print(f"Error: {e}")
            _mark_useless(idx, p)

In [8]:
# Feature archives already present in the output folders.
extracted_fake_paths_npy = np.array(glob.glob(f"{fake_output_dir}/*.npz"))
extracted_real_paths_npy = np.array(glob.glob(f"{real_output_dir}/*.npz"))

print("extracted_fake_paths_npy: ", extracted_fake_paths_npy.shape)
print("extracted_real_paths_npy: ", extracted_real_paths_npy.shape)

# Source videos: .mp4 files three directory levels below each dataset root.
fake_mp4_paths = glob.glob(f"{fake_dataset_dir}/*/*/*/*.mp4")
real_mp4_paths = glob.glob(f"{real_dataset_dir}/*/*/*/*.mp4")
print("fake_mp4_paths: ", len(fake_mp4_paths))
print("real_mp4_paths: ", len(real_mp4_paths))

extracted_fake_paths_npy:  (616,)
extracted_real_paths_npy:  (244,)
fake_mp4_paths:  8067
real_mp4_paths:  1363


In [None]:
# Extract the features of every video and save them as one .npz file per video
# (fake videos first, then real ones).
for mp4_paths, target_dir, done_files in (
    (fake_mp4_paths, fake_output_dir, extracted_fake_paths_npy),
    (real_mp4_paths, real_output_dir, extracted_real_paths_npy),
):
    extract_features_and_save(mp4_paths, target_dir, done_files)

saved_useless_files:  710
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/13_20__walking_down_indoor_hall_disgust__EV1V4ZQV.mp4
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/14_18__walking_and_outside_surprised__0YRBHIKG.mp4
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/06_21__walking_and_outside_surprised__058I9NIZ.mp4
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/21_18__outside_talking_pan_laughing__PQFTWVRQ.mp4
4 => File already processed:  ./out/fake/03_07__talking_against_wall__F0YYEA5W.npz
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/03_13__hugging_happy__GBYWJW06.mp4
6 => File already processed:  ./out/fake/11_13__talking_against_wall__61T622EK.npz
File found in useless list: ./dataset/manipulated_sequences/DeepFakeDetection/c40/videos/03_15__outside_talking_pan_laughi

I0000 00:00:1736557127.821942 18893058 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
I0000 00:00:1736557128.873995 18893058 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 710ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [4]:
# Sanity check: load the persisted skip-list and show how many videos are on it.
useless_paths = np.load("out/useless_files.npy")
print(len(useless_paths))

710


In [6]:
def pad_to_max_length(array, max_length, pad_value = 0):
    """Pad ``array`` at the end of its first axis up to ``max_length`` entries.

    Only the first axis grows; all other axes keep their size, whatever the
    number of dimensions.

    Args:
        array: NumPy array with at least one dimension.
        max_length: Desired size of the first axis after padding.
        pad_value: Constant used for the added entries (default 0).

    Returns:
        A new array whose first axis has length ``max_length``.

    Raises:
        ValueError: If ``array`` is already longer than ``max_length``.
    """
    missing = max_length - len(array)
    if missing < 0:
        raise ValueError(
            f"array of length {len(array)} is longer than max_length={max_length}"
        )

    # One (before, after) pair per axis: pad only after the end of axis 0.
    # A single bare pair would be broadcast by np.pad to every axis.
    pad_width = [(0, missing)] + [(0, 0)] * (array.ndim - 1)
    return np.pad(array, pad_width, mode="constant", constant_values=pad_value)

In [None]:
# Refresh the lists of feature archives now that extraction has run.
extracted_fake_paths_npy = np.array(glob.glob(f"{fake_output_dir}/*.npz"))
extracted_real_paths_npy = np.array(glob.glob(f"{real_output_dir}/*.npz"))

print(
    "extracted_fake_paths_npy: ",
    len(extracted_fake_paths_npy),
    extracted_fake_paths_npy.shape,
)
print(
    "extracted_real_paths_npy: ",
    len(extracted_real_paths_npy),
    extracted_real_paths_npy.shape,
)

In [None]:
# Smoke-test every fake archive: print its first feature value, or report the
# file that cannot be read.
for idx, path in enumerate(extracted_fake_paths_npy):
    try:
        # Open inside the try so an unreadable archive is reported instead of
        # aborting the scan; the context manager closes the file handle.
        with np.load(path) as archive:
            first_value = archive["features"][0][0]
        # Value is pulled out first: reusing double quotes inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        print(f"{idx} --- {first_value}")
    except Exception as e:
        print(f"{idx} --- {path}")
        print(f"Error: {e}")


In [None]:
# Load the "features" array of every extracted archive into plain Python
# lists (one entry per archive); padding to a common length happens later.
fake_features = [np.load(npz_path)["features"] for npz_path in extracted_fake_paths_npy]
real_features = [np.load(npz_path)["features"] for npz_path in extracted_real_paths_npy]

print("fake_features: ", len(fake_features))
print("real_features: ", len(real_features))

In [None]:
# Longest sequence (first-axis length) across fake and real features.
longest_fake = max(map(len, fake_features))
longest_real = max(map(len, real_features))
max_length = max(longest_fake, longest_real)
print("Max length of features: ", max_length)

In [None]:
# Shortest sequence (first-axis length) across fake and real features.
shortest_fake = min(map(len, fake_features))
shortest_real = min(map(len, real_features))
min_length = min(shortest_fake, shortest_real)
print("Min length of features: ", min_length)

In [None]:
# Pad the features: keep only sequences with more than `min_rows` rows and
# pad each of them to `max_length` so they stack into one array.
min_rows = 200

fake_features_padded = np.array([
    pad_to_max_length(sequence, max_length)
    for sequence in fake_features
    if len(sequence) > min_rows
])

real_features_padded = np.array([
    pad_to_max_length(sequence, max_length)
    for sequence in real_features
    if len(sequence) > min_rows
])

print("fake_features_padded: ", fake_features_padded.shape)
print("real_features_padded: ", real_features_padded.shape)

In [14]:
# Cache the padded arrays in one compressed archive (keys: "fake_features",
# "real_features"); NumPy appends the ".npz" extension to the given name.
np.savez_compressed(f"{relative_base_path}out/pca_features", fake_features=fake_features_padded, real_features=real_features_padded)

In [3]:
# Reload the cached padded features so the steps below can run without
# repeating the extraction and padding.
features = np.load("./out/pca_features.npz")
fake_features, real_features = features["fake_features"], features["real_features"]

In [4]:
# The reloaded arrays are already padded; expose them under the names the
# dataset-building cell expects.
fake_features_padded, real_features_padded = fake_features, real_features

In [None]:
# Combine fake and real features into one dataset with matching targets:
# 0 for fake samples, 1 for real samples (fake samples come first).
fake_targets = np.zeros(len(fake_features_padded))
real_targets = np.ones(len(real_features_padded))

all_features = np.concatenate([fake_features_padded, real_features_padded])
all_targets = np.concatenate([fake_targets, real_targets])

print("X_dataset: ", all_features.shape)
print("Y_dataset: ", all_targets.shape)

In [35]:
# Write the labelled dataset to one compressed archive (keys: "X_dataset",
# "Y_dataset"); NumPy appends the ".npz" extension to the given name.
np.savez_compressed(f"{relative_base_path}out/pca_features_dataset", X_dataset=all_features, Y_dataset=all_targets)