In [1]:
!pip install tqdm tensorflow_docs mtcnn imutils imageio

Collecting tensorflow_docs
  Downloading tensorflow_docs-2024.2.5.73858-py3-none-any.whl.metadata (955 bytes)
Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting imutils
  Downloading imutils-0.5.4.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l- \ done
Collecting astor (from tensorflow_docs)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Downloading tensorflow_docs-2024.2.5.73858-py3-none-any.whl (182 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.5/182.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Building wheels for collected packages: imutils
  Building wheel for imutils (setup.py) ... [?25l- \ | done
[?25h  Created wheel for imutils: fil

In [2]:
base_dir = "/kaggle/input/mini-face-forensics/FF++/"

In [3]:
import os
import pandas as pd
import cv2 as cv
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from mtcnn import MTCNN
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import imageio

In [4]:
label_types = os.listdir('/kaggle/input/mini-face-forensics/FF++')
print (label_types)

['fake', 'real']


In [5]:
detector = MTCNN()

In [6]:
# Example data
video_paths = []
labels = []  


for sub_folder in os.listdir(base_dir):
    label = sub_folder
    
    sub_folder = os.path.join(base_dir, sub_folder)
    
    for vid in os.listdir(sub_folder):
        video_paths.append(os.path.join(sub_folder, vid))
        labels.append(label)

In [7]:
# Print the first 10 and last 10 elements of video_paths
print("First 10 video paths:", video_paths[:10])
print("Last 10 video paths:", video_paths[-10:])

# Print the first 10 and last 10 elements of labels
print("First 10 labels:", labels[:10])
print("Last 10 labels:", labels[-10:])

First 10 video paths: ['/kaggle/input/mini-face-forensics/FF++/fake/02_13__exit_phone_room__CP5HFV3K.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/03_14__talking_against_wall__ZC2KYASW.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/03_15__outside_talking_pan_laughing__Y11NT1YX.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/07_26__walking_down_street_outside_angry__FGNGC2GT.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/07_03__hugging_happy__7NGMD8FT.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/07_09__walk_down_hall_angry__N9CWME71.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/01_12__outside_talking_pan_laughing__TNI7KUZ6.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/07_03__podium_speech_happy__6PHZRQ4H.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/07_02__walking_down_street_outside_angry__O4SXNLRL.mp4', '/kaggle/input/mini-face-forensics/FF++/fake/08_05__walk_down_hall_angry__FBICSP2C.mp4']
Last 10 video paths: ['/kaggle/input/mini-face-forensics/F

In [8]:
# Preparing the data
df = pd.DataFrame({'video_path': video_paths, 'label': labels})

In [9]:
print(df.head())
print(df.tail())

                                          video_path label
0  /kaggle/input/mini-face-forensics/FF++/fake/02...  fake
1  /kaggle/input/mini-face-forensics/FF++/fake/03...  fake
2  /kaggle/input/mini-face-forensics/FF++/fake/03...  fake
3  /kaggle/input/mini-face-forensics/FF++/fake/07...  fake
4  /kaggle/input/mini-face-forensics/FF++/fake/07...  fake
                                            video_path label
395  /kaggle/input/mini-face-forensics/FF++/real/13...  real
396  /kaggle/input/mini-face-forensics/FF++/real/06...  real
397  /kaggle/input/mini-face-forensics/FF++/real/02...  real
398  /kaggle/input/mini-face-forensics/FF++/real/04...  real
399  /kaggle/input/mini-face-forensics/FF++/real/12...  real


In [10]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)

In [11]:
print(train_df.head())
print(train_df.tail())

                                            video_path label
3    /kaggle/input/mini-face-forensics/FF++/fake/07...  fake
18   /kaggle/input/mini-face-forensics/FF++/fake/02...  fake
202  /kaggle/input/mini-face-forensics/FF++/real/08...  real
250  /kaggle/input/mini-face-forensics/FF++/real/14...  real
274  /kaggle/input/mini-face-forensics/FF++/real/13...  real
                                            video_path label
71   /kaggle/input/mini-face-forensics/FF++/fake/03...  fake
106  /kaggle/input/mini-face-forensics/FF++/fake/01...  fake
270  /kaggle/input/mini-face-forensics/FF++/real/11...  real
348  /kaggle/input/mini-face-forensics/FF++/real/11...  real
102  /kaggle/input/mini-face-forensics/FF++/fake/02...  fake


In [12]:
print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")


train_df.sample(10)

Total videos for training: 320
Total videos for testing: 80


Unnamed: 0,video_path,label
4,/kaggle/input/mini-face-forensics/FF++/fake/07...,fake
190,/kaggle/input/mini-face-forensics/FF++/fake/01...,fake
69,/kaggle/input/mini-face-forensics/FF++/fake/04...,fake
366,/kaggle/input/mini-face-forensics/FF++/real/03...,real
135,/kaggle/input/mini-face-forensics/FF++/fake/03...,fake
259,/kaggle/input/mini-face-forensics/FF++/real/03...,real
201,/kaggle/input/mini-face-forensics/FF++/real/14...,real
326,/kaggle/input/mini-face-forensics/FF++/real/05...,real
379,/kaggle/input/mini-face-forensics/FF++/real/15...,real
356,/kaggle/input/mini-face-forensics/FF++/real/15...,real


## capturing the frames

# face detection strategies

## 1) Run Face Detection Only Once for the First Frame

In [13]:
def get_face_region_first_frame(frame, previous_box=None):
    if previous_box is None:
        # Detect the face only if no previous bounding box is provided
        detections = detector.detect_faces(frame)
        if detections:
            x, y, width, height = detections[0]['box']
            previous_box = (x, y, width, height)
        else:
            return crop_center_square(frame), None  # fallback to center crop if no face detected
    else:
        x, y, width, height = previous_box

    face_region = frame[y:y+height, x:x+width]
    return face_region, previous_box

## 2) Reduce frame Resolution before detection

In [14]:
# def get_face_region_small_frame(frame, scale=0.5):
#     small_frame = cv.resize(frame, None, fx=scale, fy=scale)
#     detections = detector.detect_faces(small_frame)

#     if detections:
#         x, y, width, height = detections[0]['box']
#         # Scale bounding box back to the original frame size
#         x, y, width, height = int(x / scale), int(y / scale), int(width / scale), int(height / scale)
#         face_region = frame[y:y+height, x:x+width]
#     else:
#         face_region = crop_center_square(frame)
#     return face_region

In [15]:
def crop_center_square(frame):
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]

In [16]:
IMG_SIZE = 224

def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE), skip_frames=2):
    cap = cv.VideoCapture(path)
    frames = []
    frame_count = 0
    previous_box = None  # Store the bounding box from the first frame

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process frames as per skip rate
            if frame_count % skip_frames == 0:
                frame, previous_box = get_face_region_first_frame(frame, previous_box)
                frame = cv.resize(frame, resize)
                frame = frame[:, :, [2, 1, 0]]  # BGR to RGB
                frames.append(frame)

                if len(frames) == max_frames:
                    break
            frame_count += 1

        # Pad with the last frame if we have fewer frames than max_frames
        while len(frames) < max_frames and frames:
            frames.append(frames[-1])

    finally:
        cap.release()

    return np.array(frames)

## Feature extraction

In [17]:
def build_feature_extractor():
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [18]:
label_processor = keras.layers.StringLookup(num_oov_indices=0, vocabulary=np.unique(train_df["label"]))
print(label_processor.get_vocabulary())

labels = train_df["label"].values
labels = label_processor(labels[..., None]).numpy()
labels

['fake', 'real']


array([[0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
    

In [19]:
#Define hyperparameters

IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 100

MAX_SEQ_LENGTH = 20
NUM_FEATURES = 2048

In [20]:
def prepare_all_videos(df):
    num_samples = len(df)
    video_paths = df["video_path"].values.tolist()
    
    ##take all classlabels from train_df column named 'label' and store in labels
    labels = df["label"].values
    
    #convert classlabels to label encoding
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool") # 145,20
    frame_features = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32") #145,20,2048

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        frames = load_video(path)
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :]
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks), labels


train_data, train_labels = prepare_all_videos(train_df)
test_data, test_labels = prepare_all_videos(test_df)

print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")



print(f"train_labels in train set: {train_labels.shape}")

print(f"test_labels in train set: {test_labels.shape}")

# MAX_SEQ_LENGTH = 20, NUM_FEATURES = 2048. We have defined this above under hyper parameters

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms

In [21]:
def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
    x = keras.layers.GRU(8)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Dense(8, activation="relu")(x)
    output = keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = keras.Model([frame_features_input, mask_input], output)

    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model

EPOCHS = 30
# Utility for running experiments.
def run_experiment():
    filepath = "/kaggle/working/tmp/video_classifier.weights.h5"
    checkpoint = keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )

    seq_model = get_sequence_model()
    history = seq_model.fit(
        [train_data[0], train_data[1]],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[checkpoint],
    )

    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


_, sequence_model = run_experiment()

Epoch 1/30
[1m4/7[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 18ms/step - accuracy: 0.4844 - loss: 0.7991
Epoch 1: val_loss improved from inf to 0.69087, saving model to /kaggle/working/tmp/video_classifier.weights.h5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 108ms/step - accuracy: 0.4903 - loss: 0.7855 - val_accuracy: 0.5312 - val_loss: 0.6909
Epoch 2/30
[1m4/7[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 17ms/step - accuracy: 0.5469 - loss: 0.6988
Epoch 2: val_loss improved from 0.69087 to 0.69018, saving model to /kaggle/working/tmp/video_classifier.weights.h5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.5467 - loss: 0.7065 - val_accuracy: 0.5312 - val_loss: 0.6902
Epoch 3/30
[1m5/7[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 17ms/step - accuracy: 0.5098 - loss: 0.7251
Epoch 3: val_loss improved from 0.69018 to 0.66232, saving model to /kaggle/working/tmp/video_classifier.weights.h5
[1m7/7

## Testing

In [22]:
def prepare_single_video(frames):
    frames = frames[None, ...]
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(MAX_SEQ_LENGTH, video_length)
        for j in range(length):
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
        frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

    return frame_features, frame_mask


def sequence_prediction(path):
    class_vocab = label_processor.get_vocabulary()

    frames = load_video(os.path.join("test", path))
    frame_features, frame_mask = prepare_single_video(frames)
    probabilities = sequence_model.predict([frame_features, frame_mask])[0]

    for i in np.argsort(probabilities)[::-1]:
        print(f"  {class_vocab[i]}: {probabilities[i] * 100:5.2f}%")
    return frames

test_video = np.random.choice(test_df["video_path"].values.tolist())
print(f"Test video path: {test_video}")

test_frames = sequence_prediction(test_video)

Test video path: /kaggle/input/mini-face-forensics/FF++/real/13__hugging_happy.mp4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [