In [None]:
!pip install opencv-python-headless
!pip install tensorflow

In [None]:
# Mount Google Drive so the archives and extracted videos under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!unzip "/content/drive/My Drive/archive.zip" -d "/content/drive/My Drive/dataset3"

In [None]:
# Show free disk space per mounted filesystem in human-readable units.
!df -h

Dataset source (Kaggle Deepfake Detection Challenge): https://www.kaggle.com/competitions/deepfake-detection-challenge/data

In [None]:
import os
import json
import cv2
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Dropout, GlobalAveragePooling2D, GlobalAveragePooling1D, Concatenate, GlobalAveragePooling3D, Conv3D, BatchNormalization, MaxPooling3D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
from keras.layers import TimeDistributed, Conv3D, Reshape
from tensorflow.keras.applications import MobileNetV2



In [None]:
# Extract the competition archive into the datasetvd folder on Drive (-o overwrites without prompting).
# NOTE(review): the recorded output of this cell reports that the zip file could not be
# found or opened — confirm the archive name and location before relying on this step.
!unzip -o "/content/drive/My Drive/deepfake-detection-challenge.zip" -d "/content/drive/My Drive/datasetvd"

unzip:  cannot find or open /content/drive/My Drive/deepfake-detection-challenge.zip, /content/drive/My Drive/deepfake-detection-challenge.zip.zip or /content/drive/My Drive/deepfake-detection-challenge.zip.ZIP.


In [None]:
def load_metadata(path):
    """Read a JSON metadata file and return the parsed object.

    Args:
        path: Location of the JSON file on disk.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Pin the encoding so parsing does not depend on the platform's default
    # locale; JSON interchange text is UTF-8.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

def extract_frames(video_path, num_frames=4, frame_size=(224, 224)):
    """Sample evenly spaced frames from a video as a fixed-size, scaled array.

    Frame indices are spread linearly between the first and last frame reported
    by the container. Each decoded frame is resized to ``frame_size`` and its
    pixel values are divided by 255. Sampling stops at the first frame that
    cannot be read; every slot that was not filled stays all-zero, so the
    result always has ``num_frames`` entries.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: How many frames to sample.
        frame_size: ``(width, height)`` each frame is resized to.

    Returns:
        ``float32`` array of shape ``(num_frames, height, width, 3)``.
        float32 halves the memory of the float64 produced by plain division.

    NOTE(review): frames are stored in whatever channel order OpenCV decodes
    them in (normally BGR); no colour conversion is applied — confirm intended.
    """
    width, height = frame_size

    # Start from zeros so unreadable or missing frames are black padding.
    frames = np.zeros((num_frames, height, width, 3), dtype=np.float32)

    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Nothing to decode (e.g. the file could not be opened): skip seeking
        # so no negative frame index is ever requested.
        if frame_count <= 0:
            return frames

        frames_to_capture = np.linspace(0, frame_count - 1, num_frames, dtype=int)

        for slot, frame_index in enumerate(frames_to_capture):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_index))
            success, frame = cap.read()
            if not success:
                break
            frame = cv2.resize(frame, (width, height))
            frames[slot] = frame.astype(np.float32) / 255.0
    finally:
        # Always free the decoder handle, even if decoding or resizing raised.
        cap.release()

    return frames

def extract_audio_features(video_path, sr=22050, n_mfcc=13, duration=5.0):
    """Summarise the start of a file's audio as a time-averaged MFCC vector.

    Args:
        video_path: Path handed to ``librosa.load``.
        sr: Sample rate passed to both the loader and the MFCC computation.
        n_mfcc: Number of MFCC coefficients to compute.
        duration: Maximum number of seconds of audio to load.

    Returns:
        1-D array of length ``n_mfcc``: each coefficient averaged over time.
        If the loader yields no samples, an all-zero vector is returned so the
        feature matrix never contains NaN or triggers an MFCC error.

    Errors raised by ``librosa.load`` itself are not caught here.
    """
    y, _ = librosa.load(video_path, sr=sr, duration=duration)

    # An empty signal has no frames to average; return a neutral vector instead.
    if y.size == 0:
        return np.zeros(n_mfcc, dtype=y.dtype)

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Collapse the time axis so every clip yields a fixed-length feature vector.
    return np.mean(mfcc, axis=1)


def load_and_process_data(data_dir, metadata):
    """Build the frame, audio and label arrays for every clip in the metadata.

    Args:
        data_dir: Directory containing the files named by the metadata keys.
        metadata: Mapping of file name to a dict holding a ``'label'`` entry.

    Returns:
        Three arrays, aligned by position: the stacked outputs of
        ``extract_frames``, the stacked outputs of ``extract_audio_features``,
        and integer labels (1 where the label is ``'FAKE'``, otherwise 0).
    """
    records = []
    for name, entry in metadata.items():
        clip_path = os.path.join(data_dir, name)
        # Visual features first, then audio, then the label for the same clip.
        clip_frames = extract_frames(clip_path)
        clip_mfcc = extract_audio_features(clip_path)
        records.append((clip_frames, clip_mfcc, int(entry['label'] == 'FAKE')))

    frame_stacks = [record[0] for record in records]
    mfcc_vectors = [record[1] for record in records]
    targets = [record[2] for record in records]

    return np.array(frame_stacks), np.array(mfcc_vectors), np.array(targets)


In [None]:
def build_video_model(input_shape=(4, 224, 224, 3)):
    """Build the 3-D convolutional branch that embeds a short clip of frames.

    Two Conv3D / BatchNormalization / MaxPooling3D stages are followed by
    global average pooling and two Dense + Dropout layers, ending in a
    128-unit ReLU embedding.

    Args:
        input_shape: ``(frames, height, width, channels)`` of one clip.

    Returns:
        A Keras ``Model`` mapping a clip to its 128-dimensional embedding.
    """
    video_input = Input(shape=input_shape)

    # padding='same' keeps the frame axis intact through each convolution.
    # With the default 'valid' padding a 4-frame clip shrinks to 2 frames after
    # the first 3x3x3 convolution and to 1 after pooling, which leaves too few
    # frames for the second 3-deep kernel and makes the model impossible to build.
    x = Conv3D(32, kernel_size=(3, 3, 3), padding='same', activation='relu')(video_input)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(x)

    x = Conv3D(64, kernel_size=(3, 3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(x)

    # Collapse the remaining time and spatial axes to one value per filter.
    x = GlobalAveragePooling3D()(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)

    return Model(inputs=video_input, outputs=x)

def build_audio_model(input_shape=(None, 13)):
    """Build the dense branch that embeds one audio feature vector.

    Args:
        input_shape: Shape tuple whose LAST entry is the number of audio
            features per clip. Only that last entry is used, so both the
            default ``(None, 13)`` and a plain ``(13,)`` give a 13-feature
            input. Previously this argument was ignored and 13 was hard-coded.

    Returns:
        A Keras ``Model`` mapping a feature vector to a 128-unit ReLU embedding.
    """
    # The model consumes one flat vector per clip, so take only the feature width.
    feature_dim = input_shape[-1]

    audio_input = Input(shape=(feature_dim,))

    x = Dense(256, activation='relu')(audio_input)
    x = Dropout(0.5)(x)

    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)

    return Model(inputs=audio_input, outputs=x)

In [None]:
# Label file and video directory of the sample training set on the mounted Drive.
metadata_path = '/content/drive/My Drive/datasetvd/train_sample_videos/metadata.json'

data_dir = '/content/drive/My Drive/datasetvd/train_sample_videos'

metadata = load_metadata(metadata_path)

video_data, audio_data, labels = load_and_process_data(data_dir, metadata)

# Hold out 20% for testing. Stratifying keeps the REAL/FAKE ratio the same in
# every split, which matters because the classes are imbalanced and an
# unstratified split can leave very few minority samples for evaluation.
train_val_videos, test_videos, train_val_audios, test_audios, train_val_labels, test_labels = train_test_split(
    video_data, audio_data, labels, test_size=0.2, random_state=42, stratify=labels)

# 25% of the remaining 80% gives a 60/20/20 train/validation/test split overall.
train_videos, val_videos, train_audios, val_audios, train_labels, val_labels = train_test_split(
    train_val_videos, train_val_audios, train_val_labels, test_size=0.25, random_state=42,
    stratify=train_val_labels)

# Report the size of every split for both modalities.
for modality, splits in (
    ("videos", (train_videos, val_videos, test_videos)),
    ("audios", (train_audios, val_audios, test_audios)),
):
    for split_name, split in zip(("training", "validation", "testing"), splits):
        print(f"Number of {modality} in {split_name} set:", len(split))



In [None]:
# One encoder per modality.
video_model = build_video_model()
audio_model = build_audio_model()

# Fuse the two embeddings, then classify with a small dense head whose single
# sigmoid unit outputs a value between 0 and 1.
branch_outputs = [video_model.output, audio_model.output]
combined_input = Concatenate()(branch_outputs)
x = Dense(64, activation='relu')(combined_input)
final_output = Dense(1, activation='sigmoid')(x)

# The end-to-end model takes the video clip and the audio vector as two inputs.
model = Model(
    inputs=[video_model.input, audio_model.input],
    outputs=final_output,
)

In [None]:
# Balanced weights: each class is weighted inversely to its frequency in the
# training labels.
train_classes = np.unique(train_labels)

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=train_labels
)

# Key each weight by its actual class label rather than by its position in the
# array, so the mapping stays correct even if a class is absent from the
# training split or the labels are not exactly 0..k-1.
class_weights_dict = {
    int(label): float(weight) for label, weight in zip(train_classes, class_weights)
}

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train on both modalities, monitoring the validation split each epoch and
# applying the per-class weights to the loss.
model.fit(
    x=[train_videos, train_audios],
    y=train_labels,
    validation_data=([val_videos, val_audios], val_labels),
    epochs=10,
    batch_size=4,
    class_weight=class_weights_dict,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7bf8fb34eaa0>

In [None]:
# Score the trained model on the held-out test split; evaluate returns the loss
# followed by the compiled metric (accuracy).
test_metrics = model.evaluate(x=[test_videos, test_audios], y=test_labels)
test_loss, test_accuracy = test_metrics

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 78.75%


In [None]:
# Ground truth and model scores for the held-out test split.
true_labels = test_labels
predictions = model.predict([test_videos, test_audios])

# Scores above 0.5 are treated as class 1 (FAKE), everything else as class 0.
predicted_labels = (predictions > 0.5).astype(int)

# Per-class error breakdown.
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Precision, recall and F1 for each class.
class_report = classification_report(true_labels, predicted_labels)
print("Classification Report:", class_report, sep="\n")

Confusion Matrix:
[[ 2 10]
 [ 7 61]]
Classification Report:
              precision    recall  f1-score   support

           0       0.22      0.17      0.19        12
           1       0.86      0.90      0.88        68

    accuracy                           0.79        80
   macro avg       0.54      0.53      0.53        80
weighted avg       0.76      0.79      0.77        80

