In [1]:
!rm -rf /kaggle/working/*

In [2]:
import os
import cv2

def extract_frames(video_path, output_dir, label):
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    count = 0
    frame_id = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if count % fps == 0:  # One frame per second
            save_path = os.path.join(output_dir, f"{label}_{os.path.basename(video_path).split('.')[0]}_{frame_id}.jpg")
            cv2.imwrite(save_path, frame)
            frame_id += 1
        count += 1
    cap.release()

In [3]:
real_dir = "/kaggle/input/celeb-df-v2/Celeb-real"
fake_dir = "/kaggle/input/celeb-df-v2/Celeb-synthesis"
yt_dir = "/kaggle/input/celeb-df-v2/YouTube-real"
output_real = "/kaggle/working/frames/real"
output_fake = "/kaggle/working/frames/fake"

for video in os.listdir(real_dir):
    extract_frames(os.path.join(real_dir, video), output_real, "real")

for video in os.listdir(fake_dir):
    extract_frames(os.path.join(fake_dir, video), output_fake, "fake")

In [4]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import glob

# Paths
real_imgs = glob.glob("/kaggle/working/frames/real/*.jpg")
fake_imgs = glob.glob("/kaggle/working/frames/fake/*.jpg")
file_paths = real_imgs + fake_imgs
labels = [0]*len(real_imgs) + [1]*len(fake_imgs)

# Split data
X_train, X_temp, y_train, y_temp = train_test_split(file_paths, labels, test_size=0.3, stratify=labels)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp)


2025-04-21 08:03:15.482702: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745222595.665462      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745222595.726614      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
IMG_SIZE = (299, 299)

def load_preprocess(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.keras.applications.xception.preprocess_input(img)
    return img, label

def get_dataset(paths, labels, batch_size=32, shuffle=True):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(len(paths))
    ds = ds.map(load_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_ds = get_dataset(X_train, y_train)
val_ds = get_dataset(X_val, y_val, shuffle=False)
test_ds = get_dataset(X_test, y_test, shuffle=False)

I0000 00:00:1745222610.258053      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [6]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model

base = Xception(include_top=False, weights='imagenet', input_shape=(299, 299, 3))
base.trainable = False  # Freeze base initially

x = GlobalAveragePooling2D()(base.output)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base.input, outputs=output)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [7]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
]

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=callbacks
)

Epoch 1/10


I0000 00:00:1745222626.372578   24990 service.cc:148] XLA service 0x7ace7c03ad70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745222626.374858   24990 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745222627.449409   24990 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   1/1781[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:12:53[0m 17s/step - accuracy: 0.4375 - auc: 0.7679 - loss: 0.7024 - precision: 1.0000 - recall: 0.3571

I0000 00:00:1745222635.484706   24990 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 146ms/step - accuracy: 0.9020 - auc: 0.5506 - loss: 0.3235 - precision: 0.9057 - recall: 0.9954 - val_accuracy: 0.9039 - val_auc: 0.6104 - val_loss: 0.3083 - val_precision: 0.9039 - val_recall: 1.0000
Epoch 2/10
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 139ms/step - accuracy: 0.9030 - auc: 0.5992 - loss: 0.3152 - precision: 0.9031 - recall: 0.9998 - val_accuracy: 0.9041 - val_auc: 0.6212 - val_loss: 0.3071 - val_precision: 0.9041 - val_recall: 1.0000
Epoch 3/10
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 139ms/step - accuracy: 0.9058 - auc: 0.6080 - loss: 0.3085 - precision: 0.9060 - recall: 0.9998 - val_accuracy: 0.9042 - val_auc: 0.6289 - val_loss: 0.3056 - val_precision: 0.9041 - val_recall: 1.0000
Epoch 4/10
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 139ms/step - accuracy: 0.9034 - auc: 0.6124 - loss: 0.3124 - precision: 0.9039 - rec

<keras.src.callbacks.history.History at 0x7acfbaa7c0d0>

In [8]:
import numpy as np
from sklearn.metrics import classification_report, roc_auc_score

y_true = np.array(y_test)
y_probs = model.predict(test_ds).ravel()
y_pred = (y_probs > 0.5).astype(int)

print(classification_report(y_true, y_pred, target_names=["Real", "Fake"]))
print("AUC-ROC:", roc_auc_score(y_true, y_probs))

[1m382/382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 114ms/step
              precision    recall  f1-score   support

        Real       0.60      0.00      0.01      1174
        Fake       0.90      1.00      0.95     11034

    accuracy                           0.90     12208
   macro avg       0.75      0.50      0.48     12208
weighted avg       0.87      0.90      0.86     12208

AUC-ROC: 0.6384639594698622
