<a href="https://colab.research.google.com/github/rahulchalla-60/morph_detection/blob/main/Morph_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import kagglehub
import os

# Download dataset from Kaggle
path = kagglehub.dataset_download("hungle3401/faceforensics")

print("Dataset downloaded to:", path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/hungle3401/faceforensics?dataset_version_number=1...


100%|██████████| 2.73G/2.73G [02:14<00:00, 21.7MB/s]

Extracting files...





Dataset downloaded to: /root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1


In [6]:
import os

path = "/root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1"

for root, dirs, files in os.walk(path):
    print("📂", root, "->", len(files), "files")
    for f in files[:5]:   # preview first 5 files
        print("   ", f)



📂 /root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1 -> 0 files
📂 /root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1/FF++ -> 0 files
📂 /root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1/FF++/fake -> 200 files
    02_01__secret_conversation__YVGY8LOK.mp4
    09_26__walk_down_hall_angry__QSE5A0GF.mp4
    07_14__talking_against_wall__P9QFO50U.mp4
    07_03__outside_talking_pan_laughing__IFSURI9X.mp4
    07_25__walk_down_hall_angry__PAE9HCA8.mp4
📂 /root/.cache/kagglehub/datasets/hungle3401/faceforensics/versions/1/FF++/real -> 200 files
    07__kitchen_pan.mp4
    15__outside_talking_still_laughing.mp4
    05__outside_talking_still_laughing.mp4
    13__walking_and_outside_surprised.mp4
    01__kitchen_pan.mp4


In [7]:
fake_videos = os.path.join(path, "FF++", "fake")
real_videos = os.path.join(path, "FF++", "real")

output_base = "processed_dataset"
real_dir = os.path.join(output_base, "real")
fake_dir = os.path.join(output_base, "fake")
os.makedirs(real_dir, exist_ok=True)
os.makedirs(fake_dir, exist_ok=True)

import cv2

def extract_frames(video_path, output_dir, max_frames=20):
    cap = cv2.VideoCapture(video_path)
    count = 0
    while cap.isOpened() and count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        # Resize to save time/disk space
        frame = cv2.resize(frame, (224, 224))
        frame_filename = f"{os.path.basename(video_path).split('.')[0]}_frame{count}.jpg"
        cv2.imwrite(os.path.join(output_dir, frame_filename), frame)
        count += 1
    cap.release()

# Extract frames from fake + real
for f in os.listdir(fake_videos):
    extract_frames(os.path.join(fake_videos, f), fake_dir)

for f in os.listdir(real_videos):
    extract_frames(os.path.join(real_videos, f), real_dir)

print("✅ Frames extracted to:", output_base)



✅ Frames extracted to: processed_dataset


In [8]:
import tensorflow as tf

img_size = (224, 224)
batch_size = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    output_base,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=img_size,
    batch_size=batch_size
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    output_base,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=img_size,
    batch_size=batch_size
)


Found 8000 files belonging to 2 classes.
Using 6400 files for training.
Found 8000 files belonging to 2 classes.
Using 1600 files for validation.


In [9]:
#normalize pixel values to [0,1]
AUTOTUNE = tf.data.AUTOTUNE
normalization_layer = tf.keras.layers.Rescaling(1./255)

train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y), num_parallel_calls=AUTOTUNE)

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [10]:
from tensorflow.keras import layers, models, applications

# Load MobileNetV2 backbone (pretrained on ImageNet)
base_model = applications.MobileNetV2(input_shape=(224,224,3),
                                      include_top=False,
                                      weights="imagenet")
base_model.trainable = False  # freeze backbone

# Build model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation="sigmoid")  # binary classification
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss="binary_crossentropy",
              metrics=["accuracy"])

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [11]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10
)


Epoch 1/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 82ms/step - accuracy: 0.5478 - loss: 0.7540 - val_accuracy: 0.6231 - val_loss: 0.6286
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.6227 - loss: 0.6470 - val_accuracy: 0.7175 - val_loss: 0.5745
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.6832 - loss: 0.5835 - val_accuracy: 0.7281 - val_loss: 0.5479
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.7103 - loss: 0.5580 - val_accuracy: 0.7362 - val_loss: 0.5289
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.7164 - loss: 0.5386 - val_accuracy: 0.7400 - val_loss: 0.5144
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.7286 - loss: 0.5304 - val_accuracy: 0.7494 - val_loss: 0.5024
Epoch 7/10
[1m200/20

In [12]:
base_model.trainable = True

# Recompile with lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss="binary_crossentropy",
              metrics=["accuracy"])

history_finetune = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5
)


Epoch 1/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 121ms/step - accuracy: 0.6617 - loss: 0.6033 - val_accuracy: 0.7337 - val_loss: 0.4914
Epoch 2/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 107ms/step - accuracy: 0.8212 - loss: 0.3858 - val_accuracy: 0.7600 - val_loss: 0.4518
Epoch 3/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 100ms/step - accuracy: 0.8531 - loss: 0.3166 - val_accuracy: 0.7781 - val_loss: 0.4175
Epoch 4/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 107ms/step - accuracy: 0.8832 - loss: 0.2717 - val_accuracy: 0.7987 - val_loss: 0.3951
Epoch 5/5
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 101ms/step - accuracy: 0.8964 - loss: 0.2377 - val_accuracy: 0.8019 - val_loss: 0.3847


In [16]:

model.save("deepfake_detector_mobilenetv2.keras")


In [17]:
from google.colab import files
files.download("deepfake_detector_mobilenetv2.keras")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>