<a href="https://colab.research.google.com/github/kanchisoni/Adversarial-Robustness-Analysis-of-Deepfake-Detection/blob/main/Adversarial_Robustness_Analysis_of_Deepfake_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Download the latest published version of the dataset and remember where
# kagglehub placed it locally.
DATASET_HANDLE = "prithivsakthiur/deepfake-vs-real-60k"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/prithivsakthiur/deepfake-vs-real-60k?dataset_version_number=1...


100%|██████████| 22.4G/22.4G [17:32<00:00, 22.8MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/prithivsakthiur/deepfake-vs-real-60k/versions/1


In [None]:
import os

# Inspect the top level of the download without binding `data_path` to it:
# the download root only holds a wrapper folder, so pointing `data_path` here
# would make the image loader see a single class if a later cell were skipped
# or run out of order.
print(os.listdir(path))

['deepfake-vs-real-60k']


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.applications.xception import preprocess_input


In [None]:
# The archive unpacks into a wrapper folder that contains the class
# directories. Select it explicitly: os.listdir() returns entries in arbitrary
# order and may also contain stray files, so blindly taking element [0] is
# fragile. Keep only sub-directories and take the first one in sorted order.
subdirs = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
if not subdirs:
    raise FileNotFoundError(f"No dataset folder found inside {path!r}")

data_path = os.path.join(path, subdirs[0])
print("DATA PATH:", data_path)
print("INSIDE:", os.listdir(data_path))


DATA PATH: /root/.cache/kagglehub/datasets/prithivsakthiur/deepfake-vs-real-60k/versions/1/deepfake-vs-real-60k
INSIDE: ['Real', 'Fake']


In [None]:
# Options shared by both loader calls, so the training and validation subsets
# are carved out with the same split fraction, seed, image size and batching.
split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(299, 299),
    batch_size=16,
    label_mode="categorical",
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_path, subset="training", **split_options
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    data_path, subset="validation", **split_options
)

Found 57071 files belonging to 2 classes.
Using 45657 files for training.
Found 57071 files belonging to 2 classes.
Using 11414 files for validation.


In [None]:
def _scale_for_xception(images, labels):
    """Run Xception's `preprocess_input` on the images; labels pass through."""
    return preprocess_input(images), labels


# NOTE: `train_ds` / `val_ds` are rebound to their mapped versions here, so
# re-running this cell without re-running the loading cell applies the
# preprocessing a second time.
train_ds = train_ds.map(_scale_for_xception)
val_ds = val_ds.map(_scale_for_xception)


In [None]:
# ImageNet-initialised Xception backbone without its classification head.
base_model = Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(299, 299, 3),
)

# Fine-tune the whole backbone: setting the flag on the model marks every
# contained layer as trainable.
base_model.trainable = True

# New head: global average pooling followed by a two-way softmax.
x = GlobalAveragePooling2D()(base_model.output)
out = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=out)


In [None]:
# One-hot labels with a softmax output -> categorical cross-entropy,
# optimised with Adam at its default settings; accuracy is the tracked metric.
compile_options = {
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)


In [None]:
# Train for three epochs, scoring on the validation split after each one.
history = model.fit(
    train_ds,
    epochs=3,
    validation_data=val_ds,
)


Epoch 1/3
[1m2854/2854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1393s[0m 458ms/step - accuracy: 0.9894 - loss: 0.0343 - val_accuracy: 0.9976 - val_loss: 0.0084
Epoch 2/3
[1m2854/2854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1226s[0m 429ms/step - accuracy: 0.9975 - loss: 0.0090 - val_accuracy: 0.9992 - val_loss: 0.0026
Epoch 3/3
[1m2854/2854[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1263s[0m 442ms/step - accuracy: 0.9978 - loss: 0.0073 - val_accuracy: 0.9987 - val_loss: 0.0051


In [None]:
# Clean (un-attacked) reference score on the full validation split.
# evaluate() yields the loss followed by the single compiled metric (accuracy).
baseline_scores = model.evaluate(val_ds)
loss, acc = baseline_scores
print(f"Baseline Validation Accuracy: {acc*100:.2f}%")


[1m714/714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 168ms/step - accuracy: 0.9986 - loss: 0.0050
Baseline Validation Accuracy: 99.87%


In [None]:
import tensorflow as tf

# Loss used to compute the attack gradient; matches the one-hot labels the
# datasets were built with.
loss_object = tf.keras.losses.CategoricalCrossentropy()

def fgsm_attack(images, labels, model, epsilon=0.01, clip_min=-1.0, clip_max=1.0):
    """Craft Fast Gradient Sign Method (FGSM) adversarial examples.

    Each image is moved one step of size `epsilon` in the direction of the
    sign of the loss gradient with respect to the input pixels.

    Args:
        images: Batch of already-preprocessed images (cast to float32 here).
        labels: One-hot labels for `images`.
        model: Model to attack; called as `model(images, training=False)`.
        epsilon: Perturbation step size, expressed in the same scale as
            `images`.
        clip_min: Lower bound applied to the perturbed images. Defaults to -1,
            the lower end of the range produced by `preprocess_input`.
        clip_max: Upper bound applied to the perturbed images. Defaults to 1.

    Returns:
        A float32 tensor of adversarial images, clipped to
        `[clip_min, clip_max]`.

    Raises:
        ValueError: If the loss has no gradient with respect to `images`.
    """
    images = tf.cast(images, tf.float32)

    with tf.GradientTape() as tape:
        # `images` is a plain tensor, not a Variable, so it must be watched
        # explicitly for the tape to record operations on it.
        tape.watch(images)
        # Run in inference mode explicitly so layers such as batch
        # normalisation use their stored statistics while attacking.
        preds = model(images, training=False)
        loss = loss_object(labels, preds)

    gradient = tape.gradient(loss, images)
    if gradient is None:
        # tf.sign(None) would fail with an opaque error; report the cause.
        raise ValueError(
            "Could not compute the loss gradient with respect to the input images."
        )

    adv_images = images + epsilon * tf.sign(gradient)
    # Keep the perturbed pixels inside the valid input range.
    return tf.clip_by_value(adv_images, clip_min, clip_max)


In [None]:
# Attack the first 20 validation batches (a small portion is enough) and keep
# the matching labels so clean and defended accuracy use the same samples.
adv_batches = []
label_batches = []

for imgs, lbls in val_ds.take(20):
    adv = fgsm_attack(imgs, lbls, model)
    adv_batches.append(adv)
    label_batches.append(lbls)

# Merge the per-batch results into single tensors along the batch axis.
adv_imgs = tf.concat(adv_batches, axis=0)
adv_lbls = tf.concat(label_batches, axis=0)


In [None]:
# Score the undefended model on the adversarial images.
adv_scores = model.evaluate(adv_imgs, adv_lbls, verbose=0)
loss, adv_acc = adv_scores
print(f"Accuracy on FGSM adversarial images: {adv_acc*100:.2f}%")


Accuracy on FGSM adversarial images: 77.50%


In [None]:
import cv2
import numpy as np

def blur_defense(images, ksize=(5, 5), sigma=0):
    """Smooth each image with a Gaussian filter as an input-purification defense.

    Args:
        images: Iterable of HxWxC images; eager tensors and NumPy arrays are
            both accepted.
        ksize: Gaussian kernel size passed to `cv2.GaussianBlur`.
        sigma: Gaussian sigma passed to `cv2.GaussianBlur`; 0 lets OpenCV
            derive it from `ksize`.

    Returns:
        A tensor containing the blurred images stacked along a new first axis.
    """
    # np.asarray handles both eager tensors and arrays, unlike `.numpy()`.
    blurred = [cv2.GaussianBlur(np.asarray(img), ksize, sigma) for img in images]
    if not blurred:
        return tf.convert_to_tensor(blurred)
    # Stack once in NumPy; converting a Python list of arrays element by
    # element is considerably slower.
    return tf.convert_to_tensor(np.stack(blurred))

# Blur the adversarial images, then score the model on the purified inputs.
blurred = blur_defense(adv_imgs)
blur_scores = model.evaluate(blurred, adv_lbls, verbose=0)
loss, blur_acc = blur_scores
print(f"Accuracy after Blur Defense: {blur_acc*100:.2f}%")


Accuracy after Blur Defense: 96.88%


In [None]:
import pywt

def wavelet_defense(images, wavelet='haar'):
    """Denoise images by discarding the detail sub-bands of a 2-D wavelet transform.

    Each image is decomposed with a single-level 2-D DWT, the horizontal,
    vertical and diagonal detail coefficients are dropped, and the image is
    rebuilt from the approximation coefficients only.

    Unlike a single-channel version, every channel is filtered independently
    (the transform runs along the two spatial axes), so colour information is
    preserved. The reconstruction is cropped back to the input height/width,
    because the inverse transform can return one extra row/column for
    odd-sized inputs (e.g. 299 -> 300).

    Args:
        images: Iterable of HxWxC (or HxW) images; eager tensors and NumPy
            arrays are both accepted.
        wavelet: Wavelet name understood by PyWavelets.

    Returns:
        A tensor of denoised images with the same per-image shape and dtype as
        the inputs, stacked along a new first axis.
    """
    processed = []
    for img in images:
        img = np.asarray(img)
        height, width = img.shape[:2]

        # Transform along the spatial axes only; any trailing channel axis is
        # carried through untouched.
        approx, _details = pywt.dwt2(img, wavelet, axes=(0, 1))
        # Passing None for the detail bands reconstructs from the
        # approximation coefficients alone.
        rec = pywt.idwt2((approx, (None, None, None)), wavelet, axes=(0, 1))

        # Trim any padding row/column and keep the input dtype.
        rec = rec[:height, :width].astype(img.dtype, copy=False)
        processed.append(rec)

    if not processed:
        return tf.convert_to_tensor(processed)
    return tf.convert_to_tensor(np.stack(processed))

# Apply the wavelet defense to the adversarial images and score the result.
wave_imgs = wavelet_defense(adv_imgs)
wave_scores = model.evaluate(wave_imgs, adv_lbls, verbose=0)
loss, wave_acc = wave_scores
print(f"Accuracy after Wavelet Defense: {wave_acc*100:.2f}%")


Accuracy after Wavelet Defense: 93.75%
