# Transfer FGSM attack generator - Surrogate model: ResNeXt + LSTM (frame-based)

In [None]:
# Mount Google Drive so the checkpoint and dataset paths under /content/drive
# used later in this notebook can be resolved.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Imports
import os
import glob
import cv2
import numpy as np
from tqdm import tqdm
from moviepy.editor import ImageSequenceClip

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms

Setup

In [None]:
# Model definition
class Model(nn.Module):
    """ResNeXt-50 feature extractor followed by an LSTM and a linear classifier.

    Args:
        num_classes: Number of output logits.
        latent_dim: Feature size fed to the LSTM.
        lstm_layers: Number of stacked LSTM layers.
        hidden_dim: LSTM hidden size.
        bidirectional: See the note on the LSTM construction below; this flag
            does not actually make the LSTM bidirectional.
    """

    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False):
        super().__init__()
        backbone = models.resnext50_32x4d(pretrained=True)
        # Drop the last two children of the torchvision network (for
        # ResNet-style models: the global pooling and the fc head) so the
        # backbone emits spatial feature maps.
        feature_layers = list(backbone.children())[:-2]
        self.model = nn.Sequential(*feature_layers)
        # NOTE(review): nn.LSTM's fourth positional parameter is ``bias``, so
        # the ``bidirectional`` flag has always been consumed as ``bias``
        # (bias=False by default here). It is spelled out as a keyword to make
        # that visible. Left as-is on purpose: changing it alters the LSTM's
        # parameter set, and presumably the saved checkpoint was produced with
        # this layout -- verify before "fixing".
        self.lstm = nn.LSTM(latent_dim, hidden_dim, lstm_layers, bias=bidirectional)
        self.dp = nn.Dropout(0.4)
        classifier_in = hidden_dim if bidirectional else latent_dim
        self.linear1 = nn.Linear(classifier_in, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Classify a clip.

        Args:
            x: Tensor unpacked as (batch, frames, channels, height, width).

        Returns:
            Tuple ``(fmap, logits)``: the backbone feature maps for all
            ``batch * frames`` images, and the dropout-wrapped classifier
            output taken at index -1 along dim 1 of the LSTM output.
        """
        batch, frames, channels, height, width = x.shape
        # Fold frames into the batch axis so the 2D backbone sees plain images.
        flat = x.view(batch * frames, channels, height, width)
        fmap = self.model(flat)
        pooled = self.avgpool(fmap).view(batch, frames, 2048)
        # NOTE(review): the LSTM is built without batch_first=True, so per the
        # PyTorch default it reads dim 0 as the sequence axis -- confirm this
        # matches how the checkpoint was trained before changing it.
        lstm_out, _ = self.lstm(pooled)
        last_step = lstm_out[:, -1, :]
        return fmap, self.dp(self.linear1(last_step))


# Load model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHECKPOINT = "/content/drive/MyDrive/Models/model_87_acc_20_frames_final_data.pt"

# Two-class surrogate model; weights come from CHECKPOINT and are mapped onto
# `device` while loading.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
_model = Model(2).to(device)
_model.load_state_dict(torch.load(CHECKPOINT, map_location=device))
# Default to inference mode (dropout disabled, normalisation layers frozen).
_model.eval()

In [None]:
# Data preprocessing
# Per-channel normalisation statistics applied after ToTensor(); these values
# match the commonly used ImageNet RGB mean/std.
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# Pipeline applied to every RGB frame: array -> PIL image -> 112x112 resize ->
# float tensor -> channel-wise normalisation with `mean` / `std`.
_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


def preprocess_frame(frame_bgr):
    """Turn one BGR frame into a normalised single-frame clip on `device`.

    The frame is converted to RGB, passed through `_transform`, and given two
    leading singleton axes so the result has shape (1, 1, 3, 112, 112).
    """
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    image = _transform(rgb)
    clip = image[None, None]  # (1,1,3,112,112)
    return clip.to(device)


def forward_logits(x):
    """Run `_model` on `x` and return only the logits (second output)."""
    outputs = _model(x)
    return outputs[1]

In [None]:
# Frame attack
def fgsm_frame_attack(frame_bgr, true_label, epsilon=0.01):
    """Apply a single-step FGSM perturbation to one video frame.

    The frame is preprocessed with `preprocess_frame` (which resizes it to
    112x112 and normalises it), perturbed by ``epsilon * sign(grad)`` of the
    cross-entropy loss with respect to the normalised input, and converted
    back to an 8-bit BGR image.

    Args:
        frame_bgr: Input frame as a BGR uint8 array (as returned by OpenCV).
        true_label: Class index used as the cross-entropy target.
        epsilon: Step size, expressed in normalised-input units.

    Returns:
        The adversarial frame as a 112x112 BGR uint8 array.
    """
    # Only the LSTM is switched to training mode: the cuDNN RNN backward pass
    # requires it, but the rest of the network must stay in eval mode.
    # Putting the whole model in train mode would (a) apply the 0.4 dropout to
    # the logits, randomly zeroing the loss signal for some frames, and
    # (b) update BatchNorm running statistics on every frame, so the surrogate
    # would drift while the dataset is being processed.
    _model.eval()
    _model.lstm.train()
    try:
        x = preprocess_frame(frame_bgr)
        x.requires_grad_(True)

        logits = forward_logits(x)
        target = torch.tensor([true_label], device=device)
        loss = F.cross_entropy(logits, target)

        # Ask autograd for the input gradient only; loss.backward() would also
        # accumulate gradients on every model parameter across all frames.
        (grad,) = torch.autograd.grad(loss, x)
    finally:
        # Always leave the model in eval mode, even if the pass above failed.
        _model.eval()

    x_adv = torch.clamp(x.detach() + epsilon * grad.sign(), -3, 3)

    # Undo the normalisation with the same statistics used by `_transform`
    # and convert back to an HWC uint8 image.
    adv = x_adv.cpu().numpy()[0, 0].transpose(1, 2, 0)
    adv = adv * np.array(std) + np.array(mean)
    # Round to nearest before casting: plain truncation biases the result
    # downwards and can erase a positive sub-pixel perturbation entirely.
    adv = np.clip(np.rint(adv * 255), 0, 255).astype(np.uint8)

    return cv2.cvtColor(adv, cv2.COLOR_RGB2BGR)


def fgsm_attack_video(input_path, output_path, true_label, epsilon=0.01, max_frames=70, fps=25):
    """Attack the first frames of a video and write the result as an H.264 clip.

    Args:
        input_path: Path of the source video.
        output_path: Path of the adversarial video to create.
        true_label: Class index passed to `fgsm_frame_attack`.
        epsilon: FGSM step size passed to `fgsm_frame_attack`.
        max_frames: Maximum number of frames to read and attack.
        fps: Frame rate of the written clip.

    Returns:
        None. Prints "no frames extracted" and writes nothing when no frame
        could be read from `input_path`.
    """
    frames = []
    cap = cv2.VideoCapture(input_path)
    try:
        # Checking the limit first avoids decoding one frame beyond it.
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            adv = fgsm_frame_attack(frame, true_label, epsilon)
            # moviepy expects RGB frames.
            frames.append(cv2.cvtColor(adv, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if the attack is interrupted or fails.
        cap.release()

    if not frames:
        print("no frames extracted")
        return

    # Encode to a sibling temporary file (same extension, so the container
    # format is still inferred correctly) and rename it into place only once
    # encoding has finished. An interrupted run therefore never leaves a
    # truncated file at `output_path` that a later existence check would
    # mistake for a completed video.
    root, ext = os.path.splitext(output_path)
    tmp_path = f"{root}.partial{ext}"

    clip = ImageSequenceClip(frames, fps=fps)
    try:
        clip.write_videofile(
            tmp_path,
            codec="libx264",
            audio=False,
            verbose=False,
            logger=None
        )
    except BaseException:
        # BaseException so that KeyboardInterrupt also cleans up the partial file.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    os.replace(tmp_path, output_path)


In [None]:
# Source and destination folders: set `src_dir` to the folder containing the
# deepfake videos to attack and `output_dir` to the folder where the
# adversarial videos should be saved.
src_dir = "/content/drive/MyDrive/faceforensics++/manipulated_sequences/DeepFakeDetection/c40/videos/"
output_dir = "/content/drive/MyDrive/faceforensics++/Adversarial_attacked_sequences/TransferAttacks/FGSM/frame_level/ResNext_LTSM/DeepfakeDetectionSet/"

In [None]:
# Wrapper function
def fgsm_attack(video_path, output_path, epsilon, true_label=0, max_frames=70):
    """Thin adapter around `fgsm_attack_video` with a video-first argument order.

    Forwards every argument unchanged and returns whatever
    `fgsm_attack_video` returns.
    """
    result = fgsm_attack_video(
        video_path,
        output_path,
        true_label,
        epsilon=epsilon,
        max_frames=max_frames,
    )
    return result


# The final attack generator
def FGSM(epsilon, true_label=0):
    """Attack every ``*.mp4`` in `src_dir` and save the results per epsilon.

    Outputs go to ``<output_dir>/Epsilon<epsilon>/`` under the source file
    name. Videos whose output file already exists are skipped, so the function
    can be re-run to continue an earlier batch.

    Args:
        epsilon: FGSM step size; also used in the output folder name.
        true_label: Class index used as the attack target label.
    """
    outdir = os.path.join(output_dir, f"Epsilon{epsilon}")
    os.makedirs(outdir, exist_ok=True)

    pattern = os.path.join(src_dir, "*.mp4")
    for video_path in tqdm(glob.glob(pattern)):
        save_path = os.path.join(outdir, os.path.basename(video_path))

        # Only process videos that have no output yet.
        if not os.path.exists(save_path):
            fgsm_attack(
                video_path=video_path,
                output_path=save_path,
                epsilon=epsilon,
                true_label=true_label
            )

In [None]:
# Generate adversarial videos for the whole source folder with epsilon = 0.01
# (true_label keeps its default of 0).
FGSM(0.01)

  4%|▍         | 128/3068 [06:18<2:24:43,  2.95s/it]


KeyboardInterrupt: 