# Import

In [1]:
import random
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional

import sklearn
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
from tqdm import tqdm
from transformers import ViTForImageClassification, ViTImageProcessor
import wandb

  from .autonotebook import tqdm as notebook_tqdm


# Settings

In [2]:
# Seed every random number generator the notebook uses: Python's `random`,
# NumPy, PyTorch on CPU and PyTorch on all CUDA devices.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN kernels and switch off cuDNN benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# MODEL_ID = "prithivMLmods/Deep-Fake-Detector-v2-Model"
MODEL_ID = "buildborderless/CommunityForensics-DeepfakeDet-ViT"
TEST_DIR = Path("./test_data")  # path to the test data

# Submission
OUTPUT_DIR = Path("./output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)  # create the output folder if it does not exist

# The model id contains "/", which would act as a path separator in a file name,
# so it is replaced before building the CSV path.
SAFE_MODEL_ID = MODEL_ID.replace("/", "_")
OUT_CSV = OUTPUT_DIR / f"{SAFE_MODEL_ID}_submission.csv"

In [87]:
# Lower-case file suffixes (with the leading dot) that read_rgb_frames treats
# as still images and as videos respectively.
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".jfif"}
VIDEO_EXTS = {".mp4", ".mov"}

# TARGET_SIZE = (224, 224)
TARGET_SIZE = (384, 384)
NUM_FRAMES = 10  # number of frames sampled from each video

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")

Device: cpu


# Utils

In [5]:
def uniform_frame_indices(total_frames: int, num_frames: int) -> np.ndarray:
    """Choose frame indices spread evenly over a video.

    Args:
        total_frames: Number of frames available in the video.
        num_frames: Number of frames to sample.

    Returns:
        An integer array of frame indices. It is empty when ``total_frames``
        is not positive, contains every index when the video has at most
        ``num_frames`` frames, and otherwise holds ``num_frames`` indices
        running from the first frame to the last one.
    """
    needs_subsampling = total_frames > 0 and total_frames > num_frames
    if needs_subsampling:
        last_index = total_frames - 1
        return np.linspace(0, last_index, num_frames, dtype=int)

    # Short (or empty) videos: take every frame that exists.
    available = total_frames if total_frames > 0 else 0
    return np.arange(available, dtype=int)

def get_full_frame_padded(pil_img: Image.Image, target_size=(384, 384)) -> Image.Image:
    """Fit the whole image onto a black canvas while keeping its aspect ratio.

    Args:
        pil_img: Source image; it is converted to RGB first.
        target_size: ``(width, height)`` of the returned canvas.

    Returns:
        A new RGB image of ``target_size`` with the resized picture pasted
        in the centre and black padding around it.
    """
    fitted = pil_img.convert("RGB")
    # thumbnail() modifies the image it is called on, so call it on the converted one.
    fitted.thumbnail(target_size, Image.BICUBIC)

    # Top-left corner that centres the fitted image on the canvas.
    left = (target_size[0] - fitted.size[0]) // 2
    top = (target_size[1] - fitted.size[1]) // 2

    canvas = Image.new("RGB", target_size, (0, 0, 0))
    canvas.paste(fitted, (left, top))
    return canvas

def read_rgb_frames(file_path: Path, num_frames: int = NUM_FRAMES) -> List[np.ndarray]:
    """Extract RGB frames from an image file or a video file.

    Args:
        file_path: File to read; its lower-cased suffix selects the handling.
        num_frames: Maximum number of frames to sample from a video.

    Returns:
        A list of RGB arrays: one entry for an image, up to ``num_frames``
        entries for a video, and an empty list when the suffix is unsupported,
        the image cannot be decoded, or the video reports no frames.
    """
    ext = file_path.suffix.lower()

    # Image file: a single frame.
    if ext in IMAGE_EXTS:
        try:
            # The context manager closes the file handle even if decoding fails.
            with Image.open(file_path) as img:
                return [np.array(img.convert("RGB"))]
        except Exception:
            # Deliberate best effort: an unreadable image yields no frames.
            return []

    # Video file: sample frames at evenly spaced positions.
    if ext in VIDEO_EXTS:
        cap = cv2.VideoCapture(str(file_path))
        try:
            if not cap.isOpened():
                return []

            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total <= 0:
                return []

            frames: List[np.ndarray] = []
            for idx in uniform_frame_indices(total, num_frames):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if not ret:
                    # Skip frames that fail to decode instead of aborting the file.
                    continue
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            return frames
        finally:
            # Always release the capture, including when a cv2 call raises.
            cap.release()

    return []

# Data Preprocessing

In [6]:
class PreprocessOutput:
    """Container for the result of preprocessing a single file.

    Attributes:
        filename: Name of the processed file.
        imgs: Preprocessed images produced from the file.
        error: Error message, or ``None`` when no error was recorded.
    """

    def __init__(
        self,
        filename: str,
        imgs: List[Image.Image],
        error: Optional[str] = None
    ):
        self.filename, self.imgs, self.error = filename, imgs, error

def preprocess_one(file_path: Path, num_frames: int = NUM_FRAMES) -> PreprocessOutput:
    """Preprocess a single file into padded, model-sized images.

    Args:
        file_path: Path of the file to process.
        num_frames: Number of frames to extract when the file is a video.

    Returns:
        A PreprocessOutput holding the file name and the padded images. If
        anything raises, the image list is empty and ``error`` carries the
        exception text.
    """
    try:
        # Read the raw RGB frames, then pad each one to TARGET_SIZE.
        padded_imgs: List[Image.Image] = [
            get_full_frame_padded(Image.fromarray(frame), TARGET_SIZE)
            for frame in read_rgb_frames(file_path, num_frames=num_frames)
        ]
        return PreprocessOutput(file_path.name, padded_imgs, None)
    except Exception as exc:
        return PreprocessOutput(file_path.name, [], str(exc))

# Model Load

In [7]:
# Load the pretrained classifier and its image processor from the hub id above.
print("Loading model...")
model = ViTForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
# NOTE(review): the processor size is hard-coded to 384x384 — presumably meant to
# stay in sync with TARGET_SIZE; confirm if TARGET_SIZE is ever changed.
processor = ViTImageProcessor.from_pretrained(MODEL_ID,size={"height": 384, "width": 384}, do_resize=True)
model.eval()

print(f"Model loaded: {MODEL_ID}")
print(f"Model config: num_labels={model.config.num_labels}")
if hasattr(model.config, 'id2label'):
    print(f"id2label: {model.config.id2label}") # NOTE(review): assumed 0=real, 1=fake; the printed config only shows generic LABEL_0/LABEL_1 — confirm

Loading model...
Model loaded: buildborderless/CommunityForensics-DeepfakeDet-ViT
Model config: num_labels=2
id2label: {0: 'LABEL_0', 1: 'LABEL_1'}


In [8]:
def infer_fake_probs(pil_images: List[Image.Image]) -> List[float]:
    """Run the classifier on a batch of images and return class-1 probabilities.

    Args:
        pil_images: Images to score; all of them go through the model in one batch.

    Returns:
        One softmax probability per image, taken from logit column 1 (used as
        the "fake" class throughout this notebook). Empty input yields ``[]``.
    """
    if not pil_images:
        return []

    with torch.inference_mode():
        # do_resize=False: get_full_frame_padded() already sized the images, so avoid resizing twice.
        encoded = processor(images=pil_images, return_tensors="pt",do_resize = False)
        on_device = {name: tensor.to(DEVICE, non_blocking=True) for name, tensor in encoded.items()}
        class_probs = F.softmax(model(**on_device).logits, dim=1)
        return class_probs[:, 1].cpu().tolist()

# Dataset definition
- Processing of the datasets used for training
- ds = load_dataset(
    "Hemgg/deep-fake-detection-dfd-entire-original-dataset",
    streaming = True
)
- ds2 = load_dataset("OpenRL/DeepFakeFace",
                   streaming = True)

- ds3 = load_dataset("UniDataPro/deepfake-videos-dataset",
                   cache_dir="C:/Users/yjneo/workspace/hecto_deepfake/data")

In [9]:
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
import torch

In [10]:
# Label definition: maps a dataset sub-folder name to its class id
LABEL_MAP = {
    "wiki": 0,           # real
    "inpainting": 1,     # fake
    "insight": 1,        # fake
    "text2img": 1,       # fake
}



In [11]:
# jpeg augmentation
import io
import random
from PIL import Image

class RandomJPEGCompression:
    """Augmentation that randomly re-encodes an image as JPEG.

    Args:
        quality_range: Inclusive ``(low, high)`` range the JPEG quality is drawn from.
        p: Threshold compared against a uniform draw; the image is returned
            unchanged whenever the draw is greater than ``p``.
    """

    def __init__(self, quality_range=(30, 100), p=0.5):
        self.quality_range = quality_range
        self.p = p

    def __call__(self, img):
        if random.random() > self.p:
            # Leave this sample untouched.
            return img

        jpeg_quality = random.randint(*self.quality_range)

        # Encode into an in-memory buffer, then decode it again to obtain the
        # image with JPEG compression applied.
        encoded = io.BytesIO()
        img.save(encoded, format="JPEG", quality=jpeg_quality)
        encoded.seek(0)
        recompressed = Image.open(encoded)
        return recompressed.convert("RGB")


In [68]:
# random gamma
import random
import random
import torchvision.transforms.functional as TF

class RandomGamma:
    """Augmentation that randomly applies a gamma adjustment.

    Args:
        gamma_range: ``(low, high)`` range the gamma value is drawn from.
        p: Threshold compared against a uniform draw; the image is returned
            unchanged whenever the draw is greater than ``p``.
    """

    def __init__(self, gamma_range=(0.7, 1.5), p=0.5):
        self.gamma_range = gamma_range
        self.p = p

    def __call__(self, img):
        if random.random() > self.p:
            # Leave this sample untouched.
            return img

        chosen_gamma = random.uniform(*self.gamma_range)
        adjusted = TF.adjust_gamma(img, chosen_gamma)
        return adjusted


In [69]:
# Augmentation / preprocessing transform definitions
from torchvision import transforms

# At inference time the images go through `processor`, which rescales and (when
# its config says so) normalizes the pixels. ToTensor() alone only rescales to
# [0, 1], so without this step the model would be fine-tuned and validated on a
# different input distribution than the one it sees in infer_fake_probs().
_normalize_steps = (
    [transforms.Normalize(mean=processor.image_mean, std=processor.image_std)]
    if getattr(processor, "do_normalize", False)
    else []
)

# Training pipeline: resize, random 384 crop, photometric jitter, gamma, JPEG
# re-compression and occasional blur, then tensor conversion (+ normalization).
train_transform = transforms.Compose([
    transforms.Resize(448),
    transforms.RandomCrop(384),

    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.05,
    ),

    RandomGamma(gamma_range=(0.7, 1.5), p=0.4),
    RandomJPEGCompression(quality_range=(30, 100), p=0.4),

    transforms.RandomApply(
        [transforms.GaussianBlur(kernel_size=3)],
        p=0.2
    ),

    transforms.ToTensor(),
    *_normalize_steps,
])

# Validation pipeline: deterministic resize + centre crop, no augmentation.
val_transform = transforms.Compose([
    transforms.Resize(448),
    transforms.CenterCrop(384),
    transforms.ToTensor(),
    *_normalize_steps,
])



In [70]:
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image

from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
import torch
from torchvision import transforms

class DeepFakeImageDataset(Dataset):
    """Image dataset built from sub-folders whose names appear in LABEL_MAP.

    Every direct sub-folder of ``root_dir`` that is a key of LABEL_MAP is
    scanned recursively for ``.jpg`` / ``.jpeg`` / ``.png`` files; each file
    becomes one ``(path, label)`` entry in ``samples``.

    Args:
        root_dir: Directory containing the labelled sub-folders.
        transform: Optional callable applied to the RGB PIL image. When it is
            ``None`` the PIL image is returned as-is.
    """

    IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.samples = []

        # Directory iteration order depends on the filesystem. Sorting gives a
        # stable sample order, so index-based splits with a fixed seed are
        # reproducible and two instances over the same folder line up.
        for folder in sorted(self.root_dir.iterdir()):
            if not folder.is_dir():
                continue
            if folder.name not in LABEL_MAP:
                continue

            label = LABEL_MAP[folder.name]

            for img_path in sorted(folder.rglob("*")):
                if img_path.suffix.lower() in self.IMAGE_SUFFIXES:
                    self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]

        # Close the file handle as soon as the pixels are decoded.
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")

        # The transform is optional (default None); only call it when provided.
        if self.transform is not None:
            image = self.transform(image)

        return {
            "pixel_values": image,
            "labels": torch.tensor(label, dtype=torch.long)  # label as a tensor too
        }


In [15]:
# Download the dataset's zip archives
from huggingface_hub import snapshot_download

local_dir = "./deepfakeface_raw"

# Only files matching *.zip are fetched from the dataset repository.
snapshot_download(
    repo_id="OpenRL/DeepFakeFace",
    repo_type="dataset",          # <- the 404 happened because this line was missing
    allow_patterns=["*.zip"],
    local_dir=local_dir,
    local_dir_use_symlinks=False
)


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.
Fetching 4 files: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:10<00:00,  2.53s/it]


'C:\\Users\\yjneo\\workspace\\hecto_deepfake\\notebooks\\deepfakeface_raw'

In [16]:
# Extract the downloaded archives
import zipfile
from pathlib import Path

raw_dir = Path("./deepfakeface_raw")
out_dir = Path("./deepfakeface_extracted")

out_dir.mkdir(exist_ok=True)

# Each archive is unpacked into its own folder named after the zip file (without
# the extension); DeepFakeImageDataset later reads those folder names as labels.
for zip_path in raw_dir.glob("*.zip"):
    target_dir = out_dir / zip_path.stem
    target_dir.mkdir(exist_ok=True)

    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(target_dir)


In [71]:
# Build the datasets and check the labelling
full_train_dataset = DeepFakeImageDataset(
    "./deepfakeface_extracted",
    transform=train_transform
)

full_val_dataset = DeepFakeImageDataset(    # no augmentation
    "./deepfakeface_extracted",
    transform=val_transform
)

# Count how many samples fall into each class id.
from collections import Counter
labels = [label for _, label in full_train_dataset.samples]
print(Counter(labels))


Counter({1: 90000, 0: 30000})


In [72]:
# train/validation split
from torch.utils.data import Subset, random_split

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# The held-out indices are reused on full_val_dataset below, so both dataset
# objects must list the same files in the same order.
assert full_val_dataset.samples == full_train_dataset.samples, \
    "train/val dataset objects enumerate different samples"

train_dataset, _val_split = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

# Validation must not go through the training augmentations: keep the held-out
# indices but read them through the dataset that applies val_transform.
val_dataset = Subset(full_val_dataset, _val_split.indices)


In [73]:
# üîπ ÏùºÎ∂ÄÎßå ÏÇ¨Ïö© ()
from torch.utils.data import Subset

# Cap the subset sizes at 500 training and 100 validation samples.
train_subset_size = min(500, len(train_dataset))
val_subset_size = min(100, len(val_dataset))

# Take the first N entries of each split.
small_train_dataset = Subset(train_dataset, range(train_subset_size))
small_val_dataset = Subset(val_dataset, range(val_subset_size))
print(len(small_train_dataset))
print(len(small_val_dataset))


500
100


# Dataset preparation

In [74]:
# Number of training epochs; also used to size the LR scheduler's T_max.
num_epochs = 1

In [75]:
# collate_fn: keeps the batch structure identical for training and inference

def collate_fn(batch):
    """Merge a list of samples into one batch dictionary.

    Args:
        batch: Samples that are either ``{"pixel_values": ..., "labels": ...}``
            dictionaries (what DeepFakeImageDataset returns) or
            ``(image, label)`` pairs.

    Returns:
        ``{"pixel_values": stacked image tensor, "labels": label tensor}``.
    """
    if batch and isinstance(batch[0], dict):
        # Unpacking dicts with zip(*batch) would iterate over their keys, so
        # pull the two fields out explicitly.
        images = [sample["pixel_values"] for sample in batch]
        labels = [sample["labels"] for sample in batch]
    else:
        images, labels = zip(*batch)

    if all(isinstance(label, torch.Tensor) for label in labels):
        label_tensor = torch.stack(list(labels))
    else:
        label_tensor = torch.tensor(labels)

    return {
        "pixel_values": torch.stack(list(images)),
        "labels": label_tensor
    }



In [76]:
from torch.utils.data import DataLoader

# Page-locked host memory only speeds up host-to-GPU copies; without CUDA it
# brings no benefit and makes the DataLoader emit a warning.
_pin_memory = torch.cuda.is_available()

train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,         # reshuffle every epoch instead of replaying one fixed order
    num_workers=0,        # start with 0 workers first
    pin_memory=_pin_memory
)

# Validation order does not matter, so it stays unshuffled.
val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    num_workers=0,
    pin_memory=_pin_memory
)


## Model setup
- Freeze the backbone and train only the classifier.
- optimizer, scheduler, early stopping

In [77]:
# Freeze the backbone first
# requires_grad_(False) switches off gradient tracking for every parameter of
# the ViT sub-module in place.
model.vit.requires_grad_(False)

In [78]:
# # unfreeze
# N = 2  # ÎßàÏßÄÎßâ NÍ∞ú block unfreeze
# for layer in model.vit.encoder.layer[-N:]:
#     for param in layer.parameters():
#         param.requires_grad = True



In [79]:

# Check which layers will be trained (optional)
# Note: these are counts of parameter tensors, not of individual scalar weights.
requires_grad_flags = [p.requires_grad for p in model.parameters()]
trainable = sum(requires_grad_flags)
total = len(requires_grad_flags)
print(f"Trainable params: {trainable} / {total}")

Trainable params: 2 / 200


In [80]:
# Optimizer / Scheduler
Learning_Rate = 1e-3

# Do not force "cuda" here: on a CPU-only machine every later `.to(DEVICE)`
# would fail, and the model (moved at load time) would sit on a different
# device than its batches. Pick the device that exists and make sure the model
# is on it before the optimizer is created.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model.to(DEVICE)

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=Learning_Rate,
    weight_decay=1e-4
)

# T_max is expressed in optimizer steps (batches per epoch x epochs), so the
# scheduler is meant to be stepped once per batch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=len(train_loader) * num_epochs
)


In [81]:
# early stopping
class EarlyStopping:
    """Signal a stop when a higher-is-better score stops improving.

    Args:
        patience: Number of consecutive non-improving steps that triggers a stop.
        min_delta: Margin added to the best score; a new score counts as an
            improvement unless it is strictly below ``best_score + min_delta``
            (so a score exactly at that value resets the counter).
    """

    def __init__(self, patience=4, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_score = None
        self.counter = 0

    def step(self, score):
        """Record ``score`` and return True when training should stop."""
        if self.best_score is not None:
            threshold = self.best_score + self.min_delta
            if score < threshold:
                # No improvement: spend one unit of patience.
                self.counter += 1
                return self.counter >= self.patience
            # Improvement: start counting from scratch again.
            self.counter = 0

        # First score ever seen, or a new best.
        self.best_score = score
        return False


In [82]:
# training loop (mixed precision)
from torch.cuda.amp import GradScaler, autocast

# Mixed precision is only switched on when CUDA is present. With enabled=False
# both autocast and the scaler become pass-throughs, so the loop also runs on CPU.
_USE_AMP = torch.cuda.is_available()
# train_one_epoch calls scaler.scale/step/update, so the scaler must exist
# before the function is used.
scaler = GradScaler(enabled=_USE_AMP)

def train_one_epoch(model, loader):
    """Train ``model`` for one pass over ``loader`` using autocast and loss scaling.

    Relies on the notebook-level ``optimizer``, ``scheduler``, ``scaler`` and
    ``DEVICE``. The scheduler is stepped once per batch.

    Args:
        model: Model whose forward pass returns an object with a ``loss`` attribute.
        loader: Iterable of batches, each a dict of tensors passed as keyword
            arguments to the model.

    Returns:
        Mean training loss over the batches of this epoch.
    """
    model.train()
    total_loss = 0.0

    for batch in loader:
        batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}

        optimizer.zero_grad(set_to_none=True)

        with autocast(enabled=_USE_AMP):
            outputs = model(**batch)
            loss = outputs.loss

        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        total_loss += loss.item()

    return total_loss / len(loader)


In [90]:
def train_one_epoch_cpu(model, loader):
    """Train ``model`` for one pass over ``loader`` in full precision.

    Relies on the notebook-level ``optimizer``, ``scheduler`` and ``DEVICE``.

    Args:
        model: Model whose forward pass returns an object with a ``loss`` attribute.
        loader: Iterable of batches, each a dict of tensors passed as keyword
            arguments to the model.

    Returns:
        Mean training loss over the batches of this epoch.
    """
    model.train()
    total_loss = 0.0

    for batch in loader:
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        optimizer.zero_grad()

        outputs = model(**batch)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        # The scheduler's T_max is counted in batches, so it has to advance on
        # every step; without this call the learning rate never decays.
        scheduler.step()

        total_loss += loss.item()

    return total_loss / len(loader)


In [91]:
import torch.nn.functional as F

from sklearn.metrics import roc_auc_score


@torch.inference_mode()
def validate(model, loader):
    """Evaluate ``model`` on ``loader`` and report mean loss and ROC-AUC.

    Args:
        model: Model whose forward pass returns ``loss`` and ``logits``.
        loader: Iterable of batches; each batch is a dict of tensors that
            includes a ``"labels"`` entry.

    Returns:
        ``(val_loss, val_auc)``: the mean loss over batches and the ROC-AUC of
        the class-1 ("fake") probabilities against the labels.
    """
    model.eval()
    loss_sum = 0.0
    targets, fake_scores = [], []

    for batch in loader:
        batch = {key: value.to(DEVICE, non_blocking=True) for key, value in batch.items()}
        outputs = model(**batch)
        loss_sum += outputs.loss.item()

        # Column 1 of the softmax output is the fake probability.
        fake_scores += torch.softmax(outputs.logits, dim=1)[:, 1].cpu().tolist()
        targets += batch["labels"].cpu().tolist()

    mean_loss = loss_sum / len(loader)
    return mean_loss, roc_auc_score(targets, fake_scores)



# training

In [92]:
# # wandb
# num_epochs = 1
# Learning_Rate = 1e-4 
# import wandb

# run = wandb.init(
#     entity="yjneon339-kyonggi-university",   # ÌåÄÎ™Ö ÎòêÎäî Í≥ÑÏ†ïÎ™Ö
#     project="dacon_hecto_deepfake",          # ÌîÑÎ°úÏ†ùÌä∏Î™Ö
#     config={
#         "learning_rate": Learning_Rate,
#         "architecture": MODEL_ID,
#         "dataset": 'hf_openrl',
#         "epochs": num_epochs,
#         "batch_size": train_loader.batch_size
#     }
# )


In [93]:
@torch.inference_mode()
def validate(model, loader):
    """Evaluate ``model`` on ``loader`` and report mean loss and ROC-AUC.

    Runs under ``torch.inference_mode()``: validation needs no gradients, and
    without it every forward pass would build an autograd graph.

    Args:
        model: Model whose forward pass returns ``loss`` and ``logits``.
        loader: Iterable of batches; each batch is a dict of tensors that
            includes a ``"labels"`` entry.

    Returns:
        ``(val_loss, val_auc)``: the mean loss over batches and the ROC-AUC of
        the class-1 ("fake") probabilities against the labels.
    """
    model.eval()
    total_loss = 0.0
    all_labels = []
    all_probs = []

    for batch in loader:
        batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        probs = torch.softmax(outputs.logits, dim=1)[:, 1]  # fake probability
        all_probs.extend(probs.cpu().tolist())
        all_labels.extend(batch["labels"].cpu().tolist())

    val_loss = total_loss / len(loader)
    val_auc = roc_auc_score(all_labels, all_probs)
    return val_loss, val_auc




In [94]:
from tqdm import tqdm
import torch

# Track the best validation AUC seen so far and stop early when it stalls.
best_val_auc = 0.0
early_stopper = EarlyStopping(patience=2, min_delta=0.0)
epoch_bar = tqdm(range(num_epochs), desc="Training", position=0)

for epoch in epoch_bar:
    train_loss = train_one_epoch_cpu(model, train_loader)
    val_loss, val_auc = validate(model, val_loader)

    # show the live metrics on the tqdm bar
    epoch_bar.set_postfix({
        "train_loss": f"{train_loss:.4f}",
        "val_loss": f"{val_loss:.4f}",
        "val_auc": f"{val_auc:.4f}"
    })

    # # W&B logging
    # wandb.log({
    #     "epoch": epoch + 1,
    #     "train_loss": train_loss,
    #     "val_loss": val_loss,
    #     "val_auc": val_auc
    # })

    # save the best model (weights only, overwritten on every improvement)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        torch.save(model.state_dict(), "best_model.pt")

    # early stopping
    if early_stopper.step(val_auc):
        epoch_bar.write(
            f"Early stopping triggered at epoch {epoch+1} "
            f"(best val_auc={early_stopper.best_score:.4f})"
        )
        break


Training:   0%|          | 0/1 [1:24:39<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# 1. Create the model artifact
artifact = wandb.Artifact('unfreeze_2blocks_model', type='model')
# NOTE(review): the training loop in this notebook writes "best_model.pt";
# confirm that "unfreeze_2blocks_model.pt" actually exists before running this cell.
artifact.add_file("unfreeze_2blocks_model.pt")

# 2. Log it to wandb
# NOTE(review): the wandb.init(...) cell in this notebook is commented out —
# presumably a run has to be started before logging; confirm.
wandb.log_artifact(artifact)