# Import

In [97]:
import random
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Dict, List, Optional

import sklearn
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
from tqdm import tqdm
from transformers import ViTForImageClassification, ViTImageProcessor
import wandb

# Settings

In [98]:
# Seed every random number generator this notebook uses (Python, NumPy,
# PyTorch CPU and all CUDA devices) with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN kernels and disable the cuDNN benchmark auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [99]:
# Hugging Face model id of the classifier (the alternative is kept commented out).
# MODEL_ID = "prithivMLmods/Deep-Fake-Detector-v2-Model"
MODEL_ID = "buildborderless/CommunityForensics-DeepfakeDet-ViT"
TEST_DIR = Path("./test_data")  # path to the test data

# Submission
OUTPUT_DIR = Path("./output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)  # create the output folder if it does not exist

# The model id contains "/", which would act as a path separator in the CSV
# file name, so it is replaced with "_" first.
SAFE_MODEL_ID = MODEL_ID.replace("/", "_")
OUT_CSV = OUTPUT_DIR / f"{SAFE_MODEL_ID}_submission.csv"

In [100]:
# Lower-case file suffixes (with the leading dot) handled as images / videos.
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".jfif"}
VIDEO_EXTS = {".mp4", ".mov"}

# Size every frame is padded to before it is handed to the model.
# TARGET_SIZE = (224, 224)
TARGET_SIZE = (384, 384)
NUM_FRAMES = 10  # number of frames sampled from each video

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")

Device: cpu


# Utils

In [101]:
def uniform_frame_indices(total_frames: int, num_frames: int) -> np.ndarray:
    """Return evenly spaced frame indices for sampling a video.

    Args:
        total_frames: number of frames in the video.
        num_frames: number of frames to sample.

    Returns:
        An integer array of indices. It is empty when ``total_frames`` is not
        positive, covers every frame when the video has at most ``num_frames``
        frames, and otherwise holds ``num_frames`` indices spread from the
        first frame to the last one.
    """
    needs_subsampling = total_frames > 0 and total_frames > num_frames
    if needs_subsampling:
        return np.linspace(0, total_frames - 1, num_frames, dtype=int)
    # Short (or empty) videos: take every available frame.
    return np.arange(max(total_frames, 0), dtype=int)

def get_full_frame_padded(pil_img: Image.Image, target_size=(384, 384)) -> Image.Image:
    """Fit the whole image into ``target_size`` and pad the rest with black.

    The image is converted to RGB, shrunk with ``Image.thumbnail`` (bicubic
    resampling, aspect ratio kept) so it fits inside ``target_size``, and then
    pasted centred onto a black canvas of exactly ``target_size``.

    Args:
        pil_img: source image; it is not modified (the work is done on the
            RGB copy returned by ``convert``).
        target_size: size of the returned canvas.

    Returns:
        A new RGB image of size ``target_size``.
    """
    fitted = pil_img.convert("RGB")
    fitted.thumbnail(target_size, Image.BICUBIC)

    canvas = Image.new("RGB", target_size, (0, 0, 0))
    fitted_w, fitted_h = fitted.size
    # Top-left corner that centres the fitted image on the canvas.
    left = (target_size[0] - fitted_w) // 2
    top = (target_size[1] - fitted_h) // 2
    canvas.paste(fitted, (left, top))
    return canvas

def read_rgb_frames(file_path: Path, num_frames: int = NUM_FRAMES) -> List[np.ndarray]:
    """Extract RGB frames from an image or a video file.

    The file type is decided by the lower-cased suffix: suffixes in
    ``IMAGE_EXTS`` yield a single frame, suffixes in ``VIDEO_EXTS`` yield up to
    ``num_frames`` frames sampled at the indices chosen by
    ``uniform_frame_indices``.

    Args:
        file_path: path of the file to read.
        num_frames: maximum number of frames to sample from a video.

    Returns:
        A list of RGB arrays. The list is empty when the suffix is not
        supported, the image cannot be opened or decoded, or the video reports
        no frames. Video frames that fail to decode are skipped.
    """
    ext = file_path.suffix.lower()

    # Image file: one frame.
    if ext in IMAGE_EXTS:
        try:
            # The context manager closes the underlying file handle even when
            # decoding fails part-way through.
            with Image.open(file_path) as img:
                return [np.array(img.convert("RGB"))]
        except Exception:
            # Deliberate best-effort: an unreadable image yields no frames.
            return []

    # Video file: sample frames at evenly spaced positions.
    if ext in VIDEO_EXTS:
        cap = cv2.VideoCapture(str(file_path))
        try:
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total <= 0:
                return []

            frames = []
            for idx in uniform_frame_indices(total, num_frames):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if not ret:
                    continue
                # OpenCV decodes to BGR; convert to RGB.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            return frames
        finally:
            # Release the capture on every exit path, including exceptions.
            cap.release()

    return []

# Data Preprocessing

In [102]:
class PreprocessOutput:
    """Plain container for the result of preprocessing one file.

    Attributes:
        filename: name of the file the result belongs to.
        imgs: list of images produced for that file.
        error: error message, or ``None`` when none was given.
    """

    def __init__(
        self,
        filename: str,
        imgs: List[Image.Image],
        error: Optional[str] = None
    ):
        # Store the three values unchanged.
        self.filename, self.imgs, self.error = filename, imgs, error

def preprocess_one(file_path: Path, num_frames: int = NUM_FRAMES) -> PreprocessOutput:
    """Preprocess a single file into padded model-ready images.

    Args:
        file_path: path of the file to process.
        num_frames: number of frames to extract when the file is a video.

    Returns:
        A ``PreprocessOutput`` carrying the file name and one padded image per
        extracted frame. If any step raises, the image list is empty and
        ``error`` holds the exception message.
    """
    try:
        raw_frames = read_rgb_frames(file_path, num_frames=num_frames)
        padded: List[Image.Image] = [
            get_full_frame_padded(Image.fromarray(frame), TARGET_SIZE)
            for frame in raw_frames
        ]
        return PreprocessOutput(file_path.name, padded, None)
    except Exception as exc:
        return PreprocessOutput(file_path.name, [], str(exc))

# Model Load

In [103]:
# Load the pretrained classifier onto DEVICE together with its image processor.
print("Loading model...")
model = ViTForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
# The processor is configured for 384x384 input.
processor = ViTImageProcessor.from_pretrained(MODEL_ID,size={"height": 384, "width": 384}, do_resize=True)
model.eval()

print(f"Model loaded: {MODEL_ID}")
print(f"Model config: num_labels={model.config.num_labels}")
if hasattr(model.config, 'id2label'):
    # NOTE(review): the config only names the classes LABEL_0 / LABEL_1; the
    # rest of the notebook assumes 0 = real and 1 = fake — confirm against the model card.
    print(f"id2label: {model.config.id2label}") # real: 0, fake: 1

Loading model...
Model loaded: buildborderless/CommunityForensics-DeepfakeDet-ViT
Model config: num_labels=2
id2label: {0: 'LABEL_0', 1: 'LABEL_1'}


In [104]:
def infer_fake_probs(pil_images: List[Image.Image]) -> List[float]:
    """Return the class-1 ("fake") probability for each image.

    All images are sent through the module-level ``processor`` and ``model``
    as a single batch on ``DEVICE``.

    Args:
        pil_images: images that are already at the model's input size.

    Returns:
        One softmax probability of class index 1 per input image, in input
        order; an empty list when ``pil_images`` is empty.
    """
    if not pil_images:
        return []

    with torch.inference_mode():
        # Resizing is switched off because get_full_frame_padded() has already
        # brought the images to the target size.
        encoded = processor(images=pil_images, return_tensors="pt",do_resize = False)
        on_device = {
            name: tensor.to(DEVICE, non_blocking=True)
            for name, tensor in encoded.items()
        }
        class_probs = F.softmax(model(**on_device).logits, dim=1)
        fake_probs = class_probs[:, 1].cpu().tolist()

    return list(fake_probs)

# Dataset 정의
- 학습시킬 dataset 처리

```python
ds = load_dataset(
    "Hemgg/deep-fake-detection-dfd-entire-original-dataset",
    streaming = True
)
ds2 = load_dataset("OpenRL/DeepFakeFace",
                   streaming = True)

ds3 = load_dataset("UniDataPro/deepfake-videos-dataset",
                   cache_dir="C:/Users/yjneo/workspace/hecto_deepfake/data")
```

In [105]:
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
import torch

In [106]:
# 스트리밍으로 사용 시 
import hashlib
from torch.utils.data import IterableDataset
from PIL import Image
import torch

class StreamingDeepfakeDataset(torch.utils.data.IterableDataset):
    """Iterable dataset yielding ``(image, label)`` pairs from a Hugging Face dataset.

    Labels are not read from a label column: they are inferred from keywords
    found in the string fields of each item (see ``_infer_label``). Items whose
    label cannot be inferred are skipped.

    The train/val split is index based so that it also works for streamed
    data: every ``int(1 / val_ratio)``-th item (starting with index 0) belongs
    to ``"val"`` and all other items to ``"train"``. Any other ``split`` value
    yields every item.

    Args:
        hf_dataset: iterable of dict-like items that contain an ``"image"`` entry.
        processor: optional callable used as
            ``processor(images=image, return_tensors="pt")["pixel_values"]``;
            when omitted the image is yielded unprocessed.
        split: ``"train"`` or ``"val"``.
        val_ratio: approximate fraction of items assigned to the validation split.

    Raises:
        ValueError: if ``val_ratio`` is not in the range ``(0, 1]``.
    """

    # Keywords searched in the lower-cased string fields of an item.
    REAL_KEYWORDS = ("wiki",)
    FAKE_KEYWORDS = ("inpainting", "insight", "text2img")

    def __init__(
        self,
        hf_dataset,
        processor=None,
        split="train",
        val_ratio=0.1,
    ):
        super().__init__()
        # Reject ratios that would make the split period zero (modulo by zero)
        # or divide by zero later, instead of failing in the middle of iteration.
        if not 0 < val_ratio <= 1:
            raise ValueError(f"val_ratio must be in (0, 1], got {val_ratio!r}")
        self.dataset = hf_dataset
        self.processor = processor
        self.split = split
        self.val_ratio = val_ratio

    def _infer_label(self, item):
        """Return 0 (real), 1 (fake) or ``None`` based on the item's string fields."""
        # Join every top-level string field of the item into one lower-cased text.
        text = " ".join(
            str(v).lower()
            for v in item.values()
            if isinstance(v, str)
        )

        # The real keywords are checked first, so they win over fake keywords.
        if any(k in text for k in self.REAL_KEYWORDS):
            return 0  # real
        if any(k in text for k in self.FAKE_KEYWORDS):
            return 1  # fake

        return None

    def __iter__(self):
        # The period is constant for the whole pass, so compute it once.
        val_every = int(1 / self.val_ratio)

        for idx, item in enumerate(self.dataset):
            # Streaming-safe split (index based).
            is_val = (idx % val_every) == 0
            if (self.split == "train" and is_val) or (
                self.split == "val" and not is_val
            ):
                continue

            # Infer the label before touching the image: it is cheap and lets
            # undecidable samples be dropped without any image conversion.
            label = self._infer_label(item)
            if label is None:
                continue

            image = item["image"]
            if not isinstance(image, Image.Image):
                image = Image.fromarray(image)

            if self.processor:
                # Drop the batch dimension the processor adds for a single image.
                image = self.processor(
                    images=image,
                    return_tensors="pt"
                )["pixel_values"].squeeze(0)

            yield image, torch.tensor(label, dtype=torch.long)


In [107]:
from datasets import load_dataset
from itertools import chain

# ds1 = load_dataset(
#     "Hemgg/deep-fake-detection-dfd-entire-original-dataset",
#     split="train",
#     streaming=True
# )

# NOTE(review): unlike the commented-out variants, this call does not pass
# streaming=True, so it presumably downloads the whole split before
# returning — confirm this is intended.
ds2 = load_dataset(
    "OpenRL/DeepFakeFace",
    split="train",
)

# ds3 = load_dataset(
#     "UniDataPro/deepfake-videos-dataset",
#     split="train",
#     streaming=True
# )

# merged_ds = chain(ds1, ds2, ds3)


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but t

KeyboardInterrupt: 

In [None]:

# Train and validation views over the same underlying dataset; the split
# argument selects which items each view yields. val_ratio keeps its default.
train_dataset = StreamingDeepfakeDataset(
    hf_dataset=ds2,
    processor=processor,
    split="train"
)

val_dataset = StreamingDeepfakeDataset(
    hf_dataset=ds2,
    processor=processor,
    split="val"
)



# train 준비 

In [68]:
num_epochs = 10

In [69]:
# collate_fn 학습과 추론 입력 구조를 동일하게 유지 

def collate_fn(batch):
    """Collate ``(image, label)`` samples into a dict of batched tensors.

    Args:
        batch: sequence of ``(image_tensor, label)`` pairs.

    Returns:
        A dict with ``"pixel_values"`` (the images stacked along a new first
        dimension) and ``"labels"`` (a tensor built from the labels).
    """
    # Transpose the list of pairs into one list of images and one of labels.
    images, labels = (list(column) for column in zip(*batch))
    pixel_values = torch.stack(images)
    label_tensor = torch.tensor(labels)
    return {"pixel_values": pixel_values, "labels": label_tensor}



## 스트리밍 데이터 일 때

In [84]:
from torch.utils.data import DataLoader

# NOTE(review): collate_fn is not passed to either loader, so batches use the
# DataLoader's default collation instead of the {"pixel_values", "labels"}
# dict built by collate_fn — confirm which batch format the training code expects.
train_loader = DataLoader(
    train_dataset,
    batch_size=1,
    num_workers=0,        # start with 0 workers first
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    num_workers=0,
    pin_memory=True
)


## model freeze

In [85]:
# Freeze the backbone first: no parameter under model.vit receives gradients.
for param in model.vit.parameters():
    param.requires_grad = False

In [86]:
# Unfreeze: re-enable gradients for the last N encoder blocks only.
N = 2  # number of trailing encoder blocks to unfreeze
for layer in model.vit.encoder.layer[-N:]:
    for param in layer.parameters():
        param.requires_grad = True


# Check which layers will be trained (optional).
# Both numbers count parameter tensors, not individual scalar weights.
trainable = sum(p.requires_grad for p in model.parameters())
total = sum(1 for _ in model.parameters())
print(f"Trainable params: {trainable} / {total}")

Trainable params: 34 / 200


In [87]:
# Learning_Rate is otherwise first assigned in the later wandb cell, so running
# the notebook top to bottom would raise NameError here. Define it before its
# first use (same value as in the wandb cell).
Learning_Rate = 1e-4

# AdamW over all model parameters; parameters with requires_grad=False never
# receive a gradient and are therefore left untouched by the optimizer.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=Learning_Rate,
    weight_decay=1e-4
)



In [88]:
# For streaming data: the dataset length is unknown, so the schedule is
# defined in optimizer steps rather than epochs.
max_train_steps = 100_000   # chosen by hand
warmup_steps = 2_000
from transformers import get_cosine_schedule_with_warmup

# Warmup for warmup_steps, then cosine decay until max_train_steps.
# NOTE(review): the training cell later rebinds max_train_steps to 10, but the
# scheduler keeps the value captured here — confirm this is intended.
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=max_train_steps
)


In [89]:
# early stopping
class EarlyStopping:
    """Signal when a higher-is-better score has stopped improving.

    Args:
        patience: number of consecutive non-improving ``step`` calls after
            which ``step`` starts returning ``True``.
        min_delta: margin by which a score has to exceed the best score seen
            so far to count as an improvement.

    Attributes:
        best_score: best score seen so far (``None`` before the first call).
        counter: number of consecutive calls without an improvement.
    """

    def __init__(self, patience=2, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_score = None
        self.counter = 0

    def step(self, score):
        """Record ``score`` and return ``True`` when training should stop."""
        if self.best_score is None:
            self.best_score = score
            return False  # stop = False

        # Phrased as a positive "improved" test on purpose: a plateau (score
        # equal to the best) and a NaN score (every comparison is False) both
        # count as "no improvement". Testing `score < best` instead would treat
        # them as improvements and let NaN overwrite best_score, after which
        # stopping could never trigger.
        if score > self.best_score + self.min_delta:
            self.best_score = score
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience  # stop once patience is used up


In [90]:
# training loop  
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()

def train_one_epoch(model, loader):
    """Run one training pass over ``loader`` with automatic mixed precision.

    Uses the module-level ``optimizer``, ``scaler``, ``scheduler`` and
    ``DEVICE``. The scheduler is stepped once per batch.

    Args:
        model: model called as ``model(**batch)``; its output must expose ``.loss``.
        loader: iterable of dict batches whose values are tensors
            (presumably the format produced by ``collate_fn`` — confirm against the caller).

    Returns:
        The mean loss per batch, or ``nan`` when ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    # Count batches explicitly: len(loader) raises TypeError for a DataLoader
    # built on an IterableDataset that does not define __len__.
    num_batches = 0

    for batch in loader:
        batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}

        optimizer.zero_grad(set_to_none=True)

        with autocast():
            outputs = model(**batch)
            loss = outputs.loss

        # Scale the loss before backward; the scaler unscales again in step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        return float("nan")
    return total_loss / num_batches


  scaler = GradScaler()


In [91]:
import torch.nn.functional as F

from sklearn.metrics import roc_auc_score


@torch.inference_mode()
def validate(model, loader):
    """Evaluate ``model`` on every batch of ``loader``.

    Note that a later cell of this notebook defines another ``validate`` with
    a different signature, which replaces this one once that cell has run.

    Args:
        model: model called as ``model(**batch)``; its output must expose
            ``.loss`` and ``.logits``.
        loader: iterable of dict batches whose values are tensors and which
            contain a ``"labels"`` entry.

    Returns:
        ``(val_loss, val_auc)``: the mean loss per batch and the ROC AUC of
        the class-1 (fake) probability. Both are ``nan`` when ``loader``
        yields no batches.
    """
    model.eval()
    total_loss = 0.0
    # Count batches explicitly: len(loader) raises TypeError for a DataLoader
    # built on an IterableDataset that does not define __len__.
    num_batches = 0
    all_labels = []
    all_probs = []

    for batch in loader:
        batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        num_batches += 1

        probs = torch.softmax(outputs.logits, dim=1)[:, 1]  # fake probability
        all_probs.extend(probs.cpu().tolist())
        all_labels.extend(batch["labels"].cpu().tolist())

    if num_batches == 0:
        return float("nan"), float("nan")

    val_loss = total_loss / num_batches
    val_auc = roc_auc_score(all_labels, all_probs)
    return val_loss, val_auc



# training

In [92]:
# wandb: hyper-parameters for this run and experiment-tracking setup.
num_epochs = 10
Learning_Rate = 1e-4 
import wandb

# Start a wandb run and record the run configuration alongside it.
run = wandb.init(
    entity="yjneon339-kyonggi-university",   # team or account name
    project="dacon_hecto_deepfake",          # project name
    config={
        "learning_rate": Learning_Rate,
        "architecture": MODEL_ID,
        "dataset": 'hf_openrl',
        "epochs": num_epochs,
        "batch_size": train_loader.batch_size
    }
)


In [93]:
def validate(model, val_loader, max_steps):
    """Evaluate ``model`` on at most ``max_steps`` batches of ``val_loader``.

    The model is put into eval mode for the evaluation and switched back to
    train mode before returning.

    Args:
        model: model called as ``model(images, labels=labels)``; its output
            must expose ``.loss`` and ``.logits``.
        val_loader: iterable of ``(images, labels)`` tensor batches.
        max_steps: maximum number of batches to evaluate.

    Returns:
        ``(avg_loss, val_auc)``: the mean loss over the batches actually
        evaluated and the ROC AUC of the class-1 (fake) probability.
        ``avg_loss`` is ``nan`` when no batch was evaluated; ``val_auc`` is
        ``nan`` when no batch was evaluated or only one class is present
        (the AUC is undefined in that case).
    """
    # compute_auc is not defined anywhere in this notebook; roc_auc_score takes
    # its arguments in the same (y_true, y_score) order.
    from sklearn.metrics import roc_auc_score

    model.eval()
    total_loss = 0.0
    steps_run = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            if step >= max_steps:
                break

            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images, labels=labels)
            total_loss += outputs.loss.item()
            steps_run += 1

            probs = outputs.logits.softmax(dim=1)[:, 1]
            all_preds.append(probs.cpu())
            all_labels.append(labels.cpu())

    model.train()

    if steps_run == 0:
        # Nothing was evaluated; torch.cat on an empty list would raise.
        return float("nan"), float("nan")

    # Average over the batches actually seen: the loader may run out before
    # max_steps, and dividing by max_steps would then understate the loss.
    avg_loss = total_loss / steps_run

    y_true = torch.cat(all_labels)
    y_score = torch.cat(all_preds)
    if torch.unique(y_true).numel() < 2:
        val_auc = float("nan")
    else:
        val_auc = float(roc_auc_score(y_true.numpy(), y_score.numpy()))
    return avg_loss, val_auc


In [95]:
# Step-based training loop: trains until max_train_steps optimizer steps have
# been taken, logging every 100 steps and validating every eval_interval steps.
# NOTE(review): max_train_steps (10) is smaller than both the logging interval
# (100) and eval_interval (2_000), so with these values neither the logging nor
# the validation / checkpoint / early-stopping branch is reached — this looks
# like a smoke-test setting; confirm before a real run.
max_train_steps = 10
eval_interval = 2_000
max_val_steps = 1_000
from tqdm import tqdm

best_val_auc = 0.0
early_stopper = EarlyStopping(patience=3, min_delta=0.0)

global_step = 0
pbar = tqdm(total=max_train_steps, desc="Training (steps)")

model.train()

for batch in train_loader:
    print("BATCH RECEIVED")  # debug trace

    # Relies on the loader yielding an (images, labels) pair per batch.
    images, labels = batch

    images = images.to(DEVICE, non_blocking=True)
    labels = labels.to(DEVICE, non_blocking=True)

    outputs = model(images, labels=labels)
    loss = outputs.loss
    print("LOSS:", loss.item())  # debug trace

    # Full-precision update; the scheduler advances once per optimizer step.
    loss.backward()
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad(set_to_none=True)

    global_step += 1
    pbar.update(1)

    # ===== Logging =====
    if global_step % 100 == 0:
        wandb.log({
            "train/loss": loss.item(),
            "step": global_step,
            "lr": scheduler.get_last_lr()[0],
        })

    # ===== Validation =====
    if global_step % eval_interval == 0:
        val_loss, val_auc = validate(
            model,
            val_loader,
            max_steps=max_val_steps
        )

        wandb.log({
            "val/loss": val_loss,
            "val/auc": val_auc,
            "step": global_step,
        })

        # show the latest metrics on the tqdm bar
        pbar.set_postfix({
            "train_loss": f"{loss.item():.4f}",
            "val_loss": f"{val_loss:.4f}",
            "val_auc": f"{val_auc:.4f}",
        })

        # save the best model (highest validation AUC so far)
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            torch.save(model.state_dict(), "best_model.pt")

        # early stopping
        if early_stopper.step(val_auc):
            pbar.write(
                f"Early stopping at step {global_step} "
                f"(best val_auc={best_val_auc:.4f})"
            )
            break

    # Streaming data has no natural epoch end, so stop on the step budget.
    if global_step >= max_train_steps:
        break

pbar.close()


Training (steps):   0%|          | 0/100000 [00:36<?, ?it/s]


KeyboardInterrupt: 

In [96]:
from time import time

# Debug aid: measure how long it takes to fetch a single batch from the
# training loader, then inspect what came back.
t0 = time()
it = iter(train_loader)
batch = next(it)
print("Loaded one batch in", time() - t0, "seconds")

images, labels = batch
print(images.shape, labels)


KeyboardInterrupt: 

In [30]:
import sys
print(sys.executable)


c:\Users\yjneo\anaconda3\envs\hecto\python.exe


# Inference

In [27]:
# 1. Create the model artifact
# NOTE(review): add_file needs this file to exist in the working directory;
# the training loop in this notebook saves its checkpoint as "best_model.pt",
# so the name below has to be produced some other way — confirm.
artifact = wandb.Artifact('unfreeze_2blocks_model', type='model')
artifact.add_file("unfreeze_2blocks_model.pt")

# 2. Log it to wandb
wandb.log_artifact(artifact)

ValueError: Path is not a file: 'unfreeze_2blocks_model.pt'

In [46]:
# Fetch the fine-tuned weights from the wandb artifact and load them into a
# fresh copy of the pretrained model for CPU inference.
import wandb

run = wandb.init(project="dacon_hecto_deepfake", job_type="inference")

artifact = run.use_artifact(
    "yjneon339-kyonggi-university/dacon_hecto_deepfake/unfreeze_2blocks_model:v0",
    type="model"
)

# download() returns the local directory the artifact files were written to.
artifact_dir = artifact.download()
print(artifact_dir)


# Inference runs on the CPU from here on; this rebinds the global DEVICE.
DEVICE = torch.device("cpu")
model = ViTForImageClassification.from_pretrained(MODEL_ID)
state_dict = torch.load(
    f"{artifact_dir}/unfreeze_2blocks_model.pt",
    map_location=DEVICE,
    # The checkpoint is expected to be a plain state_dict, so restrict
    # unpickling to tensors and primitive containers instead of executing
    # arbitrary pickled code.
    weights_only=True,
)

model.load_state_dict(state_dict)
model.to(DEVICE)
model.eval()  

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


[34m[1mwandb[0m: Downloading large artifact 'unfreeze_2blocks_model:v0', 137.36MB. 1 files...
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 00:00:00.4 (366.3MB/s)


c:\Users\yjneo\workspace\hecto_deepfake\notebooks\artifacts\unfreeze_2blocks_model-v0


  state_dict = torch.load(


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=384, out_features=3072, bias=True)
            (intermed

In [49]:
TEST_DIR = Path("C:/Users/yjneo/Downloads/open/test_data")  # test 데이터 경로

In [50]:
# Collect every regular file directly inside TEST_DIR, in sorted order.
files = sorted([p for p in TEST_DIR.iterdir() if p.is_file()])
print(f"Test data length: {len(files)}")

# Maps file name -> mean fake probability over the file's frames.
results: Dict[str, float] = {}

# Preprocessing and inference
for file_path in tqdm(files, desc="Processing"):
    out = preprocess_one(file_path)
    
    # 1. Log the error; no entry is stored in results for this file.
    if out.error:
        print(f"[WARN] {out.filename}: {out.error}")
    
    # 2. Normal inference: average the per-frame fake probabilities.
    elif out.imgs:
        probs = infer_fake_probs(out.imgs)
        results[out.filename] = float(np.mean(probs)) if probs else 0.0
    
    # 3. Neither an error nor any frames: fall back to 0.0 (real).
    else:
        results[out.filename] = 0.0

print(f"Inference completed. Processed: {len(results)} files")

Test data length: 500


Processing: 100%|██████████| 500/500 [23:30<00:00,  2.82s/it]

Inference completed. Processed: 500 files





# Submission

In [51]:
# Fill the sample submission: look up each file name in results; files without
# a result get a probability of 0.0.
submission = pd.read_csv('C:/Users/yjneo/workspace/hecto_deepfake/sample_submission.csv')
submission['prob'] = submission['filename'].map(results).fillna(0.0)

# Save the CSV
submission.to_csv(OUT_CSV, encoding='utf-8-sig', index=False)
print(f"Saved submission to: {OUT_CSV}")

Saved submission to: output\buildborderless_CommunityForensics-DeepfakeDet-ViT_submission.csv
