In [1]:
import os
import sys
from pathlib import Path
import numpy as np



PROJECT_ROOT = Path("..").resolve()
print("PROJECT_ROOT:", PROJECT_ROOT)

# make `src` importable
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.data.dataloader import ForgeryDataset


PROJECT_ROOT: C:\Users\piiop\Desktop\Portfolio\Projects\RecodAI_LUC


In [3]:
# Cell 2: define paths (match your cv command)
train_authentic = PROJECT_ROOT / "data" / "train_images" / "authentic"   # <- adjust if needed
train_forged    = PROJECT_ROOT / "data" / "train_images" / "forged"      # <- adjust if needed
train_masks     = PROJECT_ROOT / "data" / "train_masks"                  # <- adjust if needed

print("authentic:", train_authentic, "exists:", train_authentic.exists())
print("forged   :", train_forged,    "exists:", train_forged.exists())
print("masks    :", train_masks,     "exists:", train_masks.exists())


authentic: C:\Users\piiop\Desktop\Portfolio\Projects\RecodAI_LUC\data\train_images\authentic exists: True
forged   : C:\Users\piiop\Desktop\Portfolio\Projects\RecodAI_LUC\data\train_images\forged exists: True
masks    : C:\Users\piiop\Desktop\Portfolio\Projects\RecodAI_LUC\data\train_masks exists: True


In [None]:
# Cell 3: build dataset and inspect
ds = ForgeryDataset(
    authentic_path=str(train_authentic),
    forged_path=str(train_forged),
    masks_path=str(train_masks),
    supp_forged_path=None,
    supp_masks_path=None,
    transform=None,
    is_train=True,
)

print("Total samples:", len(ds))

# if empty, inspect why
if len(ds) == 0:
    # Quickly list what files it *would* see
    authentic_files = sorted(os.listdir(train_authentic)) if train_authentic.exists() else []
    forged_files    = sorted(os.listdir(train_forged))    if train_forged.exists() else []
    mask_files      = sorted(os.listdir(train_masks))     if train_masks.exists() else []

    print(f"#authentic files: {len(authentic_files)}")
    print(f"#forged files   : {len(forged_files)}")
    print(f"#mask files     : {len(mask_files)}")

    print("First few authentic:", authentic_files[:5])
    print("First few forged   :", forged_files[:5])
    print("First few masks    :", mask_files[:5])
else:
    # peek at first few entries
    print("First 3 samples:")
    for sample in ds.samples[:3]:
        print(sample)


In [5]:
# Cell 4: quick DataLoader sanity check 
from torch.utils.data import DataLoader

dl = DataLoader(ds, batch_size=2, shuffle=False)
batch = next(iter(dl))
images, targets = batch

print("images shape:", images.shape)
print("targets keys:", targets.keys())
print("is_forged:", targets["is_forged"])


RuntimeError: stack expects each tensor to be equal size, but got [3, 512, 648] at entry 0 and [3, 1200, 1600] at entry 1

In [4]:
ds = ForgeryDataset(train_authentic, train_forged, train_masks, transform=None)
pos = sum(1 for s in ds.samples if s["is_forged"] and os.path.exists(s["mask_path"]) and np.load(s["mask_path"]).sum() > 0)
print("forged samples:", sum(s["is_forged"] for s in ds.samples))
print("forged-with-positive-mask:", pos)


forged samples: 2751
forged-with-positive-mask: 2751
