In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from pathlib import Path
import torch

import glob
from PIL import Image
import cv2

COMP_DIR   = "/kaggle/input/recodai-luc-scientific-image-forgery-detection" 
TRAIN_DIR  = f"{COMP_DIR}/train_images"
MASK_DIR   = f"{COMP_DIR}/train_masks"
TEST_DIR   = f"{COMP_DIR}/test_images"
SUB_PATH = "submission.csv"

# Outputs
OUT_DIR = "/kaggle/working"
PREVIEW_DIR = f"{OUT_DIR}/preview"
os.makedirs(PREVIEW_DIR, exist_ok=True)
os.makedirs(OUT_DIR, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Visualization and sampling params
SEED                 = 42
DISPLAY_SIZE         = 768          # resize for display/speed (square)
GRID_SIZE            = 512          # size for grid thumbnails
MAX_GRID_IMAGES      = 12           # limit how many to draw in grids
TEMPLATE_MATCH_SCALE = 0.5          # downscale for template matching (speed)
np.random.seed(SEED)

print("TRAIN_DIR:", TRAIN_DIR)
print("MASK_DIR :", MASK_DIR)
print("PREVIEW to:", PREVIEW_DIR)

def build_items(train_dir: str, mask_dir: str):
    items = []
    for cls in ["authentic", "forged"]:
        img_dir = f"{train_dir}/{cls}"
        if not os.path.exists(img_dir):
            continue
        for p in glob.glob(os.path.join(img_dir, "*")):
            case_id = Path(p).stem
            mask_path = None
            if cls == "forged":
                cand = os.path.join(mask_dir, f"{case_id}.npy")
                mask_path = cand if os.path.exists(cand) else None
            items.append({
                "path": p,
                "case_id": case_id,
                "label": 1 if cls == "forged" else 0,
                "mask_path": mask_path
            })
    return items

items = build_items(TRAIN_DIR, MASK_DIR)

TRAIN_DIR: /kaggle/input/recodai-luc-scientific-image-forgery-detection/train_images
MASK_DIR : /kaggle/input/recodai-luc-scientific-image-forgery-detection/train_masks
PREVIEW to: /kaggle/working/preview


In [2]:
# functions to rle encode for submission
import json
import numpy as np
import cv2
from PIL import Image
import torch

# Modified: Disabled njit due to version of numpy too high


def _rle_encode_jit(x: np.ndarray, fg_val: int = 1) -> list:
    dots = np.where(x.T.flatten() == fg_val)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def rle_encode(masks: list[np.ndarray], fg_val: int = 1) -> str:
    return ';'.join([json.dumps([int(x) for x in _rle_encode_jit(m.astype(np.uint8), fg_val)]) for m in masks])

In [3]:
### ResNet18 Model

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, 
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(identity)
        out = F.relu(out)
        return out

class ResNet18_Custom(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNet18_Custom, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 2, stride=1)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride):
        layers = []
        layers.append(BasicBlock(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
   
        for _ in range(1, num_blocks):
            layers.append(BasicBlock(out_channels, out_channels))
            
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [4]:
import os
import glob
import json
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import Image
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models
from tqdm import tqdm


class ForgerySupConDataset(Dataset):
    def __init__(self, items, patch_size=256, mode="train"):
        self.items = [x for x in items if x['mask_path'] is not None] 
        self.patch_size = patch_size
        self.mode = mode
        
        self.transform = T.Compose([T.ToTensor()])
        self.augment = T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.1),
            T.GaussianBlur(3)
        ])

    def __len__(self):
        return len(self.items)

    def extract_patch(self, img, y, x):
        h, w = img.shape[:2]
        y = max(0, min(y, h-1))
        x = max(0, min(x, w-1))
        
        half = self.patch_size // 2
        y1, y2 = max(0, y-half), min(h, y+half)
        x1, x2 = max(0, x-half), min(w, x+half)
        
        patch = img[y1:y2, x1:x2]
        
        if patch.shape[0] != self.patch_size or patch.shape[1] != self.patch_size:
            patch = cv2.resize(patch, (self.patch_size, self.patch_size))
        return patch

    def __getitem__(self, idx):
        item = self.items[idx]
        try:
            img = np.array(Image.open(item['path']).convert("RGB"))
            mask = np.load(item['mask_path'])
            if mask.ndim == 3: mask = mask[..., 0]
            
            if mask.shape != img.shape[:2]:
                mask = cv2.resize(mask, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)
            
            target = random.choice([0, 1])
            coords = np.argwhere(mask == target)
            if len(coords) == 0: 
                target = 1 - target
                coords = np.argwhere(mask == target)
            
            if len(coords) == 0: 
                coords = np.array([[img.shape[0]//2, img.shape[1]//2]])
                target = 0

            y, x = coords[np.random.choice(len(coords))]
            patch = self.extract_patch(img, y, x)
            
            patch_pil = Image.fromarray(patch)
            if self.mode == "train":
                patch_t = self.transform(self.augment(patch_pil))
            else:
                patch_t = self.transform(patch_pil)
                
            return patch_t, torch.tensor(target).long()
            
        except Exception as e:
            return torch.zeros(3, self.patch_size, self.patch_size), torch.tensor(0)


class SupConLoss(nn.Module):
    def __init__(self, temperature=0.07):
        super().__init__()
        self.temperature = temperature

    def forward(self, features, labels):
        features = F.normalize(features, dim=1)
        similarity_matrix = torch.matmul(features, features.T)
        
        labels = labels.contiguous().view(-1, 1)
        mask = torch.eq(labels, labels.T).float().to(features.device)
        
        logits_mask = torch.scatter(
            torch.ones_like(mask), 1, 
            torch.arange(features.shape[0]).view(-1, 1).to(features.device), 0
        )
        mask = mask * logits_mask
        
        logits = similarity_matrix / self.temperature
        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True) + 1e-6)
        
        sum_mask = mask.sum(1)
        sum_mask[sum_mask == 0] = 1
        mean_log_prob_pos = (mask * log_prob).sum(1) / sum_mask
        
        loss = - mean_log_prob_pos
        return loss.mean()


class ForgeryPipeline:
    def __init__(self, device="cuda"):
        self.device = device
        
        # self.backbone = models.resnet18(weights="DEFAULT")
        self.backbone = ResNet18_Custom(num_classes=512)
        self.backbone.fc = nn.Identity() 
        
        self.projector = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 64)
        )
        
        self.classifier = nn.Linear(512, 2) 
        
        self.backbone.to(device)
        self.projector.to(device)
        self.classifier.to(device)

    def train_stage1_supcon(self, items, epochs=1, batch_size=64):
        print("\n=== Stage 1: Supervised Contrastive Pre-training ===")
        dataset = ForgerySupConDataset(items, mode="train")
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
        
        optimizer = torch.optim.Adam(
            list(self.backbone.parameters()) + list(self.projector.parameters()), 
            lr=1e-4
        )
        criterion = SupConLoss()
        
        self.backbone.train()
        self.projector.train()
        
        for epoch in range(epochs):
            total_loss = 0
            pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")
            for img, label in pbar:
                img, label = img.to(self.device), label.to(self.device)
                
                feats = self.backbone(img)
                proj = self.projector(feats)
                
                loss = criterion(proj, label)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                pbar.set_postfix({'loss': f"{loss.item():.4f}"})

    def train_stage2_linear(self, items, epochs=1, batch_size=64):
        print("\n=== Stage 2: Training Linear Classifier ===")
        dataset = ForgerySupConDataset(items, mode="train")
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
        
        for param in self.backbone.parameters():
            param.requires_grad = False
            
        optimizer = torch.optim.Adam(self.classifier.parameters(), lr=1e-3)
        criterion = nn.CrossEntropyLoss()
        
        self.backbone.eval()
        self.classifier.train()
        
        for epoch in range(epochs):
            correct = 0
            total = 0
            total_loss = 0
            
            pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")
            for img, label in pbar:
                img, label = img.to(self.device), label.to(self.device)
                
                with torch.no_grad():
                    feats = self.backbone(img)
                
                logits = self.classifier(feats)
                loss = criterion(logits, label)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                _, pred = torch.max(logits, 1)
                correct += (pred == label).sum().item()
                total += label.size(0)
                total_loss += loss.item()
                
                pbar.set_postfix({'acc': f"{correct/total:.2%}", 'loss': f"{loss.item():.4f}"})

    # Infer image
    @torch.no_grad()
    def scan_image(self, img_path, patch_size=256, stride=64):
        self.backbone.eval()
        self.classifier.eval()
        
        try:
            img_pil = Image.open(img_path).convert("RGB")
        except:
            return None, None, 0.0

        # Resize if too small
        if img_pil.width < patch_size or img_pil.height < patch_size:
            ratio = patch_size / min(img_pil.width, img_pil.height)
            img_pil = img_pil.resize((int(img_pil.width*ratio)+1, int(img_pil.height*ratio)+1))
            
        img = np.array(img_pil)
        h, w = img.shape[:2]
        
        heatmap = np.zeros((h, w), dtype=np.float32)
        counts = np.zeros((h, w), dtype=np.float32)
        
        patches = []
        coords = []
        transform = T.Compose([T.ToTensor()])
        
        for y in range(0, h-patch_size+1, stride):
            for x in range(0, w-patch_size+1, stride):
                crop = img[y:y+patch_size, x:x+patch_size]
                patches.append(transform(Image.fromarray(crop)))
                coords.append((y, x))
        
        if not patches: return None, img, 0.0
        
        # batch_t = torch.stack(patches).to(self.device)
        batch_t = torch.stack(patches)
        probs = []
        
        for i in range(0, len(batch_t), 32):
            b = batch_t[i:i+32].to(self.device)
            # b = batch_t[i:i+32]
            feats = self.backbone(b)
            logits = self.classifier(feats)
            prob = F.softmax(logits, dim=1)[:, 1] 
            probs.append(prob.cpu().numpy())
            
        probs = np.concatenate(probs)
        
        max_prob = 0
        for p, (y, x) in zip(probs, coords):
            heatmap[y:y+patch_size, x:x+patch_size] += p
            counts[y:y+patch_size, x:x+patch_size] += 1
            max_prob = max(max_prob, p)
            
        counts[counts==0] = 1
        heatmap /= counts
        
        return heatmap, img, max_prob

In [5]:
# Training

import os, glob, json
from pathlib import Path
import pandas as pd
import numpy as np
from PIL import ImageOps
import cv2
import random

# Thresholds
GLOBAL_THRESHOLD = 0.5  # If max_prob < this, image is authentic
PIXEL_THRESHOLD = 0.5   # Heatmap > this becomes white in mask

test_paths = sorted(glob.glob(str(Path(TEST_DIR) / "*")))
print("Test images:", len(test_paths))

forged_paths = sorted(glob.glob(str(Path(f"{TRAIN_DIR}/forged") / "*")))
print("Forged images:", len(forged_paths))

# training
items = []
for p in forged_paths:
    case_id = Path(p).stem
    mask = f"{MASK_DIR}/{case_id}.npy"
    if os.path.exists(mask): items.append({'path': p, 'mask_path': mask})

print(f"Found {len(items)} training images")

pipeline = ForgeryPipeline(device="cuda" if torch.cuda.is_available() else "cpu")

pipeline.train_stage1_supcon(items, epochs=5) 
pipeline.train_stage2_linear(items, epochs=5)

Test images: 1
Forged images: 2751
Found 2751 training images

=== Stage 1: Supervised Contrastive Pre-training ===


Epoch 1/5: 100%|██████████| 43/43 [01:20<00:00,  1.87s/it, loss=4.1267]
Epoch 2/5: 100%|██████████| 43/43 [01:13<00:00,  1.70s/it, loss=4.1296]
Epoch 3/5: 100%|██████████| 43/43 [01:10<00:00,  1.63s/it, loss=4.1251]
Epoch 4/5: 100%|██████████| 43/43 [01:10<00:00,  1.64s/it, loss=4.1277]
Epoch 5/5: 100%|██████████| 43/43 [01:10<00:00,  1.64s/it, loss=4.1291]



=== Stage 2: Training Linear Classifier ===


Epoch 1/5: 100%|██████████| 43/43 [01:10<00:00,  1.64s/it, acc=91.42%, loss=0.2646]
Epoch 2/5: 100%|██████████| 43/43 [01:12<00:00,  1.68s/it, acc=93.82%, loss=0.2111]
Epoch 3/5: 100%|██████████| 43/43 [01:10<00:00,  1.63s/it, acc=94.77%, loss=0.1313]
Epoch 4/5: 100%|██████████| 43/43 [01:08<00:00,  1.60s/it, acc=93.53%, loss=0.2771]
Epoch 5/5: 100%|██████████| 43/43 [01:13<00:00,  1.70s/it, acc=94.18%, loss=0.1731]


In [6]:
AUTH_IMAGES = f"{TRAIN_DIR}/authentic"
FORGE_IMAGES = f"{TRAIN_DIR}/forged"

dirs = AUTH_IMAGES,FORGE_IMAGES
for d in dirs:
    auth_count,forge_count = 0,0
    print("testing directory: "+d)
    path_images = sorted(
        f for f in os.listdir(d)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    )[500:700]
    for fname in path_images:
        heatmap, processed_img, max_prob = pipeline.scan_image(os.path.join(d, fname))
        if heatmap is None or max_prob < GLOBAL_THRESHOLD:
            auth_count += 1
        else:
            forge_count += 1
    print(f"auth count:{auth_count}, forge count: {forge_count}")

testing directory: /kaggle/input/recodai-luc-scientific-image-forgery-detection/train_images/authentic
auth count:200, forge count: 0
testing directory: /kaggle/input/recodai-luc-scientific-image-forgery-detection/train_images/forged
auth count:199, forge count: 1


In [7]:
# Submission

print("\n=== Starting Inference on Test Set ===")
test_paths = sorted(glob.glob(str(Path(TEST_DIR) / "*")))
print("Test images:", len(test_paths))

rows = []

for p in tqdm(test_paths):
    case_id = Path(p).stem
    
    # Run Inference
    heatmap, processed_img, max_prob = pipeline.scan_image(p, stride=64)
    
    # Authentic vs Forged
    if heatmap is None or max_prob < GLOBAL_THRESHOLD:
        annot = "authentic"
    else:
        # Create Binary Mask
        original_img = Image.open(p)
        orig_w, orig_h = original_img.size
        heatmap_resized = cv2.resize(heatmap, (orig_w, orig_h))
        binary_mask = (heatmap_resized > PIXEL_THRESHOLD).astype(np.uint8)
        annot = rle_encode(binary_mask)
        
    rows.append({"case_id": case_id, "annotation": annot})

sub = pd.DataFrame(rows, columns=["case_id", "annotation"])

# Align with sample_submission if it exists
ss_path = str(Path(COMP_DIR) / "sample_submission.csv")
if os.path.exists(ss_path):
    ss = pd.read_csv(ss_path)
    ss["case_id"] = ss["case_id"].astype(str)
    sub["case_id"] = sub["case_id"].astype(str)
    
    # Left join to ensure all IDs are present
    final_sub = ss[["case_id"]].merge(sub, on="case_id", how="left")
    final_sub["annotation"] = final_sub["annotation"].fillna("authentic")
else:
    final_sub = sub
    
final_sub.to_csv(SUB_PATH, index=False)
print("✅ Wrote submission:", SUB_PATH)
print(final_sub.head(10))


=== Starting Inference on Test Set ===
Test images: 1


100%|██████████| 1/1 [00:02<00:00,  2.08s/it]

✅ Wrote submission: submission.csv
  case_id annotation
0      45  authentic



