In [None]:
# Colab: Mount Google Drive
import sys, os
# The presence of `google.colab` among the already-imported modules is used
# as the "running inside Colab" signal.
IN_COLAB = 'google.colab' in sys.modules
if not IN_COLAB:
    print('Not in Colab; skipping drive mount.')
else:
    # Imported lazily: the package is only importable inside Colab.
    from google.colab import drive
    drive.mount('/content/drive')
    print('Drive mounted at /content/drive')

# TMN Mask R-CNN Training (Dedicated Notebook)
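This notebook focuses only on training, to avoid mixing it with augmentation tasks. It sets up the GPU, loads the DS2 Dense TMN dataset from Google Drive, trains Mask R-CNN, and saves checkpoints; the final cells run a quick visual check and a COCO mAP evaluation on the test split.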

In [None]:
# Project configuration and library imports
import os, sys, json, glob, time
import torch, torchvision
from PIL import Image
from pathlib import Path
# Report whether CUDA is usable and, if so, the name of device 0.
cuda_ok = torch.cuda.is_available()
device_label = torch.cuda.get_device_name(0) if cuda_ok else 'CPU'
print('GPU:', cuda_ok, device_label)

In [None]:
# Cell organization: tags and sections
# Tags: preprocess, train, eval, logs
print('Tags: preprocess/train/eval/logs')

In [None]:
# Parameterization: config dictionary and dataclass
from dataclasses import dataclass

@dataclass
class Config:
    """Paths and hyper-parameters shared by the cells of this notebook.

    Attributes:
        OUT_ROOT: Root of the TMN dataset on Drive. The notebook resolves
            ``jsonlar/``, ``logs/``, ``checkpoints/`` and ``reports/``
            underneath it.
        IMG_ROOT: Directory that holds the image files. When left empty
            (the default) it is derived as ``<OUT_ROOT>/images`` so that
            overriding ``OUT_ROOT`` alone keeps both paths in sync.
        BATCH_SIZE: Requested batch size (may be adjusted per GPU later).
        EPOCHS: Number of training epochs.
        LR: SGD learning rate.
        MOMENTUM: SGD momentum.
        WEIGHT_DECAY: SGD weight decay.
    """
    # We now train from the TMN outputs rather than from DS2 Dense (SRC_ROOT).
    OUT_ROOT: str = '/content/drive/MyDrive/omr_dataset/dataset/ds2/ds2_dense_tmn'
    IMG_ROOT: str = ''
    BATCH_SIZE: int = 8
    EPOCHS: int = 3
    LR: float = 0.005
    MOMENTUM: float = 0.9
    WEIGHT_DECAY: float = 0.0005

    def __post_init__(self):
        # Derive the image directory from OUT_ROOT unless the caller pinned
        # it explicitly; this removes the duplicated hard-coded path that
        # could silently drift from OUT_ROOT.
        if not self.IMG_ROOT:
            self.IMG_ROOT = self.OUT_ROOT.rstrip('/') + '/images'


cfg = Config()
print(cfg)

In [None]:
# Logging
import logging, os
# Keep the training log next to the other run artefacts under OUT_ROOT.
LOG_DIR = os.path.join(cfg.OUT_ROOT, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)
# force=True: basicConfig() is otherwise a silent no-op whenever the root
# logger already has handlers (installed by the notebook host, or by a
# previous run of this cell), in which case train.log would never be written
# and INFO records could be dropped.
logging.basicConfig(
    level=logging.INFO,
    handlers=[
        logging.FileHandler(os.path.join(LOG_DIR, 'train.log'), encoding='utf-8'),
        logging.StreamHandler(),
    ],
    force=True,
)
logging.info('Logging initialized')

In [None]:
# Data processing pipeline and Dataset
import torch
from torch.utils.data import Dataset, DataLoader

class DS2TMNDataset(Dataset):
    """Detection dataset backed by one or more DS2 TMN JSON shards.

    Every shard is read for an ``images`` collection (a list, or a dict whose
    values are the records) and an ``annotations`` collection (a dict whose
    values are the records, or a list). Image records need an ``id`` and a
    ``filename``/``file_name``; annotation records need an ``img_id``, a box
    under ``a_bbox`` (falling back to ``bbox``) and optionally a ``cat_id``
    list whose first entry becomes the label.

    Args:
        images_dir: Directory the image file names are resolved against.
        json_paths: Iterable of annotation shard paths.
        transform: Optional callable applied to the PIL image; when omitted
            the image is converted with ``torchvision.transforms.ToTensor``.

    NOTE(review): targets contain only ``boxes``, ``labels`` and ``image_id``.
    torchvision's Mask R-CNN reads ``targets[i]['masks']`` while training --
    confirm where the instance masks are supposed to come from.
    NOTE(review): boxes are passed through as ``[x1, y1, x2, y2]``; verify
    that the ``bbox`` fallback really uses that layout and not ``[x, y, w, h]``.
    """

    def __init__(self, images_dir, json_paths, transform=None):
        import json
        self.images_dir = images_dir
        self.transform = transform
        self.images = []
        self.ann_by_img = {}
        for jp in json_paths:
            with open(jp, 'r', encoding='utf-8') as f:
                data = json.load(f)
            imgs = data.get('images') or []
            anns = data.get('annotations') or {}
            if isinstance(imgs, dict):
                imgs = list(imgs.values())
            # Accept both containers for annotations, mirroring `images`.
            ann_records = list(anns.values()) if isinstance(anns, dict) else list(anns)
            for im in imgs:
                fn = im.get('filename') or im.get('file_name')
                # Records without a file name or id cannot be loaded or
                # matched to annotations, so they are skipped.
                if not fn or im.get('id') is None:
                    continue
                self.images.append({'id': int(im.get('id')), 'filename': fn})
            for rec in ann_records:
                img_id = rec.get('img_id')
                if img_id is None:
                    continue
                self.ann_by_img.setdefault(int(img_id), []).append(rec)
        # De-duplicate by file name, keeping the first occurrence across shards.
        seen = set()
        uniq = []
        for im in self.images:
            if im['filename'] in seen:
                continue
            seen.add(im['filename'])
            uniq.append(im)
        self.images = uniq
        self.to_tensor = torchvision.transforms.ToTensor()

    def __len__(self):
        """Return the number of unique images."""
        return len(self.images)

    def _build_target(self, image_id):
        """Assemble the detection target dict for ``image_id``.

        Returns a dict with ``boxes`` (float32, always shaped ``(N, 4)`` --
        including ``(0, 4)`` when there is nothing to detect), ``labels``
        (int64, ``(N,)``) and ``image_id`` (one-element tensor).
        """
        boxes = []
        labels = []
        for a in self.ann_by_img.get(image_id, []):
            b = a.get('a_bbox') or a.get('bbox')
            if not b or len(b) < 4:
                continue
            x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
            # torchvision detection models reject boxes without positive
            # width and height during training; drop them here instead of
            # failing mid-epoch.
            if x2 <= x1 or y2 <= y1:
                continue
            cats = a.get('cat_id') or []
            lab = int(cats[0]) if (isinstance(cats, list) and cats) else 0
            boxes.append([x1, y1, x2, y2])
            labels.append(lab)
        return {
            # reshape keeps the (N, 4) contract even when N == 0.
            'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([image_id]),
        }

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``."""
        im = self.images[idx]
        # Resolve against the directory given to the constructor rather than
        # a notebook-level global, so the dataset honours its own argument.
        path = os.path.join(self.images_dir, im['filename'])
        with Image.open(path) as fh:
            img = fh.convert('RGB')
        target = self._build_target(im['id'])
        img = self.transform(img) if self.transform else self.to_tensor(img)
        return img, target

# Discover the train/test annotation shards under <OUT_ROOT>/jsonlar.
train_jsons = sorted(glob.glob(f"{cfg.OUT_ROOT}/jsonlar/*train*.json"))
test_jsons = sorted(glob.glob(f"{cfg.OUT_ROOT}/jsonlar/*test*.json"))
# Fail fast with a readable message: an empty glob would otherwise yield an
# empty dataset that only surfaces later as an opaque loader/training error.
if not train_jsons:
    raise FileNotFoundError(f"No train JSONs found under {cfg.OUT_ROOT}/jsonlar")
if not test_jsons:
    print('Warning: no test JSONs found; evaluation cells will have nothing to run on.')
train_ds = DS2TMNDataset(images_dir=cfg.IMG_ROOT, json_paths=train_jsons)
test_ds = DS2TMNDataset(images_dir=cfg.IMG_ROOT, json_paths=test_jsons)
if len(train_ds) == 0:
    raise ValueError(f"Train JSONs under {cfg.OUT_ROOT}/jsonlar list no usable images")

def collate_fn(batch):
    """Transpose a list of samples into one tuple per sample field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked. An empty batch yields an empty tuple.
    """
    columns = zip(*batch)
    return tuple(columns)

# Start from the configured batch size, then adapt it to the detected GPU:
# A100 / L4 / V100 names raise it to at least 8, T4 / P100 names cap it at 4.
bs = cfg.BATCH_SIZE
if torch.cuda.is_available():
    name = torch.cuda.get_device_name(0).lower()
    if any(tag in name for tag in ('a100', 'l4', 'v100')):
        bs = max(bs, 8)
    elif any(tag in name for tag in ('t4', 'p100')):
        bs = min(bs, 4)

# Only the training loader shuffles; both use the tuple-transposing collate_fn.
train_loader = DataLoader(
    train_ds,
    batch_size=bs,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_ds,
    batch_size=bs,
    shuffle=False,
    num_workers=2,
    collate_fn=collate_fn,
)
print(dict(batch_size=bs, train_len=len(train_ds), test_len=len(test_ds)))

In [None]:
# Modularization and model setup
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Class count seen by both heads: one background slot plus the 8 TMN classes.
num_classes = 1 + 8  # background + 8 TMN
# weights=None: no pretrained detection checkpoint is loaded for the model.
model = maskrcnn_resnet50_fpn(weights=None)

# Read the input widths of the stock heads before replacing them.
in_features = model.roi_heads.box_predictor.cls_score.in_features
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels

# Swap in fresh box and mask predictors sized for `num_classes`
# (256 is the hidden width handed to the mask predictor).
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)

# Prefer the GPU when one is available.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
print('Model on', device)

In [None]:
# Training loop (AMP) and checkpointing
from torch.optim import SGD
from torch.cuda.amp import GradScaler, autocast
import os, time

import math  # used only by the finite-loss guard in the loop below

optimizer = SGD(model.parameters(), lr=cfg.LR, momentum=cfg.MOMENTUM, weight_decay=cfg.WEIGHT_DECAY)
# Mixed precision is only switched on when CUDA is available.
scaler = GradScaler(enabled=torch.cuda.is_available())
CKPT_DIR = os.path.join(cfg.OUT_ROOT, 'checkpoints')
os.makedirs(CKPT_DIR, exist_ok=True)

# NOTE(review): the targets fed here carry boxes/labels/image_id only;
# confirm the Mask R-CNN mask head gets the per-instance masks it needs.
model.train()
for epoch in range(cfg.EPOCHS):
    t0 = time.time()
    total = 0.0
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        optimizer.zero_grad(set_to_none=True)
        with autocast(enabled=torch.cuda.is_available()):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()
        # Stop on a diverged run instead of averaging NaN/inf into the log
        # and writing checkpoints of a broken model.
        if not math.isfinite(loss_value):
            parts = {k: float(v) for k, v in loss_dict.items()}
            raise RuntimeError(f'Non-finite loss {loss_value} in epoch {epoch+1}: {parts}')
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()
        total += loss_value
    dur = time.time() - t0
    # max(1, ...) guards the average against an empty loader.
    avg = total / max(1, len(train_loader))
    logging.info({'epoch': epoch+1, 'loss': round(avg, 3), 'sec': round(dur, 1)})
    ckpt = os.path.join(CKPT_DIR, f'maskrcnn_epoch{epoch+1}.pt')
    # The scaler state is stored as well so an AMP run can be resumed with
    # the same loss scale; existing keys are unchanged.
    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scaler': scaler.state_dict(),
        'epoch': epoch+1,
    }, ckpt)
    logging.info({'saved': ckpt})
print('Done')

In [None]:
# Evaluation and visualization
import matplotlib.pyplot as plt
# Switch to inference mode for the visual spot-check below.
model.eval()


@torch.no_grad()
def eval_show(n=3, thr=0.5):
    """Plot up to ``n`` test images with their predicted boxes.

    Images are taken from ``test_loader`` in order; each one is shown in its
    own 6x6 figure (permuted from channel-first to channel-last for imshow)
    with a yellow outline for every prediction whose score is not below
    ``thr``.

    Args:
        n: Maximum number of images to display.
        thr: Score threshold; predictions scoring below it are not drawn.
    """
    drawn = 0
    for batch_images, _targets in test_loader:
        batch_images = [image.to(device) for image in batch_images]
        predictions = model(batch_images)
        for image, pred in zip(batch_images, predictions):
            if drawn >= n:
                return
            fig, ax = plt.subplots(figsize=(6, 6))
            ax.imshow(image.permute(1, 2, 0).cpu().numpy())
            pred_boxes = pred['boxes'].cpu().numpy()
            pred_scores = pred['scores'].cpu().numpy()
            for (x1, y1, x2, y2), score in zip(pred_boxes, pred_scores):
                if score < thr:
                    continue
                outline = plt.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    fill=False, color='y', linewidth=2,
                )
                ax.add_patch(outline)
            ax.set_title(f'detections >= {thr}')
            plt.show()
            drawn += 1


eval_show(3, 0.5)

In [None]:
# COCO mAP Evaluation on test shard
import json, os
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Ground truth for the COCO evaluation: the first test shard in sorted order.
# NOTE(review): this file must be readable by pycocotools as-is; confirm the
# shard layout matches what `COCO(...)` expects.
test_jsons = sorted(glob.glob(f"{cfg.OUT_ROOT}/jsonlar/*test*.json"))
assert test_jsons, 'No test JSONs found under OUT_ROOT/jsonlar'
test_json = test_jsons[0]
cocoGt = COCO(test_json)

# Run model to collect detections in COCO format
dets = []
model.eval()
import numpy as np


@torch.no_grad()
def collect_detections(max_images=500, score_thr=0.05):
    """Run the model over ``test_loader`` and append detections to ``dets``.

    Each kept prediction is appended to the notebook-level ``dets`` list as a
    COCO result dict: ``image_id``, ``category_id``, ``bbox`` converted from
    ``[x1, y1, x2, y2]`` to ``[x, y, width, height]``, and ``score``.

    Args:
        max_images: Stop after the batch in which this many images have been
            processed (the current batch is always finished first).
        score_thr: Predictions scoring below this value are discarded.
            Defaults to 0.05, the cut-off that used to be hard-coded.
    """
    # Make the function safe to call on its own, e.g. after the training
    # cell left the model in train mode.
    model.eval()
    processed = 0
    for images, targets in test_loader:
        images = [img.to(device) for img in images]
        outputs = model(images)
        for out, tgt in zip(outputs, targets):
            img_id = int(tgt['image_id'].item())
            boxes = out['boxes'].cpu().numpy()
            scores = out['scores'].cpu().numpy()
            labels = out['labels'].cpu().numpy()
            for b, s, lab in zip(boxes, scores, labels):
                if s < score_thr:
                    continue
                x1, y1, x2, y2 = b
                dets.append({
                    'image_id': img_id,
                    'category_id': int(lab),
                    'bbox': [float(x1), float(y1), float(x2-x1), float(y2-y1)],
                    'score': float(s)
                })
        processed += len(images)
        if processed >= max_images:
            break


collect_detections(max_images=2000)
print('Detections:', len(dets))

# Evaluate mAP
# The ground truth is a single test shard while test_loader may span several;
# loadRes() rejects results whose image ids are unknown to the ground truth,
# so only detections on images present in cocoGt are evaluated.
gt_img_ids = set(cocoGt.getImgIds())
eval_dets = [d for d in dets if d['image_id'] in gt_img_ids]
if len(eval_dets) < len(dets):
    print('Skipping', len(dets) - len(eval_dets), 'detections on images not in', test_json)

if len(eval_dets)==0:
    print('No detections to evaluate.')
else:
    cocoDt = cocoGt.loadRes(eval_dets)
    cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    # Save metrics (first three COCO summary statistics).
    metrics = {
        'AP@[.5:.95]': float(cocoEval.stats[0]),
        'AP@0.5': float(cocoEval.stats[1]),
        'AP@0.75': float(cocoEval.stats[2])
    }
    REPORT_DIR = os.path.join(cfg.OUT_ROOT, 'reports')
    os.makedirs(REPORT_DIR, exist_ok=True)
    metrics_path = os.path.join(REPORT_DIR, 'metrics.json')
    with open(metrics_path, 'w', encoding='utf-8') as f:
        json.dump(metrics, f)
    print('Saved metrics to', metrics_path)