In [None]:
# Upgrade torchvision to the latest release before anything imports it.
# NOTE(review): the version is unpinned, so re-runs may pull a different release
# (and possibly a different torch as a dependency) — consider pinning; a runtime
# restart may be needed after the upgrade. TODO confirm on the target Colab image.
!pip install -q --upgrade torchvision


In [2]:
# -----------------------------------------------
# 0) install extras (Torch+CUDA already in Colab)
# -----------------------------------------------
# NOTE(review): packages are installed unpinned; pin versions for reproducible runs.
!pip install -q gdown albumentations opencv-python pycocotools tqdm

# 1) mount Drive so we can save checkpoints
from google.colab import drive
drive.mount('/content/drive')

# 2) download pcos_data.zip from Google Drive
#    The archive is fetched by its Drive file ID and saved as pcos_data.zip in the
#    current working directory.
# NOTE(review): recent gdown releases deprecate the `--id` flag in favour of passing
# the ID/URL directly — confirm against the installed gdown version.
FILE_ID = "1UBKaTpuDxOdrIE9iFjivWmbTpZsUCeMd"   # <- Drive file ID of the dataset archive
!gdown --id $FILE_ID -O pcos_data.zip

# 3) ensure clean extract folder, then unzip quietly & overwrite
#    Removing the folder first keeps stale files from a previous run out of the dataset.
!rm -rf /content/data_split_v2
!unzip -o -qq pcos_data.zip -d /content
# Root of the extracted dataset; presumably the archive's top-level folder is
# data_split_v2 — verify if the archive layout changes.
DATA_ROOT = "/content/data_split_v2"

print("✅ Dataset ready at", DATA_ROOT)


[0mDrive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Downloading...
From (original): https://drive.google.com/uc?id=1UBKaTpuDxOdrIE9iFjivWmbTpZsUCeMd
From (redirected): https://drive.google.com/uc?id=1UBKaTpuDxOdrIE9iFjivWmbTpZsUCeMd&confirm=t&uuid=0c7db9c2-c71f-41a8-95af-a8a6a1d06440
To: /content/pcos_data.zip
100% 98.4M/98.4M [00:00<00:00, 242MB/s]
✅ Dataset ready at /content/data_split_v2


In [13]:
# ─────────────────────────────────────────────────────────────
# CELL 2: Fast sanity-check of your newly generated COCO JSONs
# ─────────────────────────────────────────────────────────────
from pycocotools.coco import COCO
import os

# Dataset root; must point at the folder the download cell extracted the archive into.
DATA_ROOT = "/content/data_split_v2"

# Load each split's COCO annotation file and report how many images and
# annotations it indexes, as a quick check that the JSONs parse.
ann_dir = os.path.join(DATA_ROOT, "annotations")
for split in ("train", "val", "test"):
    coco = COCO(os.path.join(ann_dir, f"{split}_coco.json"))
    print(f"{split:>5}: {len(coco.imgs):4d} images, {len(coco.anns):4d} GT boxes")


loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
train: 2450 images, 1595 GT boxes
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
  val:  543 images, 1276 GT boxes
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
 test:  539 images,  972 GT boxes


In [None]:
# Cell 3: Dataset, Model (MobileNetV3-FPN) & Full Training + Pseudo-Mask Pipeline
# with CLAHE preprocessing, improved pseudo-masking, and per-epoch validation + checkpointing
# (evaluate_map50 safely handles missing stats)

import os, cv2, json, numpy as np, torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Normalize
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.backbone_utils import mobilenet_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torch.nn.utils import clip_grad_norm_
from torch.amp import autocast, GradScaler
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# ────────────────────────────────────────────────────
# CONFIGURATION
# ────────────────────────────────────────────────────
# Paths and loader settings
DATA_ROOT   = "/content/data_split_v2"
BATCH_SIZE  = 8

# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Network input resolution. IMG_SIZE is also handed to cv2.resize as `dsize`;
# the image is square, so the (H, W) vs (W, H) ordering does not matter here.
H = W = 256
IMG_SIZE = (H, W)

# Training schedule
NUM_CLASSES = 2    # background + follicle
HEAD_EPOCHS = 10   # initial training before pseudo-masks
FT_EPOCHS   = 20   # fine-tuning after pseudo-masks

# Contrast-limited adaptive histogram equalisation, applied to grayscale images
# before the Gaussian blur in the dataset.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

# Per-channel ImageNet statistics, applied after scaling pixels to [0, 1].
imagenet_norm = Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# ────────────────────────────────────────────────────
# DATASET & DATALOADER
# ────────────────────────────────────────────────────
class FollicleSegDS(Dataset):
    """COCO-annotated dataset yielding ``(image, target)`` pairs for Mask R-CNN.

    Each image is read as grayscale, contrast-enhanced (CLAHE), blurred,
    optionally augmented, resized to ``IMG_SIZE``, replicated to three channels
    and normalised with ImageNet statistics. Instance masks are decoded from the
    COCO annotations, transformed with the same rotation/resize as the image,
    and boxes are derived from the transformed masks.

    Args:
        root: directory that contains the image files of this split.
        ann_path: path to the COCO JSON describing this split.
        augment: when True, apply random photometric jitter and a random
            rotation of up to ±15 degrees.
    """

    def __init__(self, root, ann_path, augment=False):
        self.root    = root
        self.coco    = COCO(ann_path)
        self.ids     = list(self.coco.imgs.keys())
        self.augment = augment

    def __len__(self):
        """Number of images indexed by the annotation file."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return the preprocessed image tensor and its target dict.

        Returns:
            image: float tensor of shape ``(3, H, W)``, ImageNet-normalised.
            target: dict with ``boxes`` (N, 4), ``labels`` (N,), ``masks``
                (N, H, W) and ``fname`` (the image file name).

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        info = self.coco.loadImgs(self.ids[idx])[0]
        path = os.path.join(self.root, info["file_name"])

        # 1) Load grayscale. cv2.imread signals failure by returning None, so
        #    fail here with the offending path instead of deep inside OpenCV.
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise FileNotFoundError(f"Could not read image: {path}")

        # 2) CLAHE + blur
        gray = clahe.apply(gray)
        gray = cv2.GaussianBlur(gray, (5,5), 0)

        # 3) Train-time augment grayscale. `M` stays None when no rotation is
        #    applied so the mask branch below can tell the two cases apart.
        M = None
        if self.augment:
            # gamma correction via a 256-entry lookup table
            gamma = np.random.uniform(0.9, 1.1)
            lut   = (((np.arange(256) / 255.0) ** gamma) * 255).astype(np.uint8)
            gray  = cv2.LUT(gray, lut)
            # brightness/contrast
            alpha = np.random.uniform(0.9,1.1); beta = np.random.uniform(-10,10)
            gray  = np.clip(alpha*gray + beta, 0,255).astype(np.uint8)
            # gaussian noise
            noise = np.random.randn(*gray.shape)*5
            gray  = np.clip(gray + noise, 0,255).astype(np.uint8)
            # rotation ±15° about the image centre
            angle = np.random.uniform(-15,15)
            M     = cv2.getRotationMatrix2D((gray.shape[1]/2,gray.shape[0]/2), angle, 1)
            gray  = cv2.warpAffine(gray, M, (gray.shape[1],gray.shape[0]),
                                   flags=cv2.INTER_NEAREST)

        # 4) Resize & scale to [0, 1], then replicate the single channel three times
        img   = cv2.resize(gray, IMG_SIZE).astype(np.float32)/255.0
        image = torch.from_numpy(img).unsqueeze(0).repeat(3,1,1)

        # 5) Load masks & build boxes
        ann_ids = self.coco.getAnnIds(imgIds=info["id"])
        anns    = self.coco.loadAnns(ann_ids)
        boxes, labels, masks = [], [], []
        for a in anns:
            m = self.coco.annToMask(a)
            if M is not None:
                # apply the same rotation the image received
                m = cv2.warpAffine(m, M, (m.shape[1],m.shape[0]), flags=cv2.INTER_NEAREST)
            # `interpolation` must be passed by keyword: the third positional
            # argument of cv2.resize is `dst`, not the interpolation flag.
            m = cv2.resize(m, IMG_SIZE, interpolation=cv2.INTER_NEAREST)
            ys, xs = np.where(m>0)
            # masks rotated/resized out of existence are dropped
            if xs.size and ys.size:
                x0,y0 = float(xs.min()), float(ys.min())
                x1,y1 = float(xs.max()), float(ys.max())
                # keep the box inside the image and at least one pixel wide/tall
                x0,x1 = np.clip(x0,0,W-2), np.clip(x1,1,W-1)
                y0,y1 = np.clip(y0,0,H-2), np.clip(y1,1,H-1)
                if x1<=x0: x1=x0+1
                if y1<=y0: y1=y0+1
                boxes.append([x0,y0,x1,y1])
                labels.append(1)
                masks.append(torch.from_numpy(m.astype(np.uint8)))

        # Empty placeholders keep the target schema stable for images without objects.
        target = {
            "boxes":  torch.tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0,4)),
            "labels": torch.tensor(labels, dtype=torch.int64)   if labels else torch.empty((0,),dtype=torch.int64),
            "masks":  torch.stack(masks)                        if masks else torch.empty((0,H,W),dtype=torch.uint8),
            "fname":  info["file_name"]
        }

        # 6) Final normalization
        image = imagenet_norm(image)
        return image, target

def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list of ``(image, target)`` samples becomes ``(images, targets)``; nothing
    is stacked, so samples may carry differently sized targets.
    """
    transposed = zip(*batch)
    return tuple(transposed)

def get_loaders(root):
    """Create a DataLoader for each of the train/val/test splits under ``root``.

    Images are read from ``<root>/<split>`` and annotations from
    ``<root>/annotations/<split>_coco.json``. Only the train split is augmented
    and shuffled.

    Returns:
        dict mapping split name to its DataLoader.
    """
    def _build(split):
        is_train = split == "train"
        dataset = FollicleSegDS(
            root=os.path.join(root, split),
            ann_path=os.path.join(root, "annotations", f"{split}_coco.json"),
            augment=is_train,
        )
        return DataLoader(
            dataset,
            batch_size=BATCH_SIZE,
            shuffle=is_train,
            num_workers=0,
            pin_memory=True,
            collate_fn=collate_fn,
        )

    return {split: _build(split) for split in ("train", "val", "test")}

# ────────────────────────────────────────────────────
# MODEL INITIALIZATION (MobileNetV3-FPN + custom anchors)
# ────────────────────────────────────────────────────
# Pretrained MobileNetV3-Large feature extractor wrapped with an FPN.
backbone = mobilenet_backbone("mobilenet_v3_large", pretrained=True, fpn=True)
backbone.out_channels = 256   # channel count MaskRCNN reads from the backbone

# Three anchor scales, each paired with the same three aspect ratios.
# NOTE(review): presumably one scale per feature map the backbone returns — confirm.
anchor_generator = AnchorGenerator(
    sizes=tuple((scale,) for scale in (32, 64, 128)),
    aspect_ratios=tuple((0.5, 1.0, 2.0) for _ in range(3)),
)

# Assemble the detector and place it on the training device.
model = MaskRCNN(
    backbone,
    num_classes=NUM_CLASSES,
    rpn_anchor_generator=anchor_generator,
)
model = model.to(DEVICE)

# ────────────────────────────────────────────────────
# EVALUATION HELPER (AP50) – safe stats access
# ────────────────────────────────────────────────────
def evaluate_map50(model, data_root, device, img_size, score_thr=0.3):
    """Compute bounding-box AP at IoU=0.50 on the validation split.

    Images get the same preprocessing the training dataset applies (grayscale,
    CLAHE, Gaussian blur, resize, three-channel replication, ImageNet
    normalisation). Predicted boxes are rescaled from the resized frame back to
    the original image size before being scored, because the COCO ground truth
    is expressed in original image coordinates.

    Args:
        model: detection model; it is switched to eval mode and left there.
        data_root: dataset root containing ``val/`` and
            ``annotations/val_coco.json``.
        device: torch device the input tensor is moved to.
        img_size: target size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.
        score_thr: detections scoring below this value are discarded.

    Returns:
        AP50 as a float, or 0.0 when no detection survives the threshold or
        the metric is undefined.

    Raises:
        FileNotFoundError: if a validation image cannot be read.

    Side effects:
        Writes the detections to ``/content/val_results.json``.
    """
    import contextlib, io

    annFile = os.path.join(data_root,"annotations","val_coco.json")
    imgDir  = os.path.join(data_root,"val")
    coco_gt = COCO(annFile)
    ids     = coco_gt.getImgIds()
    results = []
    model.eval()
    with torch.no_grad():
        for img_id in ids:
            info = coco_gt.loadImgs(img_id)[0]
            path = os.path.join(imgDir, info["file_name"])
            gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if gray is None:
                raise FileNotFoundError(f"Could not read image: {path}")
            orig_h, orig_w = gray.shape[:2]

            # Mirror the training-time preprocessing so the model sees the
            # same input distribution it was trained on.
            gray = clahe.apply(gray)
            gray = cv2.GaussianBlur(gray, (5,5), 0)
            im   = cv2.resize(gray, img_size).astype("float32")/255.
            t    = torch.from_numpy(im).unsqueeze(0).repeat(3,1,1).to(device)
            t    = imagenet_norm(t)
            out  = model([t])[0]

            # Factors mapping resized-frame coordinates back to the original image.
            sx = orig_w / float(img_size[0])
            sy = orig_h / float(img_size[1])
            for box, sc, lb in zip(out["boxes"].cpu().numpy(),
                                   out["scores"].cpu().numpy(),
                                   out["labels"].cpu().numpy()):
                if sc < score_thr: continue
                x0,y0,x1,y1 = box
                results.append({
                    "image_id":    img_id,
                    "category_id": int(lb),
                    # COCO boxes are [x, y, width, height]
                    "bbox":        [float(x0*sx), float(y0*sy),
                                    float((x1-x0)*sx), float((y1-y0)*sy)],
                    "score":       float(sc)
                })
    resFile = "/content/val_results.json"
    with open(resFile, "w") as f: json.dump(results, f)

    # loadRes cannot handle an empty detection list; AP is zero in that case.
    if not results:
        return 0.0

    coco_dt   = coco_gt.loadRes(resFile)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.params.imgIds = ids
    coco_eval.evaluate(); coco_eval.accumulate()
    # `stats` is only filled in by summarize(); its printed table is silenced
    # so the per-epoch training log stays compact.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_eval.summarize()

    stats = getattr(coco_eval, "stats", [])
    if isinstance(stats, (list, np.ndarray)) and len(stats) > 1:
        # index 1 is AP at IoU=0.50; COCOeval reports -1 when it is undefined
        return max(float(stats[1]), 0.0)
    if isinstance(stats, (list, np.ndarray)) and len(stats) > 0:
        return max(float(stats[0]), 0.0)
    return 0.0

# ────────────────────────────────────────────────────
# TRAINING: HEAD TRAIN, PSEUDO-MASK, FINE-TUNE + CHECKPOINT
# ────────────────────────────────────────────────────
loaders = get_loaders(DATA_ROOT)

# 1) Initial training (HEAD_EPOCHS) on all parameters
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=HEAD_EPOCHS)
scaler    = GradScaler()

def non_empty(loader):
    """Yield only the batches in which at least one sample has a ground-truth box."""
    for imgs, tgts in loader:
        if any(t["boxes"].numel()>0 for t in tgts):
            yield imgs, tgts

def _train_one_epoch(model, loader, optimizer, scaler):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Mixed precision is enabled only when training on CUDA. Gradients are
    unscaled before clipping so the clipping threshold applies to their true
    magnitude. Returns 0.0 when the loader yields no usable batch.
    """
    use_amp = DEVICE.type == "cuda"
    model.train(); total=0.0; cnt=0
    for imgs, tgts in non_empty(loader):
        imgs    = [i.to(DEVICE) for i in imgs]
        targets = [{"boxes":t["boxes"].to(DEVICE),
                    "labels":t["labels"].to(DEVICE),
                    "masks":t["masks"].to(DEVICE)} for t in tgts]
        with autocast(device_type=DEVICE.type, enabled=use_amp):
            loss = sum(model(imgs, targets).values())
        scaler.scale(loss).backward()
        # Clipping scaled gradients would compare against an inflated norm.
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer); scaler.update(); optimizer.zero_grad()
        total+=loss.item(); cnt+=1
    # Guard against a loader with no annotated batches.
    return total/cnt if cnt else 0.0

for ep in range(1, HEAD_EPOCHS+1):
    train_loss = _train_one_epoch(model, loaders["train"], optimizer, scaler)
    print(f"Epoch {ep}/{HEAD_EPOCHS}  loss={train_loss:.4f}")
    scheduler.step()

# 2) Pseudo-mask generation (threshold=0.5)
# Predict on un-augmented training images so the saved masks are not tied to a
# random rotation/noise draw.
# NOTE(review): masks are written at IMG_SIZE resolution, and FollicleSegDS reads
# its masks from the COCO JSON, so these PNGs are not consumed by the reload
# below — confirm how they are meant to feed into fine-tuning.
pseudo_loader = DataLoader(
    FollicleSegDS(
        root=os.path.join(DATA_ROOT, "train"),
        ann_path=os.path.join(DATA_ROOT, "annotations", "train_coco.json"),
        augment=False
    ),
    batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
    collate_fn=collate_fn
)
model.eval()
with torch.no_grad():
    for imgs, tgts in non_empty(pseudo_loader):
        outs = model([i.to(DEVICE) for i in imgs])
        for t, out in zip(tgts, outs):
            keep = out['scores'] > 0.5
            # splitext handles any extension; a plain ".jpg" replace would leave
            # other names unchanged and overwrite the source image with a mask.
            stem = os.path.splitext(t["fname"])[0]
            for i, m in enumerate(out['masks'][keep,0].cpu().numpy()):
                fname = f"{stem}_ps{i}.png"
                cv2.imwrite(os.path.join(DATA_ROOT, "train", fname),
                            (m>0.5).astype(np.uint8)*255)
# reload with pseudo-masks
loaders = get_loaders(DATA_ROOT)

# 3) Fine-tuning (FT_EPOCHS) with validation checkpointing
optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=FT_EPOCHS)
# Start below any reachable AP so the first epoch always writes a checkpoint,
# even when validation AP50 is 0.
best_ap50 = float("-inf")

for ep in range(1, FT_EPOCHS+1):
    train_loss = _train_one_epoch(model, loaders["train"], optimizer, scaler)
    scheduler.step()
    val_ap50   = evaluate_map50(model, DATA_ROOT, DEVICE, IMG_SIZE, score_thr=0.3)
    print(f"Fine Epoch {ep}/{FT_EPOCHS}  train_loss={train_loss:.4f}  val_AP50={val_ap50:.3f}")
    if val_ap50 > best_ap50:
        best_ap50 = val_ap50
        torch.save(model.state_dict(), "/content/best_maskrcnn.pth")
        print(f" → New best AP50: {best_ap50:.3f}")

print("✅ Training + pseudo-mask + validation complete.")


In [None]:
import os, cv2, json, torch
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Paths
annFile = os.path.join(DATA_ROOT, "annotations", "val_coco.json")
resFile = "/content/val_detections.json"
IMG_DIR = os.path.join(DATA_ROOT, "val")

# Load ground truth
coco_gt = COCO(annFile)
img_ids = coco_gt.getImgIds()

model.eval()
results = []

for img_id in img_ids:
    info = coco_gt.loadImgs(img_id)[0]
    img_path = os.path.join(IMG_DIR, info["file_name"])
    gray     = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    orig_h, orig_w = gray.shape[:2]

    # Same preprocessing as the training dataset: CLAHE, blur, resize, and the
    # single channel replicated three times.
    gray     = clahe.apply(gray)
    gray     = cv2.GaussianBlur(gray, (5,5), 0)
    resized  = cv2.resize(gray, IMG_SIZE)

    # create a [3,H,W] tensor, NOT [1,3,H,W]
    tensor = torch.from_numpy(resized.astype("float32")/255.0).unsqueeze(0).repeat(3,1,1).to(DEVICE)
    tensor = imagenet_norm(tensor)   # still 3×H×W

    with torch.no_grad():
        out = model([tensor])[0]      # list of one 3D tensor

    boxes  = out["boxes"].cpu().numpy()
    scores = out["scores"].cpu().numpy()
    labels = out["labels"].cpu().numpy()

    # Predictions live in the resized frame; the COCO ground truth is in
    # original image coordinates, so rescale before scoring.
    # IMG_SIZE is used as cv2 `dsize`, i.e. (width, height).
    sx = orig_w / float(IMG_SIZE[0])
    sy = orig_h / float(IMG_SIZE[1])

    for box,score,label in zip(boxes,scores,labels):
        x0,y0,x1,y1 = box
        results.append({
            "image_id":    img_id,
            "category_id": int(label),
            # COCO boxes are [x, y, width, height]
            "bbox":        [float(x0*sx), float(y0*sy),
                            float((x1-x0)*sx), float((y1-y0)*sy)],
            "score":       float(score),
        })

# write and evaluate
with open(resFile, "w") as f:
    json.dump(results, f)

if results:
    coco_dt   = coco_gt.loadRes(resFile)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.params.imgIds = img_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
else:
    # loadRes cannot index an empty detection list
    print("No detections produced; skipping COCO evaluation.")
