In [None]:
import os
import json
import random
from pathlib import Path
from tqdm import tqdm
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.roi_heads import RoIHeads
from dotenv import load_dotenv
import wandb

In [None]:
load_dotenv()


def _require_env_path(var_name):
    """Return environment variable `var_name` as a Path.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check
            `Path(os.getenv(...))` fails with an opaque
            "expected str, bytes or os.PathLike object, not NoneType" error.
    """
    raw_value = os.getenv(var_name)
    if not raw_value:
        raise RuntimeError(
            f"Environment variable {var_name!r} is not set; "
            "define it in the shell or in the .env file loaded by load_dotenv()."
        )
    return Path(raw_value)


# Required inputs: annotation files and image folders.
TRAIN_JSON = _require_env_path('TRAIN_JSON')
VAL_JSON = _require_env_path('VAL_JSON')
TRAIN_IMAGES_DIR = _require_env_path('TRAIN_CLEANED')
VAL_IMAGES_DIR = _require_env_path('VAL_CLEANED')

# Optional output location; created on demand.
OUTPUT_DIR = Path(os.getenv('OUTPUT', './outputs'))
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Files written by later cells.
TRAIN_COCO_JSON = OUTPUT_DIR / 'train_coco.json'
VAL_COCO_JSON = OUTPUT_DIR / 'val_coco.json'
MODEL_SAVE_PATH = OUTPUT_DIR / 'fasterrcnn_multitask_best.pth'

print(f"✅ Train JSON: {TRAIN_JSON}")
print(f"✅ Val JSON: {VAL_JSON}")
print(f"✅ Train images: {TRAIN_IMAGES_DIR}")
print(f"✅ Val images: {VAL_IMAGES_DIR}")
print(f"✅ Output directory: {OUTPUT_DIR}")

In [None]:
wandb.login()

# Hyperparameters and run metadata recorded with the Weights & Biases run.
run_config = {
    "epochs": 20,
    "batch_size": 2,
    "learning_rate": 0.0001,
    "weight_decay": 0.0001,
    "num_workers": 2,
    "gradient_accumulation_steps": 4,
    "model_type": "MultiTaskFasterRCNN",
    "backbone": "ResNet50-FPN",
    "num_states": 4,
    "state_loss_weight": 0.5,
    "scheduler": "ReduceLROnPlateau",
    "scheduler_factor": 0.5,
    "scheduler_patience": 3,
    "optimizer": "AdamW",
    "mixed_precision": True,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
}

wandb.init(
    project="bike_parts_detection",
    name="fasterrcnn_multitask",
    config=run_config,
)

def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # Same seed for each library's global generator.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Seed every visible GPU as well when CUDA is present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Disable cuDNN autotuning and ask for deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# Pick the GPU when one is available, otherwise fall back to the CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda') if cuda_ready else torch.device('cpu')
print(f"✅ Using device: {device}")
if cuda_ready:
    print(f"✅ GPU: {torch.cuda.get_device_name(0)}")
    print(f"✅ CUDA Version: {torch.version.cuda}")


In [None]:
def convert_to_coco(json_path, images_dir, save_path):
    """Convert a per-image parts annotation file into a COCO-style JSON file.

    The input JSON maps each image file name to a record that provides
    ``image.height``, ``image.width`` and a ``parts`` mapping. Each part
    provides ``absolute_bounding_box`` (``left``, ``top``, ``width``,
    ``height``) and ``object_state_class``.

    Args:
        json_path: Path of the source annotation JSON.
        images_dir: Unused by the conversion; kept so existing calls keep working.
        save_path: Destination path for the COCO JSON.

    Notes:
        Category ids are assigned from the sorted part names found in *this*
        file, starting at 1. Two files only share ids if they contain the same
        set of part names.
    """
    with open(json_path, 'r') as f:
        data = json.load(f, strict=False)

    # Collect every part name once, then build a name -> id lookup so each
    # annotation resolves its category in O(1) instead of scanning the list.
    part_names = sorted({name for content in data.values() for name in content["parts"]})
    category_ids = {name: idx for idx, name in enumerate(part_names, start=1)}

    coco = {
        "images": [],
        "annotations": [],
        "categories": [{"id": category_ids[name], "name": name} for name in part_names],
        "state_categories": [
            {"id": 0, "name": "intact"},
            {"id": 1, "name": "damaged"},
            {"id": 2, "name": "absent"},
            {"id": 3, "name": "occluded"},
        ],
    }

    ann_id = 1
    for img_id, (img_name, content) in enumerate(data.items(), 1):
        h, w = content["image"]["height"], content["image"]["width"]
        coco["images"].append({"id": img_id, "file_name": img_name, "height": h, "width": w})

        for part_name, part in content["parts"].items():
            box = part["absolute_bounding_box"]
            # COCO box layout: [x, y, width, height].
            bbox = [box["left"], box["top"], box["width"], box["height"]]
            coco["annotations"].append({
                "id": ann_id,
                "image_id": img_id,
                "bbox": bbox,
                "category_id": category_ids[part_name],
                "state_id": part["object_state_class"],
                "area": bbox[2] * bbox[3],
                "iscrowd": 0
            })
            ann_id += 1

    with open(save_path, 'w') as f:
        json.dump(coco, f, indent=2)
    print(f"✅ Saved COCO file: {save_path} ({len(coco['images'])} images, {len(coco['annotations'])} annotations)")

# Build the COCO files for both splits (train first, then validation).
for source_json, split_images_dir, coco_json_path in (
    (TRAIN_JSON, TRAIN_IMAGES_DIR, TRAIN_COCO_JSON),
    (VAL_JSON, VAL_IMAGES_DIR, VAL_COCO_JSON),
):
    convert_to_coco(source_json, split_images_dir, coco_json_path)


In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; samples are not stacked into one tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

class BikePartsDataset(Dataset):
    """Dataset yielding ``(image, target)`` pairs from a COCO-style JSON file.

    The target dict holds ``boxes`` (``[x1, y1, x2, y2]`` float32), ``labels``
    and ``states`` (int64) and ``image_id``. Only the image is passed through
    ``transforms``; the target is returned as built.
    """

    def __init__(self, images_dir, coco_json, transforms=None):
        """Load the annotation file and index annotations by image id.

        Args:
            images_dir: Folder containing the image files.
            coco_json: Path of the COCO-style JSON with ``images`` and ``annotations``.
            transforms: Optional callable applied to the loaded PIL image.
        """
        self.images_dir = Path(images_dir)
        self.transforms = transforms
        with open(coco_json, 'r') as handle:
            self.coco = json.load(handle)
        self.images = self.coco["images"]
        self.annotations = self.coco["annotations"]

        # image id -> list of its annotations, in file order.
        grouped = {}
        for annotation in self.annotations:
            grouped.setdefault(annotation["image_id"], []).append(annotation)
        self.img_id_to_anns = grouped

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.images)

    @staticmethod
    def _to_tensor(values, dtype, empty_shape):
        """Tensor of ``values``, or a zero-sized tensor of ``empty_shape`` when empty."""
        if values:
            return torch.as_tensor(values, dtype=dtype)
        return torch.zeros(empty_shape, dtype=dtype)

    def __getitem__(self, idx):
        """Return the image at position ``idx`` together with its target dict."""
        record = self.images[idx]
        image_file = self.images_dir / record["file_name"]

        try:
            image = Image.open(image_file).convert("RGB")
        except Exception as e:
            # Report which file failed, then let the error propagate.
            print(f"Error loading image {image_file}: {e}")
            raise

        boxes, labels, states = [], [], []
        for annotation in self.img_id_to_anns.get(record["id"], []):
            left, top, width, height = annotation["bbox"]
            # COCO [x, y, w, h] -> corner format [x1, y1, x2, y2].
            boxes.append([left, top, left + width, top + height])
            labels.append(annotation["category_id"])
            states.append(annotation["state_id"])

        target = {
            "boxes": self._to_tensor(boxes, torch.float32, (0, 4)),
            "labels": self._to_tensor(labels, torch.int64, 0),
            "states": self._to_tensor(states, torch.int64, 0),
            "image_id": torch.tensor([record["id"]], dtype=torch.int64),
        }

        if self.transforms:
            image = self.transforms(image)
        return image, target

# Images are only converted to tensors; no augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

train_dataset = BikePartsDataset(TRAIN_IMAGES_DIR, TRAIN_COCO_JSON, transform)
val_dataset = BikePartsDataset(VAL_IMAGES_DIR, VAL_COCO_JSON, transform)

# Settings shared by both loaders; pinned memory and a prefetch factor of 1
# are only requested when a GPU is present.
_cuda_available = torch.cuda.is_available()
_loader_kwargs = dict(
    batch_size=2,
    num_workers=2,
    collate_fn=collate_fn,
    pin_memory=_cuda_available,
    prefetch_factor=1 if _cuda_available else None,
)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

print(f"✅ Datasets ready: {len(train_dataset)} train, {len(val_dataset)} val")
print(f"✅ Train batches: {len(train_loader)}, Val batches: {len(val_loader)}")


In [None]:
class MultiTaskFasterRCNN(nn.Module):
    """Faster R-CNN part detector with an additional per-box state classifier.

    In training mode (with targets) ``forward`` returns ``(total_loss, losses)``
    where ``losses`` contains the detector losses plus ``loss_state``. In every
    other case it returns the wrapped detector's output unchanged; the state
    head is not applied at inference.

    The state head is trained on RoI features pooled from the ground-truth
    boxes, so every state label is paired with the box it annotates.
    """

    def __init__(self, num_parts, num_states, state_loss_weight=0.5):
        """
        Args:
            num_parts: Number of part categories (background excluded).
            num_states: Number of state classes predicted per box.
            state_loss_weight: Multiplier applied to the state loss before it
                is added to the detector losses.
        """
        super().__init__()
        self.model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features

        # One extra output for the background class used by the detector.
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_parts + 1)

        self.state_head = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_states)
        )
        self.state_loss_fn = nn.CrossEntropyLoss()
        self.state_loss_weight = state_loss_weight

    @staticmethod
    def _check_boxes(targets):
        """Reject boxes without positive width and height before training on them."""
        for index, target in enumerate(targets):
            boxes = target["boxes"]
            if boxes.numel() > 0 and bool((boxes[:, 2:] <= boxes[:, :2]).any()):
                raise ValueError(
                    f"Target {index} contains a box with non-positive width or height."
                )

    def _state_loss(self, features, image_sizes, targets):
        """Cross-entropy of the state head on features pooled from ground-truth boxes.

        Args:
            features: Feature maps returned by the backbone (dict of tensors).
            image_sizes: Per-image sizes after the detector's transform.
            targets: Targets after the detector's transform, so their boxes are
                in the same coordinate frame as ``features``.

        Returns:
            A scalar tensor; zero when no image carries state labels.
        """
        boxes_per_image, state_labels = [], []
        for index, target in enumerate(targets):
            boxes = target["boxes"]
            states = target.get("states")
            if states is None:
                # Keep one (empty) entry per image so pooling stays aligned.
                boxes_per_image.append(boxes[:0])
                continue
            if len(states) != len(boxes):
                raise ValueError(
                    f"Target {index} has {len(states)} states for {len(boxes)} boxes."
                )
            boxes_per_image.append(boxes)
            state_labels.append(states)

        if sum(len(states) for states in state_labels) == 0:
            device = next(iter(features.values())).device
            return torch.zeros((), device=device)

        roi_heads = self.model.roi_heads
        box_features = roi_heads.box_roi_pool(features, boxes_per_image, image_sizes)
        box_features = roi_heads.box_head(box_features)
        state_logits = self.state_head(box_features)
        state_targets = torch.cat(state_labels).to(state_logits.device)
        return self.state_loss_fn(state_logits, state_targets)

    def forward(self, images, targets=None):
        """Run the detector; in training mode also compute the state loss.

        Args:
            images: List of image tensors.
            targets: List of dicts with ``boxes``, ``labels`` and optionally
                ``states``; required to obtain losses.

        Returns:
            ``(total_loss, losses)`` when the module is in training mode and
            targets are given, otherwise the wrapped detector's output.
        """
        if not (self.training and targets is not None):
            return self.model(images)

        self._check_boxes(targets)

        # Single pass through the detector's stages so the backbone features
        # are shared by the detection losses and the state head.
        image_list, targets = self.model.transform(images, targets)
        features = self.model.backbone(image_list.tensors)
        if isinstance(features, torch.Tensor):
            features = {'0': features}

        proposals, rpn_losses = self.model.rpn(image_list, features, targets)
        _, detector_losses = self.model.roi_heads(
            features, proposals, image_list.image_sizes, targets
        )

        losses = {}
        losses.update(detector_losses)
        losses.update(rpn_losses)
        losses['loss_state'] = (
            self._state_loss(features, image_list.image_sizes, targets) * self.state_loss_weight
        )

        total_loss = sum(v for v in losses.values() if isinstance(v, torch.Tensor))
        return total_loss, losses


In [None]:
with open(TRAIN_COCO_JSON, 'r') as f:
    train_coco = json.load(f, strict=False)

num_parts = len(train_coco["categories"])
num_states = 4

if torch.cuda.is_available():
    torch.cuda.empty_cache()

model = MultiTaskFasterRCNN(num_parts, num_states).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
# Mixed precision (loss scaling + autocast) is only used when CUDA is available.
scaler = torch.amp.GradScaler('cuda') if torch.cuda.is_available() else None

# These values mirror the config passed to wandb.init; keep them in sync.
num_epochs = 20
best_val_loss = float('inf')
gradient_accumulation_steps = 4


def _move_to_device(imgs, targets):
    """Copy a batch of images and target dicts to the training device."""
    imgs = [img.to(device) for img in imgs]
    targets = [
        {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()}
        for t in targets
    ]
    return imgs, targets


def _compute_losses(imgs, targets):
    """Run the model on a batch, under autocast when mixed precision is enabled."""
    if scaler is not None:
        with torch.amp.autocast('cuda'):
            return model(imgs, targets)
    return model(imgs, targets)


for epoch in range(num_epochs):
    model.train()
    train_losses = []

    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    optimizer.zero_grad()

    for batch_idx, (imgs, targets) in enumerate(pbar):
        imgs, targets = _move_to_device(imgs, targets)

        loss, losses_dict = _compute_losses(imgs, targets)
        # Scale down so accumulated gradients average over the window.
        loss = loss / gradient_accumulation_steps
        if scaler is not None:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        batch_loss = loss.item() * gradient_accumulation_steps
        train_losses.append(batch_loss)

        # Step at the end of each accumulation window and on the final batch.
        if (batch_idx + 1) % gradient_accumulation_steps == 0 or (batch_idx + 1) == len(train_loader):
            if scaler is not None:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

        if batch_idx % 10 == 0:
            log_dict = {f"train_{k}": v.item() if isinstance(v, torch.Tensor) else v for k, v in losses_dict.items()}
            log_dict["train_loss"] = batch_loss
            wandb.log(log_dict)

        pbar.set_postfix({"loss": f"{batch_loss:.4f}"})

        if torch.cuda.is_available() and batch_idx % 50 == 0:
            torch.cuda.empty_cache()

    avg_train_loss = np.mean(train_losses)

    # The model only returns losses in training mode (in eval mode it returns
    # detections), so the validation loss is computed in train mode with
    # gradients disabled. Dropout layers are switched to eval so the value is
    # deterministic; model.train() at the top of the next epoch re-enables them.
    model.train()
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.eval()

    val_losses = []
    with torch.no_grad():
        for imgs, targets in tqdm(val_loader, desc="Validating"):
            imgs, targets = _move_to_device(imgs, targets)
            loss, losses_dict = _compute_losses(imgs, targets)
            val_losses.append(loss.item())

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # Leave the model in eval mode between epochs, as before.
    model.eval()

    avg_val_loss = np.mean(val_losses)
    scheduler.step(avg_val_loss)

    wandb.log({
        "epoch": epoch + 1,
        "avg_train_loss": avg_train_loss,
        "avg_val_loss": avg_val_loss,
        "learning_rate": optimizer.param_groups[0]['lr']
    })

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"  Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # Keep only the checkpoint with the lowest validation loss.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        wandb.save(str(MODEL_SAVE_PATH))
        print(f"  ✅ Saved best model (val_loss: {avg_val_loss:.4f})")

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print(f"\n✅ Training completed! Best model saved to {MODEL_SAVE_PATH}")


In [None]:
import matplotlib.pyplot as plt
from torchvision.utils import draw_bounding_boxes

# Detections scoring below this value are not drawn.
SCORE_THRESHOLD = 0.5

# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict checkpoint needs.
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device, weights_only=True))
model.eval()

with torch.no_grad():
    imgs, targets = next(iter(val_loader))
    img = imgs[0].to(device)
    output = model([img])[0]

print(f"Predicted boxes: {output['boxes'].shape}")
print(f"Predicted labels: {output['labels'].shape}")
print(f"Scores: {output['scores'][:5].cpu().numpy()}")

# Keep confident detections and build a "<part name> <score>" caption for each.
keep = output['scores'] >= SCORE_THRESHOLD
kept_boxes = output['boxes'][keep].cpu()
kept_labels = output['labels'][keep].cpu().tolist()
kept_scores = output['scores'][keep].cpu().tolist()

category_names = {c["id"]: c["name"] for c in train_coco["categories"]}
captions = [
    f"{category_names.get(label, label)} {score:.2f}"
    for label, score in zip(kept_labels, kept_scores)
]

# draw_bounding_boxes is given a uint8 (C, H, W) image on the CPU.
image_uint8 = (img.cpu().clamp(0, 1) * 255).to(torch.uint8)
if len(kept_boxes) > 0:
    image_uint8 = draw_bounding_boxes(
        image_uint8, kept_boxes, labels=captions, colors="red", width=2
    )

fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(image_uint8.permute(1, 2, 0).numpy())
ax.axis('off')
ax.set_title(
    f"Predictions: {len(kept_boxes)} of {len(output['boxes'])} detections "
    f"(score >= {SCORE_THRESHOLD})"
)
plt.show()
