In [1]:
!pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (427 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.8/427.8 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pycocotools
Successfully installed pycocotools-2.0.8


In [2]:
import torch
import torchvision
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets.coco import CocoDetection
from torchvision import transforms
import os
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

In [3]:
# Run on the GPU whenever CUDA reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Root of the Kaggle input dataset; each split directory carries its own
# COCO-format annotation file.
data_dir = '/kaggle/input/dataset0613-cs331'
ann_file_train, ann_file_val, ann_file_test = (
    os.path.join(data_dir, f'{split}/_annotations.coco.json')
    for split in ('train', 'valid', 'test')
)

In [5]:
# Training-time preprocessing: PIL image -> tensor, photometric jitter, then
# per-channel normalisation (the constants look like the usual ImageNet
# statistics -- confirm).
# NOTE(review): the dataset cell in this notebook passes `F.to_tensor` as its
# transform, so this pipeline is not applied there -- confirm that is intended.
_train_steps = [
    transforms.ToTensor(),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
train_transform = transforms.Compose(_train_steps)

In [6]:
# Evaluation-time preprocessing: PIL image -> tensor followed by per-channel
# normalisation; no augmentation.
# NOTE(review): the dataset cell in this notebook passes `F.to_tensor` as its
# transform, so this pipeline is not applied there -- confirm that is intended.
_val_steps = [
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
val_transform = transforms.Compose(_val_steps)

In [7]:
def _load_split(split_name, annotation_path):
    """Build the CocoDetection dataset for one split directory under `data_dir`.

    Images are converted with `F.to_tensor`; annotations are returned as
    loaded from the annotation file.
    """
    return CocoDetection(
        root=os.path.join(data_dir, split_name),
        annFile=annotation_path,
        transform=F.to_tensor,
    )


train_dataset = _load_split('train', ann_file_train)
val_dataset = _load_split('valid', ann_file_val)
test_dataset = _load_split('test', ann_file_test)

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [8]:
def _collate_detection_batch(samples):
    """Transpose a list of (image, annotations) samples into an
    (images, annotations) pair of tuples, without stacking anything."""
    return tuple(zip(*samples))


# All three loaders share the batch size and collate function; only the
# training loader shuffles.
_loader_options = dict(batch_size=32, collate_fn=_collate_detection_batch)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

In [9]:
# Start from the COCO-pretrained SSDLite320 / MobileNetV3-Large detector.
# `weights=` replaces the deprecated `pretrained=True` flag of torchvision's
# model builders; "COCO_V1" selects the ssdlite320_mobilenet_v3_large_coco
# checkpoint, i.e. the same file the original call downloaded.
model = ssdlite320_mobilenet_v3_large(weights="COCO_V1")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth" to /root/.cache/torch/hub/checkpoints/ssdlite320_mobilenet_v3_large_coco-a79551df.pth
100%|██████████| 13.4M/13.4M [00:00<00:00, 111MB/s] 


In [10]:
# Adam over every model parameter, paired with a step schedule
# (step_size=10, gamma=0.1) that the training loop advances once per epoch.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=10,
    gamma=0.1,
)

In [11]:
#for param in model.backbone.parameters():
#    param.requires_grad = False

In [12]:
# TensorBoard writer, created with default settings; the training loop logs
# the per-epoch classification and localization losses through it.
writer = SummaryWriter()

In [13]:
# Fine-tune the detector for `num_epochs` epochs. After every epoch the model
# is evaluated on the validation split with the COCO bbox metrics and the
# resulting mAP (IoU=0.50:0.95) is printed.
num_epochs = 50
#freeze_epochs = 5
classification_weight = 1.0
localization_weight = 1.0


def _build_detection_batch(images, annotations, device):
    """Convert one collated batch into the (images, targets) lists the model expects.

    Args:
        images: iterable of image tensors, one per sample.
        annotations: iterable (same length/order as `images`) of lists of
            COCO annotation dicts carrying 'bbox' ([x, y, w, h]) and
            'category_id'.
        device: device the returned tensors are placed on.

    Returns:
        (kept_images, kept_targets): two lists of equal length. An image whose
        annotations contain no box with positive width and height is dropped
        from BOTH lists, so image k always lines up with target k.
    """
    kept_images = []
    kept_targets = []
    for image, image_annotations in zip(images, annotations):
        boxes = []
        labels = []
        for obj in image_annotations:
            x_min, y_min, width, height = obj['bbox']
            # Zero-area boxes are skipped; COCO [x, y, w, h] becomes [x1, y1, x2, y2].
            if width > 0 and height > 0:
                boxes.append([x_min, y_min, x_min + width, y_min + height])
                labels.append(obj['category_id'])
        if boxes:
            kept_images.append(image.to(device))
            kept_targets.append({
                'boxes': torch.tensor(boxes, dtype=torch.float32, device=device),
                'labels': torch.tensor(labels, dtype=torch.int64, device=device),
            })
    return kept_images, kept_targets


# The validation ground truth never changes, so parse it once instead of once per epoch.
coco_gt = COCO(ann_file_val)
predictions_file = "predictions.json"

for epoch in range(num_epochs):
    #if epoch == freeze_epochs:
    #    for param in model.backbone.parameters():
    #        param.requires_grad = True

    running_classification_loss = 0.0
    running_localization_loss = 0.0
    num_batches = 0  # batches that actually produced a loss

    model.train()
    for images, targets in train_loader:
        # Images and targets are filtered together; dropping only the target of
        # an un-annotated image would shift every later target onto the wrong image.
        images, processed_targets = _build_detection_batch(images, targets, device)

        if not processed_targets:
            continue

        optimizer.zero_grad()

        loss_dict = model(images, processed_targets)
        classification_loss = loss_dict["classification"]
        localization_loss = loss_dict["bbox_regression"]

        total_loss = (classification_weight * classification_loss +
                      localization_weight * localization_loss)

        total_loss.backward()
        optimizer.step()

        running_classification_loss += classification_loss.item()
        running_localization_loss += localization_loss.item()
        num_batches += 1

    # Average over the batches that contributed a loss (skipped batches add
    # nothing to the sums); max() guards against an epoch with no usable batch.
    avg_classification_loss = running_classification_loss / max(num_batches, 1)
    avg_localization_loss = running_localization_loss / max(num_batches, 1)

    writer.add_scalar('Loss/Classification', avg_classification_loss, epoch)
    writer.add_scalar('Loss/Localization', avg_localization_loss, epoch)

    print(f"Epoch {epoch + 1}/{num_epochs}, "
          f"Classification Loss: {avg_classification_loss:.4f}, "
          f"Localization Loss: {avg_localization_loss:.4f}")

    scheduler.step()

    model.eval()
    all_predictions = []

    # val_loader is built with shuffle=False, so samples arrive in dataset
    # order and the dataset's own id list gives the image id of every sample,
    # including images that have no annotations at all.
    val_image_ids = val_loader.dataset.ids
    image_offset = 0

    with torch.no_grad():  # inference only: do not record an autograd graph
        for images, targets in val_loader:
            images = [image.to(device) for image in images]
            outputs = model(images)

            batch_image_ids = val_image_ids[image_offset:image_offset + len(images)]
            image_offset += len(images)

            for image_id, output in zip(batch_image_ids, outputs):
                # [x1, y1, x2, y2] -> COCO [x, y, w, h], converted to Python
                # numbers in one transfer per tensor.
                boxes_xywh = output["boxes"].clone()
                boxes_xywh[:, 2:] -= boxes_xywh[:, :2]

                for bbox, label, score in zip(boxes_xywh.tolist(),
                                              output["labels"].tolist(),
                                              output["scores"].tolist()):
                    all_predictions.append({
                        "image_id": int(image_id),
                        "category_id": label,
                        "bbox": bbox,
                        "score": score
                    })

    with open(predictions_file, "w") as f:
        json.dump(all_predictions, f)

    # COCO.loadRes indexes the first result, so it cannot take an empty list.
    if not all_predictions:
        print(f"Epoch {epoch + 1}/{num_epochs}, mAP: skipped (no detections produced)")
        continue

    coco_dt = coco_gt.loadRes(predictions_file)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    mAP = coco_eval.stats[0]
    print(f"Epoch {epoch + 1}/{num_epochs}, mAP: {mAP:.4f}")

writer.close()

Epoch 1/50, Classification Loss: 2.1828, Localization Loss: 0.2537
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.31s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.42s).
Accumulating evaluation results...
DONE (t=0.23s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.606
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.858
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.784
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.606
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.728
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.

In [14]:
# Serialise the whole model object (not just a state_dict) under a relative path.
# NOTE(review): the training loop does not track a best epoch, so despite the
# file name this is the model as it stands after the last epoch -- confirm intent.
checkpoint_path = 'bestmodelv5x2_SSD320lite.pth'
torch.save(model, checkpoint_path)