In [1]:
import os
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from pycocotools.coco import COCO
from PIL import Image
import torchvision.models.detection as detection

In [2]:
# Device configuration (GPU if available)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Dataset class to load COCO data
class COCODataset(Dataset):
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.coco = COCO(annotation)
        self.ids = list(self.coco.imgs.keys())
        self.transforms = transforms

    def __getitem__(self, index):
        # Load the image and the corresponding annotations
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)
        img_info = coco.loadImgs(img_id)[0]
        path = img_info['file_name']

        img = Image.open(os.path.join(self.root, path)).convert("RGB")

        boxes = []
        labels = []
        for ann in anns:
            xmin = ann['bbox'][0]
            ymin = ann['bbox'][1]
            xmax = xmin + ann['bbox'][2]
            ymax = ymin + ann['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(ann['category_id'])

        # If no boxes, skip this image
        if len(boxes) == 0:
            return self.__getitem__((index + 1) % len(self.ids))

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels

        if self.transforms:
            img = self.transforms(img)
            

        return img, target

    def __len__(self):
        return len(self.ids)

In [4]:
data_transforms = transforms.Compose([
    transforms.ToTensor(),
])

In [7]:
# Directories for train, validation, and test data
train_img_dir = "SKIN DISEASE.v1i.coco/train"
valid_img_dir = "SKIN DISEASE.v1i.coco/valid"
test_img_dir = "SKIN DISEASE.v1i.coco/test"

train_annotation_file = "SKIN DISEASE.v1i.coco/train/_annotations.coco.json"
valid_annotation_file = "SKIN DISEASE.v1i.coco/valid/_annotations.coco.json"
test_annotation_file = "SKIN DISEASE.v1i.coco/test/_annotations.coco.json"

In [8]:
# Create datasets and dataloaders
train_dataset = COCODataset(root=train_img_dir, annotation=train_annotation_file, transforms=data_transforms)
valid_dataset = COCODataset(root=valid_img_dir, annotation=valid_annotation_file, transforms=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [9]:
# Load pre-trained SSD model and modify it for your number of classes
num_classes = len(train_dataset.coco.getCatIds()) + 1  # Include background class
model = detection.ssd300_vgg16(weights=detection.SSD300_VGG16_Weights.COCO_V1)  # Updated to use 'weights' instead of 'pretrained'

In [10]:
model.head.classification_head.num_classes = num_classes
model.to(device)

SSD(
  (backbone): SSDFeatureExtractorVGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=

In [11]:
# Define optimizer and learning rate scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [12]:
# Training function
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    total_loss = 0
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        total_loss += losses.item()

    print(f"Epoch [{epoch+1}] - Loss: {total_loss / len(data_loader)}")
    return total_loss / len(data_loader)

In [13]:
def evaluate(model, data_loader, device):
    model.eval()  # Set model to evaluation mode
    total_loss = 0

    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass through the model
            # During evaluation, the model returns predictions, not loss_dict
            predictions = model(images)

            # We can process the predictions here if needed (e.g., calculating metrics)

            # If you need to see bounding boxes and labels, uncomment the following lines
            for i, prediction in enumerate(predictions):
                print(f"Image {i+1}:")
                print(f"Boxes: {prediction['boxes']}")
                print(f"Labels: {prediction['labels']}")
                print(f"Scores: {prediction['scores']}")

    print("Evaluation done.")

In [14]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    evaluate(model, valid_loader, device)
    lr_scheduler.step()

Epoch 1/10
Epoch [1] - Loss: 4.275657934146923
Image 1:
Boxes: tensor([[  7.0748,  72.3250, 640.0001, 562.7050],
        [ 45.0694,  85.4903, 640.0001, 551.6967],
        [ 19.5017,  60.1994, 634.8948, 574.7646],
        [ 19.5017,  60.1994, 634.8948, 574.7646],
        [ 33.7032,  61.6903, 635.3581, 573.9441],
        [ 33.7032,  61.6903, 635.3581, 573.9441],
        [ 69.7778, 105.7367, 629.4895, 346.7753],
        [ 18.3422, 215.4028, 533.8239, 481.9982],
        [  2.0894, 352.5930, 335.0251, 543.7270],
        [  0.0000,  13.7605, 494.4039, 160.1408],
        [267.3921, 347.2791, 596.9742, 537.6859],
        [285.2737, 188.1094, 478.0945, 443.6535],
        [269.5195, 226.7977, 371.7852, 367.7696],
        [255.4957, 225.7732, 401.8186, 377.8607],
        [128.4934, 348.0522, 471.5558, 540.8303],
        [ 46.7082,  17.1090, 608.1998, 152.1371],
        [306.1130, 254.8792, 405.7059, 398.5907],
        [207.1373, 208.9940, 422.6917, 433.1930],
        [321.9544, 221.2977, 473.3992

In [15]:
torch.save(model.state_dict(), "ssd_skin_disease.pth")