In [1]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Define transformations
class CocoTransform:
    """Joint (image, target) transform that tensorizes the image only.

    Intended for the ``transforms=`` argument of ``CocoDetection``, which
    calls the transform with both the image and its annotation list.
    """

    def __call__(self, image, target):
        # The target is handed back untouched; only the image is converted
        # (PIL image -> tensor via torchvision's functional API).
        tensor_image = F.to_tensor(image)
        return tensor_image, target

In [3]:
%ls

 [0m[01;32mFaster_RCNN.ipynb[0m*    [01;32m'fasterrcnn_resnet50_epoch_(epoch + 1).pth'[0m*   [01;34mvalid[0m/
 [01;32mREADME.dataset.txt[0m*    [01;34mtest[0m/
 [01;32mREADME.roboflow.txt[0m*   [01;34mtrain[0m/


In [4]:
# Dataset class
def get_coco_dataset(img_dir, ann_file):
    """Create a ``CocoDetection`` dataset whose images are returned as tensors.

    Args:
        img_dir: Directory passed to ``CocoDetection`` as ``root``.
        ann_file: Path to the COCO annotation JSON (``annFile``).

    Returns:
        A ``CocoDetection`` instance using ``CocoTransform`` as its joint
        image/target transform.
    """
    joint_transform = CocoTransform()
    dataset = CocoDetection(root=img_dir, annFile=ann_file, transforms=joint_transform)
    return dataset

# Load datasets
def _collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples.

    Used instead of the default collate because each sample carries its own
    annotation list, which is kept per-image rather than stacked.
    """
    return tuple(zip(*batch))


train_dataset = get_coco_dataset(
    img_dir="train",
    ann_file="train/annotations/train_annotations.json",
)
val_dataset = get_coco_dataset(
    img_dir="valid",
    ann_file="valid/annotations/valid_annotations.json",
)

# Only the training split is shuffled; both loaders share the same collate.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_detection_batch,
)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [5]:
# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    """Build a pre-trained Faster R-CNN (ResNet-50 FPN) with a fresh box head.

    Args:
        num_classes: Number of outputs for the replacement ``FastRCNNPredictor``
            (presumably including the background class — confirm against the
            dataset's category ids).

    Returns:
        The detection model with ``roi_heads.box_predictor`` swapped for a new,
        untrained predictor sized for ``num_classes``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The new head must accept the same feature width as the one it replaces.
    old_head = detector.roi_heads.box_predictor
    detector.roi_heads.box_predictor = FastRCNNPredictor(
        old_head.cls_score.in_features, num_classes
    )
    return detector

In [6]:
# Initialize the model
# NOTE(review): the earlier comment listed "Background + chair, human, table"
# (4 classes), which does not match the value 13 — confirm against the
# annotation file. train_one_epoch uses each annotation's raw "category_id" as
# the label, so num_classes presumably has to exceed the largest category_id.
num_classes = 13  # Number of outputs for the new box-predictor head
model = get_model(num_classes)



In [7]:
# Run on CUDA when it is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# SGD over the trainable parameters only; the learning rate is multiplied by
# 0.1 after every 2 scheduler steps
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.0005,
    momentum=0.9,
    weight_decay=0.0005,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=2,
    gamma=0.1,
)

In [8]:
def _build_detection_target(annotations, device):
  """Convert one image's COCO annotations into a detection target dict.

  Args:
    annotations: Iterable of dicts, each with a "bbox" in COCO
      [x, y, width, height] form and a "category_id".
    device: Device the resulting tensors are moved to.

  Returns:
    A dict with "boxes" (float32, [x_min, y_min, x_max, y_max] rows) and
    "labels" (int64), or None when no annotation has a box with positive
    width and height.
  """
  boxes = []
  labels = []
  for obj in annotations:
    x, y, w, h = obj["bbox"]
    # Degenerate boxes (non-positive width or height) are dropped.
    if w > 0 and h > 0:
      boxes.append([x, y, x + w, y + h])
      labels.append(obj["category_id"])

  if not boxes:
    return None

  return {
    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
  }


def train_one_epoch(model, optimizer, data_loader, device, epoch):
  """Train ``model`` for one pass over ``data_loader``.

  Images without any valid bounding box are removed from their batch, and a
  batch left with no images is skipped entirely.

  Args:
    model: Detection model that, in train mode, returns a dict of losses when
      called as ``model(images, targets)``.
    optimizer: Optimizer stepping the model's parameters.
    data_loader: Iterable yielding ``(images, targets)`` where ``targets[i]``
      is the COCO annotation list for ``images[i]``.
    device: Device that images and targets are moved to.
    epoch: Epoch index, used only in the progress message.

  Returns:
    The mean total loss over the batches that were actually trained on, or
    None when every batch was skipped.
  """
  model.train()

  total_loss = 0.0
  trained_batches = 0

  for images, targets in data_loader:
    batch_images = []
    batch_targets = []
    for image, annotations in zip(images, targets):
      target = _build_detection_target(annotations, device)
      if target is None:
        continue
      # Only images that are kept get transferred to the device.
      batch_images.append(image.to(device))
      batch_targets.append(target)

    # Skip the iteration if no image in the batch has a valid target
    if not batch_targets:
      continue

    # Forward pass: the model returns one loss tensor per loss component
    loss_dict = model(batch_images, batch_targets)
    losses = sum(loss_dict.values())

    # Backpropagation
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    total_loss += losses.item()
    trained_batches += 1

  # Without this guard the summary line would reference a loss that was never
  # computed when every batch was skipped.
  if trained_batches == 0:
    print(f"Epoch [{epoch}] skipped: no batch contained a valid bounding box")
    return None

  mean_loss = total_loss / trained_batches
  print(f"Epoch [{epoch}] Loss: {mean_loss:.4f}")
  return mean_loss

In [9]:
#Training loop
num_epochs = 2
for epoch in range(num_epochs):
  train_one_epoch(model, optimizer, train_loader, device, epoch)
  # Advance the learning-rate schedule once per epoch
  lr_scheduler.step()

  #Save the model's state dictionary after every epoch
  model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
  torch.save(model.state_dict(), model_path)
  # Braces (not parentheses) so the actual checkpoint path is interpolated
  print(f"Model saved: {model_path}")