In [None]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN_MobileNet_V3_Large_FPN_Weights,\
                                         fasterrcnn_mobilenet_v3_large_fpn, \
                                         SSD300_VGG16_Weights, \
                                         ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader

In [None]:
def create_faster_rcnn_mobilenet_v3(num_classes = 2):
  """Build a Faster R-CNN (MobileNetV3-Large FPN) with a new box-predictor head.

  The model is created with the `DEFAULT` weights enum and its
  `roi_heads.box_predictor` is then replaced by a `FastRCNNPredictor`
  sized for `num_classes` outputs.

  Args:
    num_classes: Number of outputs of the new box predictor. Defaults to 2,
      the value that used to be hard-coded here.
      NOTE(review): torchvision detection heads count the background as a
      class, so 2 presumably means "background + bottle" - confirm.

  Returns:
    The detection model with the replaced box predictor.
  """
  weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
  model = fasterrcnn_mobilenet_v3_large_fpn(weights = weights)

  # The replacement head must accept the same feature width as the old one.
  in_features = model.roi_heads.box_predictor.cls_score.in_features

  # Replace the classifier head with one sized for `num_classes`.
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  return model

In [None]:
# Read Kaggle JSON File from Google Drive
from google.colab import drive
import os

import shutil

# Pure-Python file operations are used instead of `!` shell commands: a failing
# shell command does not raise, so the surrounding try/except could never see it.
try:
  # exist_ok=True makes the cell re-runnable; the previous rmdir + mkdir pair
  # printed an error on a fresh runtime and could not reset a non-empty folder.
  kaggle_dir = os.path.expanduser('~/.kaggle')
  os.makedirs(kaggle_dir, exist_ok = True)
  print("Successfully created kaggle folder")
  drive.mount('/content/drive/')
  try:
    kaggle_json = os.path.join(kaggle_dir, 'kaggle.json')
    shutil.copy('/content/drive/MyDrive/apis/kaggle.json', kaggle_json)
    # Restrict the token to the current user; the Kaggle CLI warns otherwise.
    os.chmod(kaggle_json, 0o600)
    print("Succssfully copied kaggle json and now moving it to colab space")
  finally:
    # Always release the drive, even when the copy fails.
    drive.flush_and_unmount()
except Exception as err:
  # Chain the original error so the real cause stays visible in the traceback.
  raise Exception("Issue occurred while trying to create Kaggle Folder") from err

rmdir: failed to remove '/root/.kaggle': No such file or directory
Successfully created kaggle folder
Mounted at /content/drive/
Succssfully copied kaggle json and now moving it to colab space


In [None]:
# Create Custom Dataset from the Plastic Bottle dataset on Kaggle

import zipfile

!kaggle datasets download -d siddharthkumarsah/plastic-bottles-image-dataset\

# Unzip the folder
path = 'plastic-bottles-image-dataset.zip'
destination = 'plastic-bottles-image-dataset'

with zipfile.ZipFile(path, 'r') as zip_ref:
  zip_ref.extractall(destination)

Downloading plastic-bottles-image-dataset.zip to /content
100% 1.53G/1.53G [01:27<00:00, 21.0MB/s]
100% 1.53G/1.53G [01:27<00:00, 18.7MB/s]


In [None]:
import os
import torch
from PIL import Image

class PlasticBottleDataset(Dataset):
  """Detection dataset that reads images and YOLO-style label files.

  Files are read from `<root>/<train|test>/images` and
  `<root>/<train|test>/labels`. Both listings are sorted and paired by
  position, so the i-th image is matched with the i-th label file.
  Each label line is read as `<first field> <cx> <cy> <w> <h>`; the first
  field is ignored and the remaining four are treated as fractions of the
  image width/height. Every box is given label 1.
  """

  def __init__(self, root, transforms, train):
    """Collect the image and label file paths for one split.

    Args:
      root: Dataset directory, as a string or a path-like object.
      transforms: Callable invoked as `transforms(img, target)` and expected
        to return `(img, target)`; a falsy value disables it.
      train: Truthy selects the 'train' sub-folder, falsy selects 'test'.

    Raises:
      ValueError: If the number of image files and label files differ, since
        positional pairing would then attach labels to the wrong images.
    """
    self.root = root
    self.transforms = transforms

    # root is the dataset path  'plastic-bottles-images-dataset'
    folder = 'train' if train else 'test'
    # os.path.join accepts both str and Path roots (the `/` operator does not
    # work on a plain string).
    image_dir = os.path.join(root, folder, 'images')
    label_dir = os.path.join(root, folder, 'labels')
    self.images = sorted(os.path.join(image_dir, name) for name in os.listdir(image_dir))
    self.ground_truths = sorted(os.path.join(label_dir, name) for name in os.listdir(label_dir))

    if len(self.images) != len(self.ground_truths):
      raise ValueError(
        f"Found {len(self.images)} images in {image_dir} but "
        f"{len(self.ground_truths)} label files in {label_dir}"
      )

  @staticmethod
  def _yolo_to_corners(values, img_width, img_height):
    """Convert normalized (cx, cy, w, h) into clamped pixel corners.

    Returns `[x_min, y_min, x_max, y_max]`, or None when the clamped box has
    no positive width or height.
    """
    center_x = values[0] * img_width
    center_y = values[1] * img_height
    half_w = values[2] * img_width / 2
    half_h = values[3] * img_height / 2

    # Each edge is derived from the centre and clamped on its own, so clamping
    # the min edge to 0 does not push the max edge outwards.
    x_min = max(0, center_x - half_w)
    y_min = max(0, center_y - half_h)
    x_max = min(img_width, center_x + half_w)
    y_max = min(img_height, center_y + half_h)

    # `<=` also rejects inverted boxes, not only zero-sized ones.
    if x_max <= x_min or y_max <= y_min:
      return None
    return [x_min, y_min, x_max, y_max]

  def __getitem__(self, idx):
    """Load one sample.

    Returns:
      `(img, target)` where `img` is the RGB PIL image (or whatever
      `transforms` returns) and `target` is a dict with the keys 'boxes',
      'area', 'iscrowd', 'image_id' and 'labels'.
    """
    image_path = self.images[idx]
    ground_truth_path = self.ground_truths[idx]
    img = Image.open(image_path).convert('RGB')

    boxes = []
    with open(ground_truth_path, 'r') as ground_truth:
      for line in ground_truth:
        fields = line.split()
        if len(fields) < 5:
          # Blank or truncated line: nothing to convert.
          continue
        # Transforms the X, Y, W, H into Xmin, Ymin, Xmax, Ymax
        box = self._yolo_to_corners([float(val) for val in fields[1:5]], img.width, img.height)
        if box is not None:
          boxes.append(box)

    if not boxes:
      # Issue comes up if no bounding boxes have been found or incorrect measurements given for the bounding box
      # If so, we assume the whole picture is the bounding box
      boxes = [[0, 0, img.width, img.height]]
    boxes = torch.as_tensor(boxes, dtype = torch.float32)

    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    labels = torch.ones((len(boxes)), dtype = torch.int64)
    iscrowd = torch.zeros((len(boxes)), dtype = torch.int64)

    image_id = torch.tensor([idx])

    target = {}
    target['boxes'] = boxes
    target['area'] = area
    target['iscrowd'] = iscrowd
    target['image_id'] = image_id
    target['labels'] = labels

    if self.transforms:
      img, target = self.transforms(img,target)

    return img, target

  def __len__(self):
    """Number of samples, i.e. the number of image files found."""
    return len(self.images)

In [None]:
## This script downloads the most recent version of the TorchVision detection reference scripts. The website example uses a deprecated version.

%%shell

# Stop at the first failing command instead of copying from a broken checkout.
set -e

# Download TorchVision repo to use some files from
# references/detection
# A shallow clone is enough because only the current files are copied, and
# skipping an existing checkout keeps the cell re-runnable.
if [ ! -d vision ]; then
  git clone --depth 1 https://github.com/pytorch/vision.git
fi
cd vision

# Copy the helper modules next to the notebook so they can be imported.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

Cloning into 'vision'...
remote: Enumerating objects: 323012, done.[K
remote: Counting objects: 100% (983/983), done.[K
remote: Compressing objects: 100% (231/231), done.[K
remote: Total 323012 (delta 764), reused 952 (delta 743), pack-reused 322029[K
Receiving objects: 100% (323012/323012), 654.75 MiB | 4.77 MiB/s, done.
Resolving deltas: 100% (296810/296810), done.




In [None]:
import transforms
# create the transforms, simple horizontal flip

def get_transforms(train):
  """Compose the (image, target) transform pipeline for one split.

  Both splits convert the PIL image to a float tensor; the training split
  additionally applies a horizontal flip with probability 0.5.
  """
  # Steps shared by the train and test pipelines.
  pipeline = [
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
  ]

  # Augmentation is only applied while training.
  if train:
    pipeline.append(transforms.RandomHorizontalFlip(0.5))

  return transforms.Compose(pipeline)

In [None]:
# set up the dataloaders
import utils

def get_dataloaders(root, batch_size = 2, num_workers = None):
  """Create the train and test DataLoaders for the dataset under `root`.

  Args:
    root: Dataset directory, forwarded to `get_datasets`.
    batch_size: Batch size of the training loader (default 2, the value
      previously hard-coded). The test loader always uses batch size 1.
    num_workers: Worker processes for the training loader. None (default)
      uses `os.cpu_count()`, falling back to 0 when that is undetermined.
      The test loader always loads in the main process.

  Returns:
    `(train_dataloader, test_dataloader)`.
  """
  train_dataset, test_dataset = get_datasets(root)

  if num_workers is None:
    # os.cpu_count() can return None, which DataLoader does not accept.
    num_workers = os.cpu_count() or 0

  train_dataloader = DataLoader(train_dataset,
                                batch_size = batch_size,
                                num_workers = num_workers,
                                shuffle = True,
                                collate_fn = utils.collate_fn)

  test_dataloader = DataLoader(test_dataset,
                               batch_size = 1,
                               num_workers = 0,
                               shuffle = False,
                               collate_fn = utils.collate_fn)

  return train_dataloader, test_dataloader

def get_datasets(root):
  """Return `(train_dataset, test_dataset)` built from the dataset at `root`.

  Each split gets the transform pipeline that `get_transforms` produces for it.
  """
  def build_split(is_train):
    # The same flag selects both the sub-folder and the transform pipeline.
    return PlasticBottleDataset(root,
                                transforms = get_transforms(train = is_train),
                                train = is_train)

  training_split = build_split(True)
  testing_split = build_split(False)
  return training_split, testing_split

In [None]:
# set up the optimizer
def get_optim(model):
  """Create the SGD optimizer and StepLR scheduler used for training.

  Only parameters with `requires_grad` set are handed to the optimizer.

  Returns:
    `(optimizer, lr_scheduler)`.
  """
  # Frozen parameters are left out of the optimizer.
  trainable = list(filter(lambda p: p.requires_grad, model.parameters()))

  sgd = torch.optim.SGD(
    trainable,
    lr = 0.001,
    momentum = 0.9000,
    weight_decay = 0.0005,
  )

  # Multiply the learning rate by 0.1 every 3 scheduler steps.
  step_schedule = torch.optim.lr_scheduler.StepLR(optimizer = sgd, step_size = 3, gamma = 0.1)

  return sgd, step_schedule


In [None]:
def model_save_to_drive(model, epoch):
  """Save the model's state dict to Google Drive, one file per epoch.

  Mounts the drive, writes the checkpoint under
  '/content/drive/MyDrive/models/', then flushes and unmounts again.

  Args:
    model: Module whose `state_dict()` is saved.
    epoch: Value embedded in the checkpoint file name.
  """
  # NOTE(review): the file name says "mobilenetv2" while the notebook builds
  # the model with create_faster_rcnn_mobilenet_v3; the name is left as-is so
  # existing checkpoints keep their naming - confirm before renaming.
  model_name = f'trash_detection_faster_rcnn_mobilenetv2_epoch_{epoch}.pth'
  model_file = '/content/drive/MyDrive/models/'

  drive.mount('/content/drive/')
  try:
    # torch.save does not create missing parent directories.
    os.makedirs(model_file, exist_ok = True)
    torch.save(model.state_dict(), model_file + model_name)
  finally:
    # Always flush and release the drive, even when saving fails.
    drive.flush_and_unmount()


In [None]:
from engine import train_one_epoch, evaluate
from pathlib import Path
def run():
  """Train the detector for 5 epochs, evaluating and checkpointing each epoch.

  Returns:
    The trained model.
  """
  # Use the GPU when one is available.
  if torch.cuda.is_available():
    device = 'cuda'
  else:
    device = 'cpu'

  # Data: the archive was extracted below the current working directory.
  dataset_root = Path(os.getcwd()).joinpath("plastic-bottles-image-dataset", 'Plastic Bottle Image Dataset')
  train_loader, test_loader = get_dataloaders(dataset_root)

  # Model, optimizer and learning-rate schedule.
  detector = create_faster_rcnn_mobilenet_v3().to(device)
  sgd, scheduler = get_optim(detector)

  num_epochs = 5
  for current_epoch in range(num_epochs):
    # One pass over the training data, logging every 10 iterations.
    train_one_epoch(model = detector,
                    optimizer = sgd,
                    data_loader = train_loader,
                    device = device,
                    epoch = current_epoch,
                    print_freq = 10)

    # Advance the schedule before evaluating on the test split.
    scheduler.step()
    evaluate(model = detector, data_loader = test_loader, device = device)

    # Persist a checkpoint after every epoch.
    model_save_to_drive(detector, current_epoch)

  return detector


In [None]:
# Run the train / evaluate / checkpoint loop and bind the returned model to `model`.
model = run()

Epoch: [0]  [   0/1089]  eta: 0:26:09  lr: 0.000002  loss: 1.6020 (1.6020)  loss_classifier: 0.7861 (0.7861)  loss_box_reg: 0.7116 (0.7116)  loss_objectness: 0.0085 (0.0085)  loss_rpn_box_reg: 0.0959 (0.0959)  time: 1.4416  data: 1.1475  max mem: 1601
Epoch: [0]  [  10/1089]  eta: 0:08:48  lr: 0.000012  loss: 1.6020 (1.6462)  loss_classifier: 0.7226 (0.7393)  loss_box_reg: 0.5685 (0.4663)  loss_objectness: 0.2330 (0.3642)  loss_rpn_box_reg: 0.0399 (0.0764)  time: 0.4895  data: 0.3236  max mem: 1931
Epoch: [0]  [  20/1089]  eta: 0:06:54  lr: 0.000022  loss: 1.3572 (1.5275)  loss_classifier: 0.7226 (0.7530)  loss_box_reg: 0.3783 (0.4343)  loss_objectness: 0.1395 (0.2773)  loss_rpn_box_reg: 0.0245 (0.0629)  time: 0.3355  data: 0.1783  max mem: 1931
Epoch: [0]  [  30/1089]  eta: 0:06:52  lr: 0.000032  loss: 1.2900 (1.4468)  loss_classifier: 0.6510 (0.7054)  loss_box_reg: 0.3440 (0.4195)  loss_objectness: 0.1088 (0.2619)  loss_rpn_box_reg: 0.0299 (0.0599)  time: 0.3347  data: 0.1691  max me