Requirements

In [None]:
# Install the notebook's third-party dependencies. Only albumentations is
# version-pinned; the other packages resolve to whatever pip selects at run time.
!pip install albumentations==0.4.6
!pip install matplotlib
!pip install numpy
!pip install pandas
!pip install pycocotools
# NOTE(review): "cpython" looks like it may be a typo for "cython" — confirm which package is intended.
!pip install cpython
!pip install wget
# Clone the torchvision repository into the current working directory. A later
# cell puts /content/vision/references/detection on sys.path, so this presumably
# expects the working directory to be /content (e.g. Colab) — TODO confirm.
!git clone https://github.com/pytorch/vision

Set the dataset path and add the torchvision detection reference modules to the import path

In [15]:
import sys

# Directory of the cloned torchvision detection reference scripts. The notebook
# later imports `engine` and `utils`, which are presumably resolved from here.
_detection_refs = "/content/vision/references/detection"

# Drop any entry left by a previous run of this cell before re-inserting, so that
# re-running the cell keeps the directory first on sys.path without stacking duplicates.
sys.path[:] = [entry for entry in sys.path if entry != _detection_refs]
sys.path.insert(0, _detection_refs)

# Root folder of the extracted dataset; each split lives in a sub-folder of it.
dataset_path = "/content/drinkscoco"

Download Dataset

In [None]:
import os
import shutil

import wget

# Release asset containing the zipped dataset.
url = "https://github.com/st0bb3n/ObjectDetection-Drinks/releases/download/Dataset/drinkscoco.zip"

# Fetch the archive only when it is not already on disk, so re-running the cell
# does not download it again.
if os.path.exists("data.zip"):
    x = "data.zip"
else:
    # wget.download returns the name of the file it wrote; extract that file rather
    # than assuming the requested name was used.
    # NOTE(review): the wget package is expected to pick a new name instead of
    # overwriting an existing file — confirm against its documentation.
    x = wget.download(url, "data.zip")

# Extract into the folder the dataset class reads from (dataset_path is defined in
# the configuration cell above) instead of a path relative to the working directory.
shutil.unpack_archive(x, dataset_path)

Imports + Dataset class. Run this cell before the test.py and train.py cells below.

In [None]:
import numpy as np 
import pandas as pd 
import os
import torch
import torchvision
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torchvision import transforms as T
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset
import copy
from PIL import Image
import cv2
import albumentations as A  # our data augmentation library
import matplotlib.pyplot as plt
import datetime
import time
from torchvision.utils import draw_bounding_boxes
from pycocotools.coco import COCO
#from albumentations.pytorch import ToTensorV2
from engine import evaluate, train_one_epoch
import utils

class Drinks(datasets.VisionDataset):
    """COCO-annotated detection dataset read from ``<root>/<split>/``.

    Each item is an ``(image, target)`` pair:

    * ``image``: float32 tensor in CHW layout with values in [0, 1].
    * ``target``: dict with ``boxes`` (N x 4, xyxy), ``labels``, ``image_id``,
      ``area`` and ``iscrowd`` tensors.

    Args:
        root: Dataset root folder containing one sub-folder per split.
        split: Sub-folder to read ('train', 'valid' or 'test'); it must contain
            ``_annotations.coco.json`` next to the image files.
        transform, target_transform, transforms: Forwarded to
            ``datasets.VisionDataset``. ``__getitem__`` does not apply them; the
            image is always converted with ``ToTensor``.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None, transforms=None):
        # the 3 transform parameters are required for datasets.VisionDataset
        super().__init__(root, transforms, transform, target_transform)
        self.split = split  # train, valid, test
        self.coco = COCO(os.path.join(root, split, "_annotations.coco.json"))  # annotations stored here
        # Keep only images that have at least one annotation.
        self.ids = [
            img_id
            for img_id in sorted(self.coco.imgs.keys())
            if len(self._load_target(img_id)) > 0
        ]

    def _load_image(self, id: int):
        """Read the image with COCO id ``id`` as an RGB float32 array scaled to [0, 1].

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        file_name = self.coco.loadImgs(id)[0]['file_name']
        full_path = os.path.join(self.root, self.split, file_name)
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with the offending
            # path instead of an opaque error from cvtColor.
            raise FileNotFoundError(f"Could not read image file: {full_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

        return image/255

    def _load_target(self, id):
        """Return the list of COCO annotation dicts for image id ``id``."""
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair at position ``index`` of the filtered id list."""
        img_id = self.ids[index]
        image = self._load_image(img_id)
        # The annotation dicts are only read below, never modified, so no copy is needed.
        annotations = self._load_target(img_id)

        # convert from xywh to xyxy
        xyxy = [[x, y, x + w, y + h] for x, y, w, h in (ann['bbox'] for ann in annotations)]
        # reshape keeps the (N, 4) layout even when there are no annotations
        boxes = torch.tensor(xyxy, dtype=torch.float32).reshape(-1, 4)

        targ = {}  # here is our transformed target
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([ann['category_id'] for ann in annotations], dtype=torch.int64)
        # The position in the dataset is used as the image id, not the COCO image id.
        targ['image_id'] = torch.tensor([index])
        # Area is recomputed from the converted boxes rather than read from the annotations.
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        targ['iscrowd'] = torch.tensor([ann['iscrowd'] for ann in annotations], dtype=torch.int64)

        # The image is already float32 in [0, 1] (see _load_image); ToTensor is used for
        # the HWC -> CHW tensor conversion and, per its documentation, rescales only
        # uint8 input.
        return torchvision.transforms.ToTensor()(image), targ

    def __len__(self):
        """Number of images with at least one annotation."""
        return len(self.ids)

test.py

In [None]:
# Class metadata is read from the training split's annotation file.
coco = COCO(os.path.join(dataset_path, "train", "_annotations.coco.json"))
categories = coco.cats
n_classes = len(categories.keys())
classes = [i[1]['name'] for i in categories.items()]

# Pretrained detector whose box-predictor head is replaced to output n_classes scores.
# NOTE(review): FastRCNNPredictor's class count is expected to include the background
# class; this presumes the annotation categories already account for it — confirm.
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_classes)

# The `transforms` argument is only forwarded to VisionDataset; Drinks.__getitem__
# does not apply it.
train_dataset = Drinks(root=dataset_path, transforms=torchvision.transforms.ToTensor())
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=utils.collate_fn)

# Smoke test: push one training batch through the model (still on CPU) to check that
# the dataset output is compatible with the model's expected inputs.
images,targets = next(iter(train_loader))
images = list(image for image in images)
targets = [{k:v for k, v in t.items()} for t in targets]
output = model(images, targets)

# Use the GPU when one is available instead of failing on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluate the fine-tuned weights when the training cell has already saved them;
# without a checkpoint the freshly replaced head is untrained and is evaluated as-is.
weights_path = "trainedmodel.pth"
if os.path.exists(weights_path):
    model.load_state_dict(torch.load(weights_path, map_location=device))
model = model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)

# Same split and root as train_dataset, so reuse it instead of parsing the
# annotation file a second time.
dataset = train_dataset
test_dataset = Drinks(root=dataset_path, split="test", transforms=torchvision.transforms.ToTensor())

data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=2,
        collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=2,
        collate_fn=utils.collate_fn)

evaluate(model, data_loader_test, device=device)

train.py

In [None]:
# Class metadata is read from the training split's annotation file.
coco = COCO(os.path.join(dataset_path, "train", "_annotations.coco.json"))
categories = coco.cats
n_classes = len(categories.keys())
classes = [i[1]['name'] for i in categories.items()]

model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features # we need to change the head
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_classes)

# The `transforms` argument is only forwarded to VisionDataset; Drinks.__getitem__
# does not apply it.
train_dataset = Drinks(root=dataset_path, transforms=torchvision.transforms.ToTensor())
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=utils.collate_fn)

# Smoke test: push one training batch through the model (still on CPU) to check that
# the dataset output is compatible with the model's expected inputs.
images,targets = next(iter(train_loader))
images = list(image for image in images)
targets = [{k:v for k, v in t.items()} for t in targets]
output = model(images, targets)

# Train on the GPU when one is available; fall back to CPU instead of failing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)

# Same split and root as train_dataset, so reuse it instead of parsing the
# annotation file a second time.
dataset = train_dataset
test_dataset = Drinks(root=dataset_path, split="test", transforms=torchvision.transforms.ToTensor())

# Note: the training loop below iterates train_loader (batch size 4); data_loader is
# defined but not used in this cell.
data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=2,
        collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=2,
        collate_fn=utils.collate_fn)

num_epochs=10

# No learning-rate scheduler is configured, so the optimizer keeps lr=0.01 throughout.
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=50)
    # Evaluate on the test split after every epoch.
    evaluate(model, data_loader_test, device=device)

# Switch to inference mode, save the learned weights, then release cached GPU memory.
model.eval()
torch.save(model.state_dict(), "trainedmodel.pth")
torch.cuda.empty_cache()