## Object Detection Tutorial

- ~~Pre-process to Remo Format~~
- ~~Upload Data to Remo (visualize) (Train / Test / Valid split?)~~
- ~~visualize~~
- annotation stats
- ~~Export from Remo.ai (data + tags) -> Plain CSV~~
- ~~Create Unified Dataset for train and test/inference~~
- ~~Train Faster RCNN~~
- ~~Save model~~
- ~~Run inference on test set and save to CSV~~
- ~~Upload to Remo (Model Predictions)~~

In [None]:
# Put a local checkout of the remo-python repository at the front of the module
# search path so that `import remo` resolves to it first.
import sys
local_path_to_repo = "../../remo-python"
sys.path.insert(0, local_path_to_repo)

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, then import remo.
# NOTE(review): mode 2 is documented by IPython as "reload all modules before
# executing code" — useful here because remo is imported from a local checkout.
%load_ext autoreload
%autoreload 2
import remo

In [None]:
# Imports
import pandas as pd
import numpy as np
import os
import glob
import random
from PIL import Image
import csv
# Seed Python's RNG so the random.sample() shuffle used for the dataset split
# draws the same sequence on every run.
random.seed(4)

import tqdm

import torch
from torch.utils.data import DataLoader, Dataset

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision.transforms as transforms


# remo is imported again here so this cell also works on its own.
import remo
# Select the 'jupyter' viewer for remo.
# presumably this makes remo views render inline in the notebook — verify against the remo docs
remo.set_viewer('jupyter')

In [None]:
# Dataset layout on disk: <root_dir>/images and <root_dir>/annotations.
root_dir = "wheat_dataset"
images_path, annotations_path = (
    os.path.join(root_dir, subfolder) for subfolder in ("images", "annotations")
)

### Train / Valid / Test Split

In [None]:
## REQUIRES MERGE OF PULL REQUEST TO RUN

# Collect the base names of every .jpg below images_path. glob returns files in
# a filesystem-dependent order, so sort before shuffling: with the RNG seeded,
# the same images then land in the same split on every machine.
im_list = sorted(os.path.basename(i) for i in glob.glob(str(images_path)+"/**/*.jpg", recursive=True))
im_list = random.sample(im_list, len(im_list))

# Defining the train / valid / test split: 40% train, 30% valid, remainder test.
# The test split takes everything after valid_idx instead of another rounded
# 30%: three independently rounded shares do not always add up to the total
# (e.g. 11 images -> 4 + 3 + 3 = 10), which would leave images untagged.
train_idx = round(len(im_list) * 0.4)
valid_idx = train_idx + round(len(im_list) * 0.3)
test_idx = len(im_list)

# Tags Dictionary: tag name -> list of image file names carrying that tag
tags_dict = {
    "train": im_list[:train_idx],
    "valid": im_list[train_idx:valid_idx],
    "test": im_list[valid_idx:test_idx],
}

# Generating Tags file
remo.generate_tags_from_folders(tags_dictionary = tags_dict)

### Upload Data

In [None]:
# Create the remo dataset from the image/annotation folder plus a CSV file.
# presumably "train_test_valid_split.csv" is the tags file produced by the
# split step — verify the file name against remo.generate_tags_from_folders
wheat_dataset = remo.create_dataset(
    name="Wheat Dataset",
    local_files=[root_dir, "train_test_valid_split.csv"],
    annotation_task="Object Detection",
)

## Visualize images/dataset

In [None]:
# Open the uploaded dataset in the remo viewer selected earlier.
wheat_dataset.view()

In [None]:
# Show remo's annotation statistics view for the dataset.
wheat_dataset.view_annotation_stats()

In [None]:
# Export the dataset's annotations in CSV format into wheat_dataset.zip and
# unpack the archive into the working directory.
# NOTE(review): full_path is passed as the string 'true', not a boolean —
# confirm this is what the remo API expects.
# presumably the archive holds the "Object detection.csv" and "tags.csv" files
# that the dataset class reads later — verify after unzipping
wheat_dataset.export_annotations_to_file("wheat_dataset.zip", annotation_format="csv", full_path='true')
!unzip wheat_dataset

In [132]:
class ObjectDetectionDataset(Dataset):
    """Serve the images (and bounding boxes) of one tagged split of a detection set.

    Args:
        annotations: path or file-like object of the annotations CSV. It must
            contain a ``file_name`` column plus ``xmin``, ``ymin``, ``xmax``
            and ``ymax`` columns, one row per bounding box.
        train_test_split: path or file-like object of the tags CSV with
            ``file_name`` and ``tag`` columns.
        transform: optional callable applied to the scaled float32 image array.
        mode: tag value selecting which images to load. With ``"test"`` the
            items carry no target.

    Items are ``(image, target, file_name)``, or ``(image, file_name)`` when
    ``mode == "test"``. ``target`` is a dict with ``boxes`` (float32, one row
    per box), ``labels`` (all ones), ``image_id``, ``area`` and ``iscrowd``
    (all zeros).
    """

    def __init__(self, annotations, train_test_split, transform=None, mode="train"):
        self.mode = mode
        self.transform = transform

        annotation_rows = pd.read_csv(annotations)

        # Tags for Test Train Split
        self.train_test_split = pd.read_csv(train_test_split).set_index("file_name")
        tags = self.train_test_split["tag"]
        wanted_images = set(tags[tags == mode].index)

        # The annotations' file_name may include directories while the tags file
        # is keyed by bare file name, so compare on the base name.
        image_names = annotation_rows["file_name"].apply(os.path.basename)

        # Select rows by membership rather than writing tags into a placeholder
        # integer column with DataFrame.update: that relied on implicit
        # int -> str upcasting and cannot handle an image listed under more
        # than one tag.
        in_split = image_names.isin(wanted_images)
        self.data = annotation_rows[in_split].assign(tag=mode).reset_index(drop=True)

        self.file_names = self.data['file_name'].unique()

    def __len__(self) -> int:
        """Return the number of distinct images in the selected split."""
        return self.file_names.shape[0]

    def __getitem__(self, index: int):
        """Load the image at ``index`` and, outside test mode, its box target."""
        file_name = self.file_names[index]
        records = self.data[self.data['file_name'] == file_name]

        # Copy the pixels into an array inside the context manager so the file
        # handle is released straight away instead of lingering until GC.
        with Image.open(file_name) as source:
            image = np.array(source, dtype=np.float32)
        image /= 255.0

        if self.transform:
            image = self.transform(image)

        if self.mode == "test":
            return image, file_name

        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values

        # area = height * width of every box
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        num_boxes = records.shape[0]
        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            # every box gets label 1: a single foreground class
            'labels': torch.ones((num_boxes,), dtype=torch.int64),
            'image_id': torch.tensor([index]),
            'area': torch.as_tensor(area, dtype=torch.float32),
            'iscrowd': torch.zeros((num_boxes,), dtype=torch.int64),
        }

        return image, target, file_name


def collate_fn(batch):
    """Transpose a batch of sample tuples into one tuple per field.

    No stacking is done, so fields may hold items of different sizes.
    """
    per_field = list(zip(*batch))
    return tuple(per_field)


In [133]:
# Shared transform applied to every loaded image.
tensor_transform = transforms.Compose([transforms.ToTensor()])


def _build_split(split_name):
    """Return the dataset for one tag and a loader over it.

    The loader yields one image per batch, in dataset order.
    """
    dataset = ObjectDetectionDataset(
        annotations="Object detection.csv",
        train_test_split="tags.csv",
        transform=tensor_transform,
        mode=split_name,
    )
    loader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn,
    )
    return dataset, loader


train_dataset, train_data_loader = _build_split("train")
test_dataset, test_data_loader = _build_split("test")
valid_dataset, valid_data_loader = _build_split("valid")

In [134]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Training

In [135]:
# Build torchvision's Faster R-CNN (ResNet-50 backbone with FPN), requesting
# pretrained weights as the starting point for fine-tuning.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [136]:
# Two output classes: 1 class (wheat) + background
num_classes = 2

# Input feature size of the model's current classification layer; the new
# prediction head has to accept the same size.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the box predictor with a fresh head sized for num_classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [137]:
model.to(device)

# Hand the optimizer only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# No learning-rate schedule is active; the StepLR variant is kept for reference.
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

num_epochs = 3

In [None]:
loss_value = 0.0

# The detection model only returns its loss dict in training mode. Set it
# explicitly so this cell still works after the model has been put in eval mode.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    num_batches = 0

    # Wrap the loader for this epoch only. Rebinding train_data_loader to the
    # tqdm wrapper would stack a new progress bar on top of the old one every
    # epoch and replace the DataLoader itself.
    for images, targets, image_ids in tqdm.tqdm(train_data_loader):

        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        # Total loss is the sum of all individual loss terms the model reports
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
        num_batches += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    # Report the mean loss over the epoch rather than only the last batch's
    # loss; an empty loader leaves the previous value untouched.
    if num_batches:
        loss_value = running_loss / num_batches

    print(f"\n Epoch #{epoch} loss: {loss_value}")

In [None]:
# Persist only the model's weights (its state_dict), not the whole model object;
# loading them back requires rebuilding the same architecture first.
torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn.pth')

## Model Predictions and Visualization

In [None]:
# Rebuild the same architecture without requesting any pretrained weights;
# the fine-tuned weights are loaded from the saved state_dict afterwards.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2  # 1 class (wheat) + background

# The prediction head must match the one used for training, otherwise the
# saved state_dict does not fit the model.
in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# map_location remaps the stored tensors onto the current device, so weights
# saved on a GPU machine also load on a CPU-only one.
model.load_state_dict(torch.load('fasterrcnn_resnet50_fpn.pth', map_location=device))
model.eval()

# Assigning the result keeps the notebook from printing the whole model.
x = model.to(device)

In [None]:
# Detections scoring below this value are discarded.
detection_threshold = 0.6
results = []

# Inference needs no gradients; no_grad avoids building the autograd graph.
# The tqdm wrapper is used inline so test_data_loader stays a DataLoader and
# re-running the cell does not nest progress bars.
with torch.no_grad():
    for images, image_ids in tqdm.tqdm(test_data_loader):

        images = [image.to(device) for image in images]
        outputs = model(images)

        for output, image_id in zip(outputs, image_ids):

            boxes = output['boxes'].detach().cpu().numpy()
            scores = output['scores'].detach().cpu().numpy()

            # Keep confident detections only; coordinates are truncated to ints.
            boxes = boxes[scores >= detection_threshold].astype(np.int32)

            file_name = os.path.basename(image_id)
            # NOTE(review): every detection is written with class 0 — confirm
            # this matches the class naming expected when uploading to remo.
            for box in boxes:
                results.append({"file_name" : file_name, "classes" : 0, "xmin" : box[0], "ymin" : box[1],
                                "xmax" : box[2], "ymax" : box[3]})

# newline='' is what the csv module asks for when opening a file for a writer;
# without it, platforms that translate line endings get blank rows.
with open('results.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["file_name", "classes", "xmin", "ymin", "xmax", "ymax"])
    writer.writeheader()
    writer.writerows(results)



In [None]:
# Upload the predictions written to results.csv as a new annotation set named
# "model_predictions" on the dataset.
wheat_dataset.create_annotation_set("Object Detection", name="model_predictions", path_to_annotation_file="./results.csv")

In [None]:
# Open the dataset in the remo viewer again, now that the prediction
# annotation set has been added.
wheat_dataset.view()