## Stop Colab from disconnecting
Paste the following JavaScript into the browser's developer console (Ctrl + Shift + I). It simulates a click on the “Connect” button every 60 seconds, which keeps the notebook active.


In [None]:
# The text below is JavaScript for the browser console, not Python. It is kept
# inside a string literal so that running this cell does not try to execute it.
'''
function ConnectButton(){
  console.log("Connect pushed");
  document.querySelector("#top-toolbar > colab-connectbutton").shadowRoot.querySelector("#connect").click()
}
setInterval(ConnectButton,60000);
'''

## Set up

In [None]:
# Mount Google Drive under /content/gdrive so the dataset and weights stored
# there are reachable; force_remount=True asks Colab to mount again even if a
# mount already exists.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

In [None]:

# Make the project folder the working directory, so relative paths used later
# (e.g. the helper modules copied into ./) resolve inside it.
%cd /content/gdrive/MyDrive/IntrotoCS_2023/Faster_RCNN

In [None]:
# Install the Python dependencies, then download the TorchVision repo to use
# some files from references/detection
!pip install pycocotools --quiet
!pip install torchmetrics
!pip install git+https://github.com/albumentations-team/albumentations.git
!git clone https://github.com/pytorch/vision.git
# NOTE(review): every `!` line runs in its own subshell starting from the
# current working directory, so this checkout is not executed inside ./vision.
# The files copied below therefore presumably come from the repository's
# default branch, not v0.3.0 - confirm which version is intended before
# changing this line (the v0.3.0 helpers may not work with a recent torch).
!git checkout v0.3.0

# Copy the detection helper modules next to the notebook so they can be
# imported as top-level modules (engine, utils, transforms, ...).
!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

In [None]:
# Basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# xml library for parsing xml files
from xml.etree import ElementTree as et

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

## Prepare Dataset

In [None]:
# Root directories of the dataset splits used for training and for testing.
# UAVImagesDataset lists the "images" sub-directory of each root.
# NOTE(review): train_dir also contains the val split, and test_dir points at
# that same val split, so images scored as the "test set" are also part of the
# pool the training subset is drawn from - confirm this overlap is intended.
train_dir = ['/content/gdrive/MyDrive/IntrotoCS_2023/ver12/train', '/content/gdrive/MyDrive/IntrotoCS_2023/ver12/val']
test_dir = ['/content/gdrive/MyDrive/IntrotoCS_2023/ver12/val']
class UAVImagesDataset(torch.utils.data.Dataset):
    """Object-detection dataset of UAV images labelled with Pascal VOC XML files.

    Every directory in ``files_dir_list`` must contain an ``images``
    sub-directory holding ``.jpg`` / ``.png`` files and a sibling ``labels``
    sub-directory holding one ``<image name without extension>.xml`` file per
    image.

    Args:
        files_dir_list: Iterable of dataset root directories; their images are
            concatenated in the given order.
        width: Width in pixels that every image is resized to.
        height: Height in pixels that every image is resized to.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the keys ``'image'``, ``'bboxes'`` and ``'labels'``
            (the albumentations calling convention).
    """

    def __init__(self, files_dir_list, width, height, transforms=None):
        self.transforms = transforms
        self.height = height
        self.width = width

        # Collect the image paths directory by directory. File names are
        # sorted inside each directory so that an index always refers to the
        # same image between runs.
        self.imgs = []
        for files_dir in files_dir_list:
            images_dir = os.path.join(files_dir, "images")
            self.imgs.extend(
                os.path.join(images_dir, image)
                for image in sorted(os.listdir(images_dir))
                if image.endswith(('.jpg', '.png'))
            )

        # classes: index 0 is reserved for background. An explicit placeholder
        # is used because the bare name `_` only exists inside an interactive
        # IPython session and raises NameError everywhere else.
        self.classes = ['__background__', 'tree']

    def _parse_annotation(self, annot_file_path, orig_width, orig_height):
        """Read the objects of one Pascal VOC XML file.

        Returns a ``(boxes, labels)`` pair of plain lists. Each box is
        ``[xmin, ymin, xmax, ymax]`` rescaled from the original image size
        (``orig_width`` x ``orig_height``) to ``self.width`` x ``self.height``;
        each label is the index of the object's name in ``self.classes``.
        """
        boxes = []
        labels = []
        root = et.parse(annot_file_path).getroot()
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            boxes.append([
                (xmin / orig_width) * self.width,
                (ymin / orig_height) * self.height,
                (xmax / orig_width) * self.width,
                (ymax / orig_height) * self.height,
            ])
        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        ``image`` is the RGB image resized to ``(height, width)`` and scaled
        to the range 0..1; when ``transforms`` is set it is whatever the
        transform returns under ``'image'``. ``target`` is a dict with the
        keys ``boxes`` (float32, N x 4, ``[xmin, ymin, xmax, ymax]``),
        ``labels`` (int64, N), ``area`` (N), ``iscrowd`` (int64 zeros, N) and
        ``image_id`` (one-element tensor holding ``idx``).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_path = self.imgs[idx]

        # reading the image and converting it to the correct size and color
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image file: {image_path}")

        # cv2 image gives size as height x width
        ht, wt = img.shape[:2]

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # `interpolation` has to be passed by keyword: the third positional
        # parameter of cv2.resize is the output array `dst`, so a positional
        # flag is not used as the interpolation mode.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        # dividing by 255 to bring the pixel values into 0..1
        img_res /= 255.0

        # annotation file: <root>/labels/<image name>.xml next to <root>/images
        images_dir, image_name = os.path.split(image_path)
        annot_filename = os.path.splitext(image_name)[0] + ".xml"
        annot_file_path = os.path.join(os.path.dirname(images_dir), "labels",
                                       annot_filename)

        boxes, labels = self._parse_annotation(annot_file_path, wt, ht)

        if self.transforms:
            sample = self.transforms(image=img_res, bboxes=boxes, labels=labels)
            img_res = sample['image']
            # The transform may drop boxes (e.g. via a visibility threshold),
            # so boxes AND labels are taken from its output to stay aligned.
            boxes = sample['bboxes']
            labels = sample['labels']

        # reshape(-1, 4) keeps the tensor two-dimensional when there are no
        # boxes, so the column indexing below also works for empty images.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        # area of every box, computed from the final (transformed) boxes
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        return img_res, target

    def __len__(self):
        """Return the number of images found across all directories."""
        return len(self.imgs)


# check dataset: build it without transforms at a small 224x224 size
dataset = UAVImagesDataset(train_dir, 224, 224)
print('length of dataset = ', len(dataset), '\n')

# getting the image and target for a test index.  Feel free to change the index.
# No transforms are set here, so `img` is the resized image array itself.
img, target = dataset[0]
print(img.shape, '\n',target)

In [None]:
# Function to visualize bounding boxes in the image

def plot_img_bbox(img, target):
    """Show ``img`` with every box of ``target['boxes']`` outlined in red.

    Args:
        img: Image in any form accepted by ``Axes.imshow``.
        target: Mapping whose ``'boxes'`` entry is a tensor with one
            ``[xmin, ymin, xmax, ymax]`` row per box. It is only read; the
            mapping is not modified.
    """
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(5, 5)
    ax.imshow(img)

    # Work on a local CPU copy. Writing it back into `target` would silently
    # move the caller's boxes to another device than the rest of the dict.
    boxes = target['boxes'].detach().cpu().tolist()
    for xmin, ymin, xmax, ymax in boxes:
        # Rectangle wants the top-left corner plus width and height.
        rect = patches.Rectangle((xmin, ymin),
                                 xmax - xmin, ymax - ymin,
                                 linewidth=2,
                                 edgecolor='r',
                                 facecolor='none')

        # Draw the bounding box on top of the image
        ax.add_patch(rect)
    plt.show()

In [None]:
# Send train=True for training transforms and False for val/test transforms
def get_transform(train):
    """Build the albumentations pipeline for one dataset split.

    Both pipelines end with ToTensorV2 and declare Pascal VOC boxes with a
    ``labels`` field. The training pipeline additionally applies, with
    probability 0.5, exactly one of horizontal flip, 90-degree rotation or
    vertical flip, and sets ``min_visibility=0.1`` on its box parameters.
    """
    if not train:
        eval_bbox_params = {'format': 'pascal_voc', 'label_fields': ['labels']}
        return A.Compose([ToTensorV2(p=1.0)], bbox_params=eval_bbox_params)

    # Each candidate uses p=1 so that, once OneOf fires, the chosen
    # augmentation is always applied.
    flip_or_rotate = A.OneOf(
        [
            A.HorizontalFlip(p=1),
            A.RandomRotate90(p=1),
            A.VerticalFlip(p=1),
        ],
        p=0.5,
    )
    train_steps = [
        flip_or_rotate,
        # ToTensorV2 converts image to pytorch tensor without div by 255
        ToTensorV2(p=1.0),
    ]
    train_bbox_params = A.BboxParams(format='pascal_voc',
                                     min_visibility=0.1,
                                     label_fields=['labels'])
    return A.Compose(train_steps, bbox_params=train_bbox_params)

## Model with backbone Resnet50

In [None]:
def get_object_detection_model(num_classes):
    """Return a pre-trained Faster R-CNN (ResNet-50 FPN) with a new box head.

    The box predictor of the pre-trained detector is replaced by a fresh
    ``FastRCNNPredictor`` that outputs ``num_classes`` classes.
    """
    # load a model pre-trained on COCO
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The replacement head must accept the same number of input features as
    # the classifier it replaces.
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [None]:
# use our dataset and defined transformations; the same files are wrapped
# twice so the training subset gets augmentations and the validation subset
# does not
dataset = UAVImagesDataset(train_dir, 1024, 1024, transforms= get_transform(train=True))
dataset_val = UAVImagesDataset(train_dir, 1024, 1024, transforms= get_transform(train=False))

# shuffle the indices with a fixed seed so the split is reproducible
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# train/validation split: the last `tsize` shuffled indices are held out
test_split = 0.2
tsize = int(len(dataset)*test_split)
# Slice with an explicit boundary. `indices[:-tsize]` / `indices[-tsize:]`
# would yield an empty training set and a validation set containing
# everything when tsize is 0 (fewer than 5 images).
train_size = len(indices) - tsize
dataset = torch.utils.data.Subset(dataset, indices[:train_size])
dataset_val = torch.utils.data.Subset(dataset_val, indices[train_size:])

# define training and validation data loaders
# Change num_workers to match the CPU cores available to the runtime
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=8, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_val = torch.utils.data.DataLoader(
    dataset_val, batch_size=8, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [None]:
# to train on gpu if selected.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# two classes: background (index 0) and 'tree'
num_classes = 2

# get the model using our helper function
model = get_object_detection_model(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(params, lr = 0.0005, weight_decay=0.0005)

# and a learning rate scheduler which anneals the learning rate along
# a cosine with T_max = 5 scheduler steps
# NOTE(review): the training loop steps this scheduler once per epoch for 50
# epochs, so with T_max = 5 the learning rate presumably rises and falls
# several times instead of decaying once - confirm this is intended.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 5)

In [None]:
# training for num_epochs epochs
num_epochs = 50

for epoch in range(num_epochs):
    print(f"Training epoch: {epoch + 1}/{num_epochs}")
    # training for one epoch
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate (once per epoch)
    lr_scheduler.step()
    # evaluate on the held-out validation subset
    evaluate(model, data_loader_val, device=device)

# show the GPU state once training has finished
!nvidia-smi

In [None]:
# Print the current GPU status as reported by nvidia-smi
!nvidia-smi

In [None]:
# the function takes the original prediction and the iou threshold.

def apply_nms(orig_prediction, iou_thresh=0.3):
    """Return a copy of a prediction with overlapping boxes suppressed.

    Args:
        orig_prediction: Mapping with the tensors ``'boxes'``, ``'scores'``
            and ``'labels'`` for one image. It is left unmodified.
        iou_thresh: IoU threshold passed to ``torchvision.ops.nms``.

    Returns:
        A new dict with the same keys in which ``'boxes'``, ``'scores'`` and
        ``'labels'`` contain only the entries kept by NMS.
    """
    # torchvision returns the indices of the bboxes to keep
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)

    # Shallow-copy first: assigning into `orig_prediction` itself would
    # overwrite the caller's unfiltered prediction.
    final_prediction = dict(orig_prediction)
    for key in ('boxes', 'scores', 'labels'):
        final_prediction[key] = orig_prediction[key][keep]

    return final_prediction

# function to convert a torchtensor back to PIL image
def torch_to_pil(img):
    """Convert an image tensor into an RGB PIL image."""
    to_pil = torchtrans.ToPILImage()
    pil_image = to_pil(img)
    return pil_image.convert('RGB')

In [None]:
# pick one image from the val set
img, target = dataset_val[0]
# put the model in evaluation mode
model.eval()
# no gradients are needed for inference; the model takes a list of images and
# returns one prediction per image, so [0] selects the result for `img`
with torch.no_grad():
    prediction = model([img.to(device)])[0]

# compare the number of raw predictions (before NMS) with the ground truth
print('predicted #boxes: ', len(prediction['labels']))
print('real #boxes: ', len(target['labels']))

In [None]:
from typing_extensions import final
# Test set
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision import ops

# Named `map_metric` rather than `map` so the built-in `map` is not shadowed
# for the rest of the notebook.
map_metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)

test_dataset = UAVImagesDataset(test_dir, 1024, 1024, transforms= get_transform(train=False))

preds = []
targets = []
true_pos = 0
false_pos = 0
false_neg = 0
total_label = 0

# put the model in evaluation mode once, before the loop
model.eval()
# Start at index 0: starting at 1 would leave the first test image out of
# every statistic while "Dataset length" still counts it.
for i in range(len(test_dataset)):
    img, target = test_dataset[i]
    targets.append(target)
    with torch.no_grad():
        prediction = model([img.to(device)])[0]
    nms_prediction = apply_nms(prediction, iou_thresh=0.01)

    # Store CPU copies for the mAP metric so that predictions and targets
    # end up on the same device regardless of where the model runs.
    preds.append({key: value.cpu() for key, value in nms_prediction.items()})

    total_label += len(target['labels'])
    num_pred = len(nms_prediction['labels'])
    # IoU matrix with one row per ground-truth box and one column per
    # predicted box; the ground truth is moved to the model's device for the
    # comparison only, `target` itself is not modified.
    IoU = ops.box_iou(target["boxes"].to(device), nms_prediction["boxes"])
    tp = 0
    fn = 0
    # a ground-truth box counts as found when any prediction overlaps it
    # with IoU > 0.4, otherwise as missed
    for result in IoU:
        if torch.any(result > 0.4):
            tp += 1
        else:
            fn += 1
    # predictions that did not account for a found ground-truth box
    fp = max(0, num_pred - tp)
    true_pos += tp
    false_pos += fp
    false_neg += fn
    print('EXPECTED OUTPUT\n')
    plot_img_bbox(torch_to_pil(img), target)
    print('MODEL OUTPUT\n')
    plot_img_bbox(torch_to_pil(img), nms_prediction)

print("Dataset length:", len(test_dataset))
print("Labels:", total_label)
# Guard the ratios: with no predictions, no labels, or no matches at all the
# denominators are 0 and the plain divisions would raise ZeroDivisionError.
precision = true_pos/(true_pos + false_pos) if (true_pos + false_pos) else 0.0
recall = true_pos/(true_pos + false_neg) if (true_pos + false_neg) else 0.0
f1_score = 2 * (precision * recall)/(precision + recall) if (precision + recall) else 0.0
print("Precision:", precision)
print("Recall:", recall)
print("F_1 score:", f1_score)

map_metric.update(preds=preds, target=targets)
map_metric.cpu()

for k, v in map_metric.compute().items():
    print(f"val_{k}: {v}")



In [None]:
# Save model
files_dir = r"/content/gdrive/MyDrive/IntrotoCS_2023/Faster_RCNN/weights"
# Create the weights folder on first use so that listing it cannot fail.
os.makedirs(files_dir, exist_ok=True)
version = len([ver for ver in sorted(os.listdir(files_dir)) if ver[-3:] == ".pt"]) # Check how many models have been saved
# The count alone can match an existing file name when earlier checkpoints
# were deleted or renamed; move on to the next free number instead of
# overwriting a saved model.
while os.path.exists(f"{files_dir}/test({version}).pt"):
    version += 1
torch.save(model.state_dict(), f"{files_dir}/test({version}).pt")
print(f"Saved to test({version}).pt")

In [None]:
# Load model
num_classes = 2
model = get_object_detection_model(num_classes)
# map_location=device lets a checkpoint that was saved from a GPU runtime be
# loaded on a CPU-only runtime as well (and the other way round).
model.load_state_dict(torch.load("/content/gdrive/MyDrive/IntrotoCS_2023/Faster_RCNN/weights/test(15).pt", map_location=device)) #Path to the model
model.to(device)
# switch to evaluation mode for inference
model.eval()

## References
https://www.kaggle.com/code/yerramvarun/fine-tuning-faster-rcnn-using-pytorch
https://medium.com/data-science-at-microsoft/how-to-smoothly-integrate-meanaverageprecision-into-your-training-loop-using-torchmetrics-7d6f2ce0a2b3