<a href="https://colab.research.google.com/github/skj092/Object-Detection/blob/main/faster_rcnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from glob import glob
import os
from pathlib import Path
import xml.etree.ElementTree as Xet
import matplotlib.pyplot as plt
import cv2

In [None]:
ls ../input/fruit-images-for-object-detection/

In [None]:
# Collect every XML annotation file of the training split.
# glob() returns matches in arbitrary order, so sort them to get the same
# file order on every run.
train_xmls = sorted(glob('../input/fruit-images-for-object-detection/train_zip/train/*.xml'))
len(train_xmls)

In [None]:

# Importing the required libraries
import xml.etree.ElementTree as Xet
import pandas as pd

cols = ["filename", "name", "xmin", "ymin", "xmax", "ymax"]


def _parse_annotation(xml_path):
    """Return one ``[filename, name, xmin, ymin, xmax, ymax]`` row per object.

    Elements are looked up by tag name rather than by child position, so the
    result does not depend on the order (or presence) of other tags in the
    file, and an image annotated with several objects yields several rows.

    Args:
        xml_path: path of a single XML annotation file.

    Returns:
        A list of rows (possibly empty) in the column order given by ``cols``.
    """
    root = Xet.parse(xml_path).getroot()
    filename = root.findtext("filename")
    parsed = []
    for obj in root.findall("object"):
        box = obj.find("bndbox")
        if box is None:
            # Object without a bounding box: nothing usable for detection.
            continue
        # int(float(...)) also accepts coordinates written like "12.0".
        coords = [int(float(box.findtext(tag))) for tag in cols[2:]]
        parsed.append([filename, obj.findtext("name")] + coords)
    return parsed


# Parsing the XML files
rows = []
for xml_path in train_xmls:
    rows.extend(_parse_annotation(xml_path))

# Build the annotation table (one row per annotated object)
df = pd.DataFrame(rows, columns=cols)
df.head()

In [None]:
# Collect the training images. glob() order is arbitrary, so sort the paths:
# dataset indices such as data[25] then refer to the same image on every run.
image_list = sorted(glob("../input/fruit-images-for-object-detection/train_zip/train/*.jpg"))
image_list[:5]

In [None]:
# Class-name -> integer id mapping used to encode the detection labels.
# NOTE(review): torchvision detection models treat label 0 as background, so
# 'apple': 0 is presumably never learned as a foreground class. Shifting the
# ids to start at 1 also requires the model head's num_classes to be at least
# (largest id + 1) — confirm and change both together.
lbltoid = {'apple':0, 'banana':1, 'orange':2}

In [None]:
from torchvision import transforms as T
import torch

# Image preprocessing pipeline: PIL image -> tensor -> float dtype.
tfms = T.Compose([
    T.PILToTensor(),
    T.ConvertImageDtype(torch.float32),
])

In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

class FruitData(torch.utils.data.Dataset):
    """Single-box detection dataset built from image paths and an annotation table.

    Each item is ``(image, bbox, label)``:

    * ``image`` is the RGB image resized to ``size`` (then passed through
      ``transform`` when one is given),
    * ``bbox`` is a ``(1, 4)`` float32 tensor ``[xmin, ymin, xmax, ymax]``
      expressed in the coordinates of the *resized* image,
    * ``label`` is a ``(1,)`` integer tensor looked up in ``lbltoid``.

    Only the first annotation row matching an image is used, so every item
    has the same tensor shapes and can be stacked by the default collate
    function.

    Args:
        img: sequence of image file paths.
        df: DataFrame with ``filename``, ``name``, ``xmin``, ``ymin``,
            ``xmax`` and ``ymax`` columns.
        transform: optional callable applied to the resized PIL image.
        size: ``(width, height)`` every image is resized to.
    """

    def __init__(self, img, df, transform=None, size=(224, 224)):
        self.images = img
        self.df = df
        self.transforms = transform
        self.size = tuple(size)

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _rescale_box(box, src_size, dst_size):
        """Map ``[xmin, ymin, xmax, ymax]`` from ``src_size`` to ``dst_size`` (both ``(w, h)``)."""
        scale_x = dst_size[0] / src_size[0]
        scale_y = dst_size[1] / src_size[1]
        xmin, ymin, xmax, ymax = box
        return [xmin * scale_x, ymin * scale_y, xmax * scale_x, ymax * scale_y]

    def __getitem__(self, idx):
        img_path = Path(self.images[idx])
        img = Image.open(img_path).convert('RGB')
        src_size = img.size  # (width, height) before resizing
        img = img.resize(self.size)

        # Use the table handed to the constructor, and query it only once.
        name = img_path.name
        matches = self.df.loc[self.df.filename == name]
        if matches.empty:
            raise IndexError(f"no annotation found for image {name!r}")
        row = matches.iloc[0]

        # The annotation is in original-image pixels; the image was resized,
        # so the box has to be scaled by the same factors to stay aligned.
        box = [float(row[col]) for col in ('xmin', 'ymin', 'xmax', 'ymax')]
        bbox = torch.as_tensor(
            [self._rescale_box(box, src_size, self.size)], dtype=torch.float32
        )
        label = torch.as_tensor([lbltoid[row['name']]])
        if self.transforms is not None:
            img = self.transforms(img)
        return img, bbox, label

In [None]:
# Dataset over the training images and their annotation table
data = FruitData(img=image_list, df=df, transform=tfms)

In [None]:
def plot_img_bbox(img, target):
    """Show ``img`` with every box in ``target['boxes']`` outlined in red.

    Args:
        img: an image accepted by ``Axes.imshow`` (H x W x C), or a torch
            tensor; a 3-channel tensor in C x H x W layout is permuted to
            H x W x C before plotting.
        target: mapping with a ``'boxes'`` entry whose items are
            ``[xmin, ymin, xmax, ymax]``.
    """
    # Imported locally: the notebook only imports matplotlib.patches in a later cell.
    import matplotlib.patches as patches

    if torch.is_tensor(img):
        img = img.detach().cpu()
        if img.ndim == 3 and img.shape[0] == 3:
            img = img.permute(1, 2, 0)
        img = img.numpy()

    fig, a = plt.subplots(1,1)
    fig.set_size_inches(5,5)
    a.imshow(img)
    for box in (target['boxes']):
        # Boxes are corner-format; Rectangle wants the anchor corner plus width/height.
        xmin, ymin, xmax, ymax = (float(v) for v in box)
        rect = patches.Rectangle((xmin, ymin),
                                 xmax - xmin, ymax - ymin,
                                 linewidth = 2,
                                 edgecolor = 'r',
                                 facecolor = 'none')

        # Draw the bounding box on top of the image
        a.add_patch(rect)
    plt.show()

# plotting the image with bboxes. Feel free to change the index
# The dataset returns (image, bbox, label); wrap the box and label in the dict
# that plot_img_bbox expects.
img, bbox, label = data[25]
plot_img_bbox(img, {'boxes': bbox, 'labels': label})

In [None]:
# Inspect the first dataset item: the (image, bbox, label) tuple from __getitem__.
data[0]

In [None]:
# Shuffled mini-batches of two samples each
data_loader = torch.utils.data.DataLoader(
    dataset=data,
    batch_size=2,
    shuffle=True,
)

In [None]:
# Pull a single batch from the loader and report the shape of each part
images, bboxs, labels = next(iter(data_loader))
print(*(batch_part.shape for batch_part in (images, bboxs, labels)))

# Modeling

In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


# Build torchvision's fasterrcnn_resnet50_fpn detector with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
# The box predictor needs one output per fruit class plus one for background,
# i.e. len(lbltoid) + 1 = 4 here (apple, banana, orange + background).
# NOTE(review): label 0 is the background slot, so the ids in lbltoid
# presumably need to start at 1 — confirm.
num_classes = len(lbltoid) + 1

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

## Sample

In [None]:
# Shapes of the sampled image batch and of its bounding-box tensor.
images.shape, bboxs.shape

In [None]:
# Package the sampled batch as one {'boxes', 'labels'} dict per image
targets = [
    {'boxes': bboxs[i], 'labels': labels[i]}
    for i in range(len(images))
]
targets

In [None]:
# Shape of the image batch that is about to be fed to the model.
images.shape

In [None]:
# Forward pass on the sample batch with its targets supplied; the training
# loop later treats the result of this same call as a dict of losses.
output = model(images, targets)

In [None]:
# Display what the model returned for the sample batch.
output

In [None]:
# Shape of a single image tensor from the batch.
images[0].shape

In [None]:
# First image of the batch as a NumPy array with its leading axis moved last
sample = images[0].permute(1, 2, 0).cpu().numpy()
# Shallow-copy every target dict into a fresh list
targets = [dict(t) for t in targets]
# Boxes of the first image as int32 pixel coordinates
boxes = targets[0]['boxes'].cpu().numpy().astype(np.int32)

In [None]:
# Preview the first image of the sample batch (the array prepared above).
plt.imshow(sample)

In [None]:
# First bounding box of the first image, as int32 values.
boxes[0]

In [None]:
import matplotlib.patches as patches
# Create figure and axes
fig, ax = plt.subplots()

# Display the image
ax.imshow(sample)

# Create a Rectangle patch from the first [xmin, ymin, xmax, ymax] box.
# Rectangle takes the anchor corner followed by width and height, so the
# corner-format box has to be converted; no rotation angle is wanted.
xmin, ymin, xmax, ymax = boxes[0]
rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=1, edgecolor='g', facecolor='none')

# Add the patch to the Axes
ax.add_patch(rect)

plt.show()

# Training

In [None]:
# Train on the GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
model.to(device)

# Optimise only the parameters that still require gradients
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Learning-rate scheduling is currently disabled
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

num_epochs = 2

In [None]:
class Averager:
    """Running mean of the values passed to ``send`` since the last ``reset``."""

    def __init__(self):
        # A new averager starts in the same state that reset() produces.
        self.reset()

    def reset(self):
        """Forget everything accumulated so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the observations so far, or 0 when none were sent."""
        if not self.iterations:
            return 0
        return 1.0 * self.current_total / self.iterations


In [None]:
loss_hist = Averager()
itr = 1

# The detector returns its loss dict only while in training mode, so make
# that explicit instead of relying on the module's current state.
model.train()

for epoch in range(num_epochs):
    loss_hist.reset()

    # Each batch is (images, bboxs, labels), matching FruitData.__getitem__.
    for images, bboxs, labels in data_loader:
        # Build the per-image target dicts from THIS batch; using the
        # notebook-level sample batch here would train every step against
        # the same boxes and labels.
        images = [image.to(device) for image in images]
        targets = [
            {'boxes': box.to(device), 'labels': label.to(device)}
            for box, label in zip(bboxs, labels)
        ]

        loss_dict = model(images, targets)

        # Total loss is the sum of the individual detection losses
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

In [None]:
# Persist the model's state_dict (not the whole module) to a file in the
# current working directory.
torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn.pth')