# TobaccoShelves Object Detection - PyTorch and Faster RCNN
--------------------------------
## ParallelDots coding assignment - Manav Gakhar
--------------------------------

## Importing the necessary libraries 

In [None]:
import os
import collections
import pandas as pd
import numpy as np
import functools
import matplotlib.pyplot as plt
import cv2

from sklearn import preprocessing 

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import SequentialSampler

## Modifying dataframe

In [None]:
# path to the folders
# IMG_PATH: directory that image file names are joined onto to build full paths
# annotation_path: CSV file with the annotations, loaded with pd.read_csv
IMG_PATH = "../input/tobaccoshelves/TobaccoShelves"
annotation_path = "../input/tobaccoshelves/TobaccoShelves_coco_gt_object.csv"

In [None]:
# modifying dataframe
df2 = pd.read_csv(annotation_path)

# full file path and extension-less id (text before the first ".") per row
df2['img_path'] = df2['image_name'].map(lambda name: IMG_PATH + "/" + name)
df2['img_id'] = df2['image_name'].map(lambda name: name.split(".")[0])

# duplicate the raw annotation columns under the names the dataset class reads
column_aliases = {
    'labels': 'class',
    'xmin': 'x1',
    'xmax': 'x2',
    'ymin': 'y1',
    'ymax': 'y2',
}
for alias, source_column in column_aliases.items():
    df2[alias] = df2[source_column]

In [None]:
# defining class dictionary manually
# maps every integer class id from 0 to 11 to its string form
classes = {class_id: str(class_id) for class_id in range(12)}

## Train-val-test split

In [None]:
## Splitting according to img_ids
image_ids = df2['img_id'].unique()

# number of images in the leading training slice and the trailing validation slice
N_TRAIN = 280
N_VALID = 25

train_ids = image_ids[:N_TRAIN]
valid_ids = image_ids[-N_VALID:]
# the test slice starts exactly where the training slice ends, so no image
# falls outside all three splits
test_ids = image_ids[N_TRAIN:-N_VALID]

## Defining the per-split dataframes (all annotation rows of the selected images)
valid_df = df2[df2['img_id'].isin(valid_ids)]
train_df = df2[df2['img_id'].isin(train_ids)]
test_df = df2[df2['img_id'].isin(test_ids)]

## Dataset class and helper functions 

In [None]:
# Custom dataset class 
class TobaccoShelves(Dataset):
    """Detection dataset that yields one image and its annotations per item.

    Args:
        dataframe: annotation table with the columns ``img_id``, ``xmin``,
            ``ymin``, ``xmax``, ``ymax`` and ``labels`` (one row per box).
        image_dir: directory containing the ``<img_id>.JPG`` files.
        transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the keys ``image`` and ``bboxes``.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        # one entry per distinct image; a dataset index selects from this array
        self.image_ids = dataframe['img_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, target)`` for the image at position ``index``.

        ``target`` is a dict with the keys ``boxes`` (float32 tensor, one
        ``xmin, ymin, xmax, ymax`` row per box), ``labels``, ``image_id``,
        ``area`` and ``iscrowd``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index]
        records = self.df[self.df['img_id'] == image_id]

        # reading the image using opencv; imread returns None instead of raising
        image_path = f'{self.image_dir}/{image_id}.JPG'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # getting bbox values
        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values

        # calc. area of bbox: height * width
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)

        # getting value of labels
        labels = torch.as_tensor(records['labels'].values, dtype=torch.int64)
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)

        # defining target dictionary with all necessary values
        target = {
            'labels': labels,
            'image_id': torch.tensor([index]),
            'area': area,
            'iscrowd': iscrowd,
        }

        if self.transforms:
            # NOTE(review): labels/area/iscrowd are not re-filtered here, so a
            # transform that drops boxes would leave them out of sync.
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            boxes = sample['bboxes']
        else:
            # without a transform pipeline, convert HWC numpy -> CHW tensor here
            image = torch.from_numpy(image).permute(2, 0, 1)

        # always hand back an (N, 4) float32 tensor, including for N == 0
        box_array = np.asarray(boxes, dtype=np.float32)
        if box_array.size == 0:
            box_array = box_array.reshape(0, 4)
        target['boxes'] = torch.as_tensor(box_array)

        # returned on both paths, with or without transforms
        return image, target

    def __len__(self) -> int:
        """Return the number of distinct images in the dataset."""
        return self.image_ids.shape[0]

In [None]:
# Helper functions 

# defining albumentations for training and validation sets
def get_transform_train():
    """Build the albumentations pipeline used for training samples.

    The pipeline flips horizontally with probability 0.5, adjusts brightness
    and contrast with probability 0.2, and always ends with ToTensorV2.
    Boxes are declared in ``pascal_voc`` format with ``labels`` as their
    label field.
    """
    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        ToTensorV2(p=1.0),
    ]
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(augmentations, bbox_params=box_settings)

def get_transform_valid():
    """Build the albumentations pipeline used for validation samples.

    Only ToTensorV2 is applied. Boxes are declared in ``pascal_voc`` format
    with ``labels`` as their label field.
    """
    steps = [ToTensorV2(p=1.0)]
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=box_settings)

def collate_fn(batch):
    """Regroup a sequence of per-sample tuples into one tuple per field.

    For example ``[(img1, tgt1), (img2, tgt2)]`` becomes
    ``((img1, img2), (tgt1, tgt2))``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

## Dataloaders

In [None]:
# Build the training and validation datasets, each with its own transform pipeline
train_dataset = TobaccoShelves(train_df, IMG_PATH , get_transform_train())
valid_dataset = TobaccoShelves(valid_df, IMG_PATH, get_transform_valid())


# random permutation of the training-set indices
# NOTE(review): `indices` is not passed to either loader below and is not used
# anywhere in the visible code — confirm whether it is still needed.
indices = torch.randperm(len(train_dataset)).tolist()


# training loader: batches of 4, reshuffled, collated with collate_fn
train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

# validation loader: same batching, but kept in dataset order
valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)

# checking if we have gpu available for training 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Defining the model

In [None]:
# loading pre-trained model (coco)
# NOTE(review): newer torchvision releases replace `pretrained=` with `weights=` —
# confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# NOTE(review): the `classes` dict defined earlier has 12 entries (0-11) while the
# head is built for 11; torchvision detection heads presumably count background
# as one of num_classes — confirm 11 covers every label id in the data.
num_classes = 11  

# no. of input features of classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replacing pre-trained head with new head (for fine-tuning)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

# defining hyperparameters of model
# only parameters that require gradients are handed to the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, weight_decay=0.0005)
# multiply the learning rate by 0.1 after every 5 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [None]:
# getting training helpers
# 1) install the COCO Python API from its git repository
# 2) clone the torchvision repository
# 3) copy its references/detection scripts (utils, transforms, coco_eval,
#    engine, coco_utils) into the directory that contains the clone, so that
#    `engine` and `utils` can be imported by the training cell
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
!git clone https://github.com/pytorch/vision.git
!cd vision;cp references/detection/utils.py ../;cp references/detection/transforms.py ../;cp references/detection/coco_eval.py ../;cp references/detection/engine.py ../;cp references/detection/coco_utils.py ../

## Training

In [None]:
# `engine` and `utils` are the reference detection scripts copied next to the notebook
from engine import train_one_epoch, evaluate
import utils
# training the model for two epochs
num_epochs = 2

for epoch in range(num_epochs):
    
    # one pass over the training loader
    # (print_freq presumably sets how often progress is logged — confirm in engine.py)
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=35)
    # update the learning rate
    lr_scheduler.step()
    
    # evaluate on the validation dataset
    # (currently disabled, so valid_data_loader is not used during training)
#     evaluate(model, valid_data_loader, device=device)

#saving the model
# only the state dict (the weights) is written, not the whole model object
torch.save(model.state_dict(), 'faster_rcnn_state.pth')

## Testing

In [None]:
# rebuild the architecture without any pre-trained weights; the fine-tuned
# weights are restored from WEIGHTS_FILE below
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

WEIGHTS_FILE = "./faster_rcnn_state.pth"

# must match the head size used when the weights were saved
num_classes = 11

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the default head with one of the same size as the trained head
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# load the trained weights; map_location lets a checkpoint saved on GPU be
# restored on a CPU-only machine
model.load_state_dict(torch.load(WEIGHTS_FILE, map_location=device))

# send model to the device and switch to evaluation mode, which the detection
# model needs in order to return predictions when called without targets
model = model.to(device).eval()

In [None]:
# Test helper function (for inference)
def obj_detector(img, detection_threshold=0.1):
    """Run the module-level ``model`` on one image file and return its detections.

    The model is switched to evaluation mode before the forward pass.

    Args:
        img: path of the image file, read with OpenCV.
        detection_threshold: minimum score a detection needs to be kept.

    Returns:
        A tuple ``(names, boxes, sample)``:
        ``names`` is the list of predicted label ids of the kept detections,
        ``boxes`` is an int32 array with one row of box coordinates per kept
        detection, and ``sample`` is the RGB image as a float32
        height x width x 3 array scaled to [0, 1].

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    # reading image using opencv; imread returns None instead of raising
    image = cv2.imread(img, cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {img}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    # HWC numpy array -> CHW tensor on the target device
    tensor = torch.from_numpy(image).permute(2, 0, 1).to(device)

    # in training mode the detection model expects targets, so force eval mode;
    # gradients are not needed for inference
    model.eval()
    with torch.no_grad():
        output = model([tensor])[0]

    # transferring everything to cpu (if it was on gpu)
    scores = output['scores'].cpu().numpy()
    boxes = output['boxes'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    # apply the same score filter to boxes and labels so they stay aligned
    keep = scores >= detection_threshold
    boxes = boxes[keep].astype(np.int32)
    names = labels[keep].tolist()

    return names, boxes, image

In [None]:
## Plotting inference time images
pred_files = test_df['img_path'].unique()
plt.figure(figsize=(20,60))
# only the first four test images are shown
for plot_idx, image_path in enumerate(pred_files[:4]):
    plt.subplot(10,2,plot_idx+1)
    names,boxes,sample = obj_detector(image_path)
    for box in boxes:
        # sample is a float image in [0, 1], so the box colour is given in that range
        cv2.rectangle(sample,(int(box[0]), int(box[1])),(int(box[2]), int(box[3])),(0.0, 1.0, 0.0), 2)

    plt.axis('off')
    plt.imshow(sample)
#     plt.savefig('save_image.png', bbox_inches='tight')  