In [None]:
import pandas as pd
from pathlib import Path
import ast
import numpy as np
from PIL import Image, ImageDraw
from torch.utils.data import Dataset, DataLoader
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from matplotlib import pyplot as plt
from typing import Union
import numpy as np

import torchvision.transforms as T

In [None]:
# Root directory of the competition data (relative to the notebook's working directory).
data_path = Path('..') / 'input' / 'global-wheat-detection'

In [None]:
# Load the annotation table and preview its first rows.
train_csv = data_path / 'train.csv'
df = pd.read_csv(train_csv)
df.head()

In [None]:
# Expand the stringified "[x, y, w, h]" bbox column into four numeric columns.
# The guard keeps the cell re-runnable: after the first run `bbox` no longer
# exists, and an unguarded second run would raise a KeyError.
if 'bbox' in df.columns:
    # reshape(-1, 4) keeps the array 2-D even for an empty frame and fails
    # loudly if a row does not contain exactly four values.
    bbox_values = np.array(
        df['bbox'].map(ast.literal_eval).tolist(), dtype=float
    ).reshape(-1, 4)

    for col_idx, dim in enumerate(['x', 'y', 'w', 'h']):
        df[dim] = bbox_values[:, col_idx]

    # Rebind instead of mutating in place so the cell has no hidden side effects.
    df = df.drop(columns='bbox')

df.head()

In [None]:
# Hold out the last `n_valid` distinct images (in order of first appearance) for validation.
n_valid = 665
image_ids = df['image_id'].unique()
train_ids, valid_ids = image_ids[:-n_valid], image_ids[-n_valid:]

In [None]:
# Route every annotation row to the split that owns its image.
in_train = df['image_id'].isin(train_ids)
in_valid = df['image_id'].isin(valid_ids)
train_df = df.loc[in_train]
valid_df = df.loc[in_valid]

In [None]:
class WheatDataset(Dataset):
    """Detection dataset yielding one image together with all of its boxes.

    Args:
        df: Annotation frame with one row per box and the columns
            ``image_id``, ``x``, ``y``, ``w``, ``h`` (corner plus size).
        image_dir: Directory containing the ``<image_id>.jpg`` files.
        transforms: Optional callable applied to the image tensor only. The
            boxes are not passed through it, so it must not change geometry.
    """

    def __init__(self, df, image_dir, transforms = None):
        super().__init__()

        self.df = df
        self.image_ids = self.df['image_id'].unique()
        self.image_dir = Path(image_dir)
        self.transforms = transforms

    def _build_target(self, idx, records):
        """Build the target dict for the sample at position ``idx``.

        Args:
            idx: Position of the image in ``self.image_ids``.
            records: Rows of the annotation frame belonging to that image.

        Returns:
            Dict with ``boxes`` (float32, ``[N, 4]`` as x1, y1, x2, y2),
            ``labels`` (int64 ones, ``[N]``) and ``image_id`` (``[idx]``).
        """
        # copy=True guarantees a writable array that does not alias the frame,
        # so the in-place conversion below can never touch ``self.df``.
        xyxy = records[['x', 'y', 'w', 'h']].to_numpy(dtype='float32', copy=True)
        # (x, y, w, h) -> (x1, y1, x2, y2)
        xyxy[:, 2:] += xyxy[:, :2]

        return {
            # Boxes stay floating point: torchvision detection models expect
            # float boxes, and an integer cast would truncate coordinates.
            'boxes': torch.from_numpy(xyxy),
            # Single foreground class, so every box gets label 1.
            'labels': torch.ones((len(records),), dtype=torch.int64),
            'image_id': torch.tensor([idx]),
        }

    def __getitem__(self, idx):
        """Return ``(image_tensor, target_dict, image_id)`` for position ``idx``."""
        image_id = self.image_ids[idx]
        records = self.df[self.df['image_id'] == image_id]

        im_name = image_id + '.jpg'
        # The context manager closes the file handle once the pixels are decoded.
        with Image.open(self.image_dir/im_name) as raw:
            img = raw.convert("RGB")
        img = T.ToTensor()(img)

        if self.transforms:
            img = self.transforms(img)

        return img, self._build_target(idx, records), image_id

    def __len__(self):
        """Number of distinct images in the annotation frame."""
        return self.image_ids.shape[0]

In [None]:
# Both splits read their images from the same `train` folder; only the annotation rows differ.
train_image_dir = data_path / 'train'
train_ds, val_ds = (WheatDataset(frame, train_image_dir) for frame in (train_df, valid_df))

In [None]:
def collate_fn(batch):
    """Transpose a list of per-sample tuples into one tuple per field.

    No stacking is attempted, so fields whose size varies per sample are
    simply kept side by side.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Training batches are reshuffled every epoch so SGD does not see the images in a fixed order.
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=4, collate_fn=collate_fn, pin_memory=True)
# The validation loader must wrap the held-out split, not the training data.
val_dl = DataLoader(val_ds, batch_size=8, shuffle=False, collate_fn=collate_fn, pin_memory=True)

In [None]:
# Pull a single batch for visual sanity checks; the third field (image ids) is not needed here.
first_batch = next(iter(train_dl))
images, targets, _ = first_batch

In [None]:
# Ground-truth boxes of the third sample in the batch; the shape shows how many boxes it has.
boxes = targets[2]['boxes']
boxes.shape

In [None]:
# Peek at the first box of that sample to eyeball the coordinate values.
boxes[0]

In [None]:
# Move channels last, rescale by 255 and cast to uint8 so PIL can display the third batch image.
hwc = images[2].permute(1, 2, 0).numpy()
im = (hwc * 255).astype('uint8')
sample = Image.fromarray(im)

In [None]:
# Overlay the ground-truth boxes on the sample image.
draw = ImageDraw.Draw(sample)
for box in boxes:
    # ImageDraw.rectangle is documented to take a sequence [x0, y0, x1, y1];
    # tolist() hands it plain Python numbers instead of relying on how Pillow
    # interprets a NumPy array, whatever the tensor dtype is.
    draw.rectangle(box.tolist(), fill = None, outline = "red")
sample

In [None]:
# Pretrained Faster R-CNN whose box head is replaced by a fresh predictor for this task.
num_classes = 2 # should be initialized as target_col.nunique + 1
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
# The new head must accept the same feature width as the one it replaces.
in_features = model.roi_heads.box_predictor.cls_score.in_features
box_predictor = FastRCNNPredictor(in_features, num_classes)
model.roi_heads.box_predictor = box_predictor

In [None]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Optimise only the parameters that are not frozen.
num_epochs = 5
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:
%%time
model.to(device)
for epoch in range(num_epochs):
    epoch_loss = 0
    for images, targets, _ in train_dl:
        optimizer.zero_grad()
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
#         print(loss_dict)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()

        losses.backward()
        optimizer.step()
    print(f"loss for epoch {epoch}: {epoch_loss / len(train_dl)}")
    

In [None]:
# Persist the whole model object (not just its state_dict) to the Kaggle working directory.
# NOTE(review): the path is absolute and Kaggle-specific; adjust it when running elsewhere.
torch.save(model, '/kaggle/working/model.pth')

In [None]:
results = []
detection_threshold = 0.45  # minimum confidence score for a detection to be kept
nms_iou_threshold = 0.3     # IoU above which overlapping detections are suppressed
model.eval()
model.to(device)

# Inference only: no_grad avoids building autograd graphs and saves memory.
with torch.no_grad():
    for images, targets, idx in val_dl:

        images = [image.to(device) for image in images]
        outputs = model(images)

        for i, output in enumerate(outputs):

            # Move predictions to the CPU so later drawing cells do not depend on the GPU.
            boxes = output['boxes'].detach().cpu()
            scores = output['scores'].detach().cpu()

            # Apply the confidence threshold, which was previously declared but never used.
            confident = scores >= detection_threshold
            boxes, scores = boxes[confident], scores[confident]

            keep = torchvision.ops.nms(boxes, scores, nms_iou_threshold)
            boxes, scores = boxes[keep], scores[keep]

            image_id = idx[i]
            results.append((image_id, boxes, scores))

            # Demo only: stop after the first image of the first batch. The
            # following cells visualise `image_id` and `boxes` from this image.
            break
        break

In [None]:
# Re-read from disk the image whose predictions were kept by the inference cell.
im_name = f'{image_id}.jpg'
img_path = data_path / 'train' / im_name
img = T.ToTensor()(Image.open(img_path).convert("RGB"))

In [None]:
# TODO: make a function to prepare and annotate images
# Channels last, rescaled by 255 and cast to uint8 so PIL can render the tensor.
pixels = img.detach().permute(1, 2, 0).numpy()
im = (pixels * 255).astype('uint8')
vsample = Image.fromarray(im)

In [None]:
# Overlay the predicted boxes kept by the inference cell on the reloaded image.
draw = ImageDraw.Draw(vsample)
for box in boxes:
    corners = [coord for coord in box]
    draw.rectangle(corners, fill = None, outline = "red")
vsample

In [None]:
# Collects one entry per forward pass of the hooked layer.
features = []


def store_features(mod, inp, outp):
    """Forward hook that records the hooked layer's output in ``features``.

    Stores the batched output, [batch-size, channel, height, width]; use
    torch.unbind if you want one feature map per image instead. Returns
    nothing, so the layer's output is passed on unchanged.
    """
    captured = outp.data
    features.append(captured)

In [None]:
# Attach the feature-capturing hook to the module whose qualified name matches `to_hook`.
# To pass more arguments to store_features, wrap it with a partial function.
to_hook = 'backbone.fpn.layer_blocks.3'
matching_layers = [module for module_name, module in model.named_modules() if module_name == to_hook]
for layer in matching_layers:
    layer.register_forward_hook(store_features)

### Reference:

https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train?sortBy=relevance&group=everyone&search=Pytorch&page=1&pageSize=20&competitionId=19989