# Pytorch starter - FasterRCNN Train
In this notebook I enabled the GPU and Internet access (needed to download the pre-trained weights). Internet access cannot be used during inference, so I'll create a separate notebook for committing. Stay tuned!

You can find the [inference notebook here](https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-inference)

- FasterRCNN from torchvision
- Use Resnet50 backbone
- Albumentations enabled (simple flip for now)


In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import datetime as dt

from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

# Root of the annotated image dataset.
DIR_INPUT = '/kaggle/input/napra13'

# Train, test and validation all read from the same "train" folder.
DIR_TRAIN = DIR_INPUT + '/train/'
DIR_TEST = DIR_INPUT + '/train/'
DIR_VALID = DIR_INPUT + '/train/'

# Where new checkpoints are written, and the existing checkpoint to resume from.
DIR_MODEL = './'
DIR_IN_MODEL = '../input/models2/fasterrcnn_resnet50_fpn2020-10-31_loss0.057693976908922195'
MODEL_FILE_NAME_PREFIX = 'fasterrcnn_resnet50_fpn'

# Guards the one-time checkpoint restore performed by the training cell.
first = True

# Load the wheat annotations and add placeholder box columns (-1); the real
# coordinates are filled in when the `bbox` strings are parsed below.
train_df = pd.read_csv('/kaggle/input/global-wheat-detection/train.csv').assign(
    x=-1,
    y=-1,
    w=-1,
    h=-1,
)

def expand_bbox(x):
    """Parse a bounding-box string into a float array of its numbers.

    Args:
        x: Text containing the box numbers, e.g. ``"[834.0, 222.0, 56.0, 36.0]"``.
            Only unsigned decimal numbers are recognised by the pattern.

    Returns:
        np.ndarray: float array with the numbers in order of appearance, or
        ``[-1, -1, -1, -1]`` when the text contains no number at all.
    """
    numbers = re.findall("([0-9]+[.]?[0-9]*)", x)
    if len(numbers) == 0:
        # Same sentinel as before, but as a float array so that both branches
        # return the same type and can be stacked together safely.
        return np.full(4, -1.0)
    return np.array(numbers, dtype=float)

# Split the `bbox` text column into four numeric columns.
bbox_columns = ['x', 'y', 'w', 'h']
train_df[bbox_columns] = np.stack(train_df['bbox'].apply(expand_bbox))
train_df.drop(columns=['bbox'], inplace=True)

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
for bbox_column in bbox_columns:
    train_df[bbox_column] = train_df[bbox_column].astype(float)
train_df.head()

In [None]:
# Build a validation frame in the (image_id, source, x, y, w, h) layout used by
# the dataset class. Columns are added in the same order as before, then the raw
# corner/filename/class columns are dropped.
# NOTE: `valid_df` is reassigned from the train/validation split in the next cell.
valid_df = (
    pd.read_csv(DIR_VALID + '_annotations.csv')
    .assign(
        image_id=lambda df: df['filename'],
        source=lambda df: df['class'],
        # `np.float` was removed in NumPy 1.24; builtin `float` is equivalent.
        x=lambda df: df['xmin'].astype(float),
        y=lambda df: df['ymin'].astype(float),
        w=lambda df: df['xmax'] - df['xmin'].astype(float),
        h=lambda df: df['ymax'] - df['ymin'].astype(float),
    )
    .drop(columns=['xmin', 'ymin', 'xmax', 'ymax', 'filename', 'class'])
)

valid_df.head()


In [None]:
# Number of images (taken from the end of the annotation file) held out for validation.
N_VALID_IMAGES = 665

# Convert corner annotations (xmin, ymin, xmax, ymax) into the
# (image_id, source, x, y, w, h) layout used by the dataset class.
marking = (
    pd.read_csv(DIR_TRAIN + '_annotations.csv')
    .assign(
        image_id=lambda df: df['filename'],
        source=lambda df: df['class'],
        # `np.float` was removed in NumPy 1.24; builtin `float` is equivalent.
        x=lambda df: df['xmin'].astype(float),
        y=lambda df: df['ymin'].astype(float),
        w=lambda df: df['xmax'] - df['xmin'].astype(float),
        h=lambda df: df['ymax'] - df['ymin'].astype(float),
    )
    .drop(columns=['xmin', 'ymin', 'xmax', 'ymax', 'filename', 'class'])
)

image_ids = marking['image_id'].unique()
# Without this check a small dataset would silently yield an empty training set.
assert len(image_ids) > N_VALID_IMAGES, (
    f'need more than {N_VALID_IMAGES} images to split, found {len(image_ids)}'
)
valid_ids = image_ids[-N_VALID_IMAGES:]
train_ids = image_ids[:-N_VALID_IMAGES]

# Split by image so that all boxes of one image end up on the same side.
valid_df = marking[marking['image_id'].isin(valid_ids)]
marking = marking[marking['image_id'].isin(train_ids)]

In [None]:
# Display the (rows, columns) of the validation and training frames after the split.
valid_df.shape, marking.shape

In [None]:
class WheatDataset(Dataset):
    """Detection dataset yielding ``(image, target, image_id)`` per unique image.

    Args:
        dataframe: Frame with an ``image_id`` column and box columns
            ``x``, ``y``, ``w``, ``h`` (one row per box).
        image_dir: Directory the image files are read from; ``image_id`` is
            appended to it to form the file path.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with at least ``'image'`` and ``'bboxes'``.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        print('image dir : {}'.format(image_dir))
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load one image and its boxes.

        Returns:
            tuple: ``(image, target, image_id)`` where ``image`` is the RGB
            image scaled to [0, 1] (as returned by ``transforms`` when given)
            and ``target`` holds ``boxes`` (float32 tensor, ``(N, 4)`` in
            x1, y1, x2, y2 order), ``labels``, ``image_id``, ``area`` and
            ``iscrowd``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]

        image_path = f'{self.image_dir}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # Explicit copy: the array is modified in place below and must neither
        # alias nor depend on the writability of the frame's own data.
        boxes = records[['x', 'y', 'w', 'h']].to_numpy(dtype=np.float64, copy=True)
        # (x, y, w, h) -> (x1, y1, x2, y2)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)

        # there is only one class
        labels = torch.ones((records.shape[0],), dtype=torch.int64)

        # suppose all instances are not crowd
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)

        target = {}
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            boxes = sample['bboxes']

        # Always hand back a float32 (N, 4) tensor: with or without transforms,
        # and also when no box is left (N == 0), so `.to(device)` works downstream.
        target['boxes'] = torch.as_tensor(
            np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
        )

        return image, target, image_id

    def __len__(self) -> int:
        """Number of distinct images in the frame."""
        return self.image_ids.shape[0]

In [None]:
# Albumentations
def get_train_transform():
    """Training augmentation: random flip, then conversion to a tensor.

    Boxes are expected in ``pascal_voc`` (x1, y1, x2, y2) format with their
    labels passed under the ``labels`` key.
    """
    return A.Compose([
        # The probability must be passed by keyword: the first positional
        # parameter of Albumentations transforms is not `p` in every release
        # (older ones take `always_apply` first), so `A.Flip(0.5)` does not
        # reliably mean "flip half of the time".
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Validation pipeline: tensor conversion only, no augmentation."""
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


# Create the model

In [None]:
# Load a Faster R-CNN (ResNet-50 + FPN) detector pre-trained on COCO.
# NOTE(review): `pretrained=True` fetches the weights, which is why Internet access
# is enabled for this notebook; newer torchvision releases favour a `weights=`
# argument instead — confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
num_classes = 2  # 1 class (wheat) + background

# get number of input features for the classifier
# (read from the existing head so the replacement matches the ROI feature size)
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one sized for `num_classes` outputs
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)



In [None]:
class Averager:
    """Running mean of every value passed to :meth:`send` since the last reset."""

    def __init__(self):
        # Start from the same empty state that `reset` produces.
        self.reset()

    def reset(self):
        """Forget all values received so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations, or ``0`` when nothing was sent yet."""
        if self.iterations == 0:
            return 0
        return 1.0 * self.current_total / self.iterations


In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into one tuple per field.

    ``[(img0, tgt0, id0), (img1, tgt1, id1)]`` becomes
    ``((img0, img1), (tgt0, tgt1), (id0, id1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

train_dataset = WheatDataset(marking, DIR_TRAIN, get_train_transform())
valid_dataset = WheatDataset(valid_df, DIR_VALID, get_valid_transform())

# Training batches are reshuffled every epoch so SGD does not see the images in
# the same fixed file order each time. (The previous `torch.randperm` result
# was computed but never used, so it has been dropped.)
train_data_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn
)

# Validation keeps a fixed order so repeated runs look at the same batches.
valid_data_loader = DataLoader(
    valid_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn
)

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Sample

In [None]:
# Pull the first training batch and move every tensor onto the compute device.
images, targets, image_ids = next(iter(train_data_loader))
images = [image.to(device) for image in images]
targets = [
    {key: value.to(device) for key, value in target.items()}
    for target in targets
]

In [None]:
# Boxes of the first sample in the batch, truncated to integers for drawing.
boxes = targets[0]['boxes'].cpu().numpy().astype(np.int32)
# Reorder the tensor axes to (1, 2, 0) — presumably CHW -> HWC — and convert to NumPy for display.
sample = images[0].permute(1,2,0).cpu().numpy()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(64, 32))

# `sample` comes from a permuted tensor, so it is a strided (non C-contiguous)
# view; OpenCV needs a contiguous buffer to draw into the array in place.
sample = np.ascontiguousarray(sample)

for box in boxes:
    # Plain Python ints are accepted as point coordinates by every OpenCV version.
    x_min, y_min, x_max, y_max = (int(v) for v in box)
    cv2.rectangle(sample,
                  (x_min, y_min),
                  (x_max, y_max),
                  (220, 0, 0), 3)

ax.set_axis_off()
ax.imshow(sample)

# Train

In [None]:
model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=1e-5,
    momentum=0.7,
    weight_decay=1e-5,
)

# No learning-rate schedule is active; the training loop skips the scheduler
# step while this is None. Previously tried:
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
lr_scheduler = None

num_epochs = 2004

In [None]:
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """Write a checkpoint and optionally copy it as the best model.

    Args:
        state: Checkpoint object to serialise with ``torch.save``.
        is_best: When true, the saved file is also copied to ``best_model_path``.
        checkpoint_path: File path the checkpoint is written to.
        best_model_path: Destination file path of the copy made when
            ``is_best`` is true; unused otherwise.
    """
    # Imported here because the notebook's import cell does not provide
    # `shutil`; without it the `is_best` branch raised NameError.
    import shutil

    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, best_model_path)
        
def load_ckp(checkpoint_fpath, model, optimizer, map_location=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        checkpoint_fpath: Path of the checkpoint file to read.
        model: Model that receives ``checkpoint['state_dict']``.
        optimizer: Optimizer that receives ``checkpoint['optimizer']``.
        map_location: Forwarded to ``torch.load``; pass e.g. ``'cpu'`` to open
            a checkpoint saved on a GPU on a machine without one. The default
            (``None``) keeps the previous behaviour.

    Returns:
        tuple: ``(model, optimizer, epoch, loss)`` where ``epoch`` is
        ``checkpoint['epoch']`` and ``loss`` is ``checkpoint['train_loss_min']``
        as a plain Python number.
    """
    checkpoint = torch.load(checkpoint_fpath, map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

    # The stored loss may be a tensor or already a plain number; only tensors
    # (and NumPy scalars) need `.item()`.
    train_loss = checkpoint['train_loss_min']
    if hasattr(train_loss, 'item'):
        train_loss = train_loss.item()

    return model, optimizer, checkpoint['epoch'], train_loss

In [None]:
# Resume from the input checkpoint once per kernel session, if the file exists.
if first and os.path.isfile(DIR_IN_MODEL):
    first = False
    model,optimizer,a,b = load_ckp(DIR_IN_MODEL,model,optimizer)
    print(f"Model loaded from path: {DIR_IN_MODEL}")

loss_hist = Averager()
itr = 1
epochGlobal = 0
for epoch in range(num_epochs):
    # The detection model only returns a loss dict in training mode; set it
    # explicitly so re-running this cell after an inference cell still works.
    model.train()
    loss_hist.reset()
    epochGlobal = epoch
    for images, targets, image_ids in train_data_loader:

        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        # Total loss is the sum of all loss components returned by the model.
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

    # Write a checkpoint every 15th epoch (including epoch 0).
    if epoch%15 == 0:
        checkpoint = {
            'epoch': epoch + 1,
            # Last batch loss, stored as a detached CPU tensor: it stays
            # `.item()`-readable but no longer carries the autograd graph or
            # requires a GPU to load.
            'train_loss_min': losses.detach().cpu(),
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

        # save checkpoint; only the date goes through the format spec so the
        # prefix and loss text are never interpreted as strftime directives
        dtime = dt.datetime.now()
        filename = f"{MODEL_FILE_NAME_PREFIX}{dtime:%Y-%m-%d}_loss{loss_hist.value}"
        save_ckp(checkpoint, False, DIR_MODEL+filename, DIR_MODEL)
        print(f"Model saved in Epoch #{epoch} loss: {loss_hist.value} ")


In [None]:

# Every `iter(valid_data_loader)` starts a fresh pass, and the validation loader
# does not shuffle, so repeated `next(iter(...))` calls all return the same first
# batch. One fetch gives the same result without loading it three times.
images, targets, image_ids = next(iter(valid_data_loader))

In [None]:
# Move the validation batch (images and every target tensor) onto the compute device.
images = [img.to(device) for img in images]
targets = [
    {key: value.to(device) for key, value in target.items()}
    for target in targets
]

In [None]:
# Ground-truth boxes of the first validation sample, truncated to integers.
boxesvalid = targets[0]['boxes'].cpu().numpy().astype(np.int32)
# Reorder the tensor axes to (1, 2, 0) — presumably CHW -> HWC — and convert to NumPy for display.
sample = images[0].permute(1,2,0).cpu().numpy()

In [None]:
# Switch to inference mode: the model now returns detections instead of losses.
model.eval()
cpu_device = torch.device("cpu")

# No gradients are needed for prediction; disabling them avoids building the
# autograd graph and keeps the outputs free of `requires_grad`.
with torch.no_grad():
    outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

In [None]:
from PIL import Image
from torchvision import transforms

# Image to run the detector on.
img_path = '../input/testviragos/DJI_0046.JPG'

# Force three RGB channels so grayscale or RGBA files do not break the model input.
imaget = Image.open(img_path).convert('RGB')

# Use the notebook's `device` instead of a hard-coded "cuda" so the cell also runs on CPU.
sample_tensor = transforms.ToTensor()(imaget).unsqueeze_(0).to(device)

model.eval()
with torch.no_grad():
    outputs = model(sample_tensor)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

# Draw on the exact pixels the model saw: an RGB array from the same PIL image.
# Re-reading the file with cv2.imread returned BGR data (wrong colours in
# `imshow`) and may apply EXIF rotation that PIL does not, which would shift
# the predicted boxes relative to the displayed image.
sample = np.array(imaget)

In [None]:

# Minimum confidence a detection needs in order to be drawn.
SCORE_THRESHOLD = 0.2

fig, ax = plt.subplots(1, 1, figsize=(64, 32))

# Convert to NumPy up front: OpenCV cannot take tensor elements as coordinates,
# and `.detach()` keeps this working even if the outputs still track gradients.
boxesres = outputs[0]['boxes'].detach().cpu().numpy().astype(np.int32)
scores = outputs[0]['scores'].detach().cpu().numpy()

for box, score in zip(boxesres, scores):
    if score > SCORE_THRESHOLD:
        x_min, y_min, x_max, y_max = (int(v) for v in box)
        cv2.rectangle(sample,
                      (x_min, y_min),
                      (x_max, y_max),
                      (220, 0, 0), 3)

ax.set_axis_off()
ax.imshow(sample)