In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import time, random, os, cv2, ast, glob, numba
from numba import jit
from tqdm.autonotebook import tqdm
from pprint import pprint
import sys

import torch, torchvision
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import AnchorGenerator, FastRCNNPredictor, FasterRCNN
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models import mobilenet_v2, resnet101, vgg19

In [None]:
# Locations of the training annotations and of the train/test image folders.
train_csv = '../input/global-wheat-detection/train.csv'
train_dir = '../input/global-wheat-detection/train/'
test_dir = '../input/global-wheat-detection/test/'

# Annotation table; later cells read its 'image_id' and 'bbox' columns.
df = pd.read_csv(train_csv)
# df

## Preprocessing Data

In [None]:
def get_dict_list():
    '''
    Split the .jpg files found in ``train_dir`` into a training and a
    validation set (95% / 5% of the listing).
    A file named '<id>_<suffix>.jpg' is assigned to the same split as
    '<id>.jpg' when that base file exists, otherwise it keeps the split of its
    own position in the listing.
    Returns:
        train_img_dict: {image name: image path} for the training split.
        val_img_dict: {image name: image path} for the validation split.
        train_img_list: image names of the training split.
        val_img_list: image names of the validation split.
    '''

    # glob returns files in arbitrary order; sorting makes the split identical
    # from run to run.
    img = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))

    # 95% data for training
    n_train = int(0.95 * len(img))
    split_dict = {path: 'train' for path in img[:n_train]}
    split_dict.update((path, 'val') for path in img[n_train:])

    train_img_dict, val_img_dict = {}, {}
    train_img_list, val_img_list = [], []

    for path in img:
        folder, file_name = os.path.split(path)
        img_name = file_name.split('.')[0]
        # Path of the base image this file is grouped with.
        orig_path = os.path.join(folder, img_name.split('_')[0] + '.jpg')
        split = split_dict.get(orig_path, split_dict[path])
        if split == 'train':
            train_img_dict[img_name] = path
            train_img_list.append(img_name)
        else:
            val_img_dict[img_name] = path
            val_img_list.append(img_name)

    return train_img_dict, val_img_dict, train_img_list, val_img_list

In [None]:
# Function for creating a separate dataframe for training and validation images
def create_df(img_list):
    '''
    Build a dataframe holding the annotation rows of the given images.
    Args:
        img_list: image IDs to keep; rows are emitted in this order, and in
                  the order of the global ``df`` within one image.
    Returns:
        dataframe with the columns 'image_id' and 'bbox'. IDs that do not
        occur in ``df`` contribute no rows.
    '''

    # Group the boxes per image in a single pass instead of rescanning the
    # whole annotation table for every requested image.
    boxes_by_image = {}
    for img_id, box in zip(df['image_id'].values, df['bbox'].values):
        boxes_by_image.setdefault(img_id, []).append(box)

    image_id = []
    bbox = []
    for name in img_list:
        boxes = boxes_by_image.get(name, ())
        image_id.extend([name] * len(boxes))
        bbox.extend(boxes)

    df_new = pd.DataFrame()
    df_new['image_id'] = image_id
    df_new['bbox'] = bbox

    return df_new

In [None]:
# Name -> path lookups and name lists for the training / validation split.
train_img_dict, val_img_dict, train_img_list, val_img_list = get_dict_list()

# Annotation rows restricted to each split.
df_train = create_df(train_img_list)
df_val = create_df(val_img_list)

## Evaluation Metrics

In [None]:
# BoilerPlate code
class AverageMeter(object):
    """Running statistics of a metric: latest value, sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` in the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
# AS PER COMPETITION METRIC
# BoilerPlate Code
# IoU thresholds the image precision is averaged over. They are stored in a
# numba typed list and passed to the jit-compiled calculate_image_precision
# by calculate_final_score.
iou_thresholds = numba.typed.List()

for x in [0.5, 0.55, 0.6, 0.65, 0.7, 0.75]:
    iou_thresholds.append(x)

@jit(nopython=True)
def calculate_iou(gt, pr, form='pascal_voc') -> float:
    """Calculates the Intersection over Union of two boxes.

    Args:
        gt: (np.ndarray[Union[int, float]]) coordinates of the ground-truth box
        pr: (np.ndarray[Union[int, float]]) coordinates of the predicted box
        form: (str) gt/pred coordinates format
            - pascal_voc: [xmin, ymin, xmax, ymax]
            - coco: [xmin, ymin, w, h]
    Returns:
        (float) Intersection over union; 0.0 when the boxes do not overlap.
    """
    if form == 'coco':
        # Work on copies so the caller's arrays are not modified while
        # converting [xmin, ymin, w, h] to [xmin, ymin, xmax, ymax].
        gt = gt.copy()
        pr = pr.copy()

        gt[2] = gt[0] + gt[2]
        gt[3] = gt[1] + gt[3]
        pr[2] = pr[0] + pr[2]
        pr[3] = pr[1] + pr[3]

    # Calculate overlap area
    # Every extent below adds 1 (presumably inclusive pixel coordinates).
    dx = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1

    if dx < 0:
        return 0.0

    dy = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1

    if dy < 0:
        return 0.0

    overlap_area = dx * dy

    # Calculate union area
    union_area = (
            (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1) +
            (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1) -
            overlap_area
    )

    return overlap_area / union_area

@jit(nopython=True)
def find_best_match(gts, pred, pred_idx, threshold = 0.5, form = 'pascal_voc', ious=None) -> int:
    """Returns the index of the 'best match' between the
    ground-truth boxes and the prediction. The 'best match'
    is the ground-truth box with the highest IoU among those whose
    IoU is at least `threshold`.

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        pred: (List[Union[int, float]]) Coordinates of the predicted box
        pred_idx: (int) Index of the current predicted box
        threshold: (float) Minimum IoU for a match
        form: (str) Format of the coordinates
        ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious.
              Negative entries mean "not computed yet".

    Return:
        (int) Index of the best match GT box (-1 if no match above threshold)
    """
    best_match_iou = -np.inf
    best_match_idx = -1

    for gt_idx in range(len(gts)):

        # A negative first coordinate marks a GT box that was already matched
        # (calculate_precision overwrites matched rows with -1).
        if gts[gt_idx][0] < 0:
            # Already matched GT-box
            continue

        # Reuse the cached IoU when a cache is supplied.
        iou = -1 if ious is None else ious[gt_idx][pred_idx]

        if iou < 0:
            iou = calculate_iou(gts[gt_idx], pred, form=form)

            if ious is not None:
                ious[gt_idx][pred_idx] = iou

        if iou < threshold:
            continue

        if iou > best_match_iou:
            best_match_iou = iou
            best_match_idx = gt_idx

    return best_match_idx

@jit(nopython=True)
def calculate_precision(gts, preds, threshold = 0.5, form = 'coco', ious=None) -> float:
    """Calculates precision for GT - prediction pairs at one threshold.

    Note: `gts` is modified in place (matched rows are overwritten with -1),
    so callers that need the boxes afterwards must pass a copy.

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes,
               sorted by confidence value (descending)
        threshold: (float) Threshold
        form: (str) Format of the coordinates
        ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious.

    Return:
        (float) Precision, computed as tp / (tp + fp + fn)
    """
    n = len(preds)
    tp = 0
    fp = 0

    # Predictions are visited in the order given by the caller.
    for pred_idx in range(n):

        best_match_gt_idx = find_best_match(gts, preds[pred_idx], pred_idx,
                                            threshold=threshold, form=form, ious=ious)

        if best_match_gt_idx >= 0:
            # True positive: The predicted box matches a gt box with an IoU above the threshold.
            tp += 1
            # Remove the matched GT box
            gts[best_match_gt_idx] = -1

        else:
            # No match
            # False positive: indicates a predicted box had no associated gt box.
            fp += 1

    # False negative: indicates a gt box had no associated predicted box.
    # Rows still having a positive coordinate sum are the unmatched ones.
    fn = (gts.sum(axis=1) > 0).sum()

    # NOTE(review): with no predictions and no unmatched GT rows the
    # denominator is 0 - confirm callers never pass such an image.
    return tp / (tp + fp + fn)

@jit(nopython=True)
def calculate_image_precision(gts, preds, thresholds = (0.5, ), form = 'coco') -> float:
    """Calculates image precision: the mean of the per-threshold precisions.

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes,
               sorted by confidence value (descending)
        thresholds: (sequence of float) IoU thresholds to average over
        form: (str) Format of the coordinates

    Return:
        (float) Precision
    """
    n_threshold = len(thresholds)
    image_precision = 0.0

    # IoU cache shared by all thresholds; -1 marks "not computed yet".
    ious = np.ones((len(gts), len(preds))) * -1
    # ious = None

    for threshold in thresholds:
        # calculate_precision overwrites matched GT rows, hence the copy.
        precision_at_threshold = calculate_precision(gts.copy(), preds, threshold=threshold,
                                                     form=form, ious=ious)
        image_precision += precision_at_threshold / n_threshold

    return image_precision

def calculate_final_score(all_predictions, score_threshold, form):
    '''
    Mean image precision over a list of per-image predictions.
    Args:
        all_predictions: list of dicts with the keys 'gt_boxes', 'pred_boxes'
                         and 'scores' (numpy arrays), one dict per image.
        score_threshold: only predictions with a score strictly above this
                         value are evaluated.
        form: coordinate format forwarded to calculate_image_precision.
    Returns:
        mean of the per-image precisions, or 0.0 when all_predictions is empty.
    '''
    final_scores = []
    for prediction in all_predictions:
        gt_boxes = prediction['gt_boxes'].copy()
        # Boolean-mask indexing returns a new array, so the stored
        # predictions are left untouched.
        keep = prediction['scores'] > score_threshold
        pred_boxes = prediction['pred_boxes'][keep]

        image_precision = calculate_image_precision(gt_boxes, pred_boxes,
                                                    thresholds=iou_thresholds, form=form)
        final_scores.append(image_precision)

    if not final_scores:
        # np.mean([]) would return NaN and emit a RuntimeWarning.
        return 0.0

    return np.mean(final_scores)

## Visualization Functions

In [None]:
def get_image(img_dir, dataframe, idx=None, image_id=None):
    '''
    Read and output the image in the form of numpy arrays.
    Args:
        img_dir: image directory.
        dataframe: dataframe with an 'image_id' column (only read when
                   image_id is None).
        idx: index into dataframe['image_id'], used when image_id is None.
        image_id: image ID (file name without the '.jpg' extension).
    Returns:
        image in the form of a numpy array, converted from BGR to RGB.
    Raises:
        FileNotFoundError: if the image file cannot be read.
    '''

    if image_id is None:
        image_id = dataframe['image_id'][idx]
    img_path = os.path.join(img_dir, image_id) + '.jpg'

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None; without this check
        # the error would only surface inside cvtColor.
        raise FileNotFoundError(f'Could not read image: {img_path}')

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def show_image(img_dir, dataframe, boxes_pred=None, show=False, idx=None, image_id=None):
    '''
    Draw the ground truth (green) and optionally the predicted (red) boxes on
    an image, then display or return it.
    Args:
        img_dir: image directory.
        dataframe: dataframe with 'image_id' and 'bbox' columns; 'bbox' holds
                   the string form of [xmin, ymin, width, height].
        boxes_pred: predicted boxes as [xmin, ymin, xmax, ymax], or None.
        show: displays the image if True.
        idx: index into dataframe['image_id'], used when image_id is None.
        image_id: image ID, to output the image while validating.
    Returns:
        None if show is True (the figure is displayed instead).
        If show is False: the annotated image when boxes_pred is None,
        otherwise a pair (image, image). Both entries are the same array,
        because the boxes are drawn in place.
    '''

    if image_id is None:
        image_id = dataframe['image_id'][idx]
    img = get_image(img_dir, dataframe, idx, image_id)

    # Ground truth boxes are stored as [xmin, ymin, width, height] strings.
    boxes_gt = dataframe[dataframe['image_id'] == image_id]['bbox'].values
    for box in boxes_gt:
        box = ast.literal_eval(box) # https://stackoverflow.com/questions/29552950/when-to-use-ast-literal-eval/29556591
        x_min, y_min = int(box[0]), int(box[1])
        x_max, y_max = x_min + int(box[2]), y_min + int(box[3])
        # cv2.rectangle draws on img in place.
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 3)

    if boxes_pred is not None:
        # Predicted boxes already are [xmin, ymin, xmax, ymax].
        for box in boxes_pred:
            x_min, y_min, x_max, y_max = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    if show:
        title = 'Image ID: '+image_id
        if boxes_pred is not None:
            title += ('       Green: Ground Truth, Box Count: '+str(len(boxes_gt))
                      +'     Red: Predicted, Box Count: '+str(len(boxes_pred)))
        plt.figure(figsize=(8,8))
        plt.axis('off')
        plt.title(title, fontdict={'color':'cyan'})
        # Showing img itself also works when no box was drawn at all.
        plt.imshow(img)
        plt.show()
        return None

    if boxes_pred is None:
        return img
    return img, img

def val_show(gts, dataframe, boxes, image_id):
    '''
    Select the predicted boxes that match a ground truth box with an IoU of
    at least 0.5 and display them together with the ground truth.
    Args:
        gts: ground truth box coordinates.
        dataframe: dataframe forwarded to show_image.
        boxes: predicted boxes.
        image_id: corresponding image ID.
    Returns:
        None; the image is displayed through show_image.
    '''

    ious = np.ones((len(gts), len(boxes))) * -1
    matched = []
    for pred_idx in range(len(boxes)):
        best_match_gt_idx = find_best_match(gts, boxes[pred_idx], pred_idx, threshold=0.5, ious=ious)
        # find_best_match returns a ground-truth index (-1 for "no match"),
        # so the box to keep is the prediction itself, and only when matched.
        if best_match_gt_idx >= 0:
            matched.append(tuple(boxes[pred_idx]))

    # for removing duplicate boxes (first occurrence kept, order preserved)
    boxes_pred = list(dict.fromkeys(matched))
    show_image(train_dir, dataframe, boxes_pred, show=True, image_id=image_id)

In [None]:
# Let's visualize an image from the training directory with its ground truth boxes.
show_image(train_dir, df, show=True, idx=365)

## Dataset

In [None]:
class dataset(Dataset):
    '''
    Detection dataset yielding (image, target, image_id) triples.
    Args:
        df: dataframe with 'image_id' and 'bbox' columns; 'bbox' holds the
            string form of [xmin, ymin, width, height].
        train: if True image paths are looked up in train_img_dict,
               otherwise in val_img_dict.
        transforms: optional callable applied to the image only.
    '''

    def __init__(self, df, train=True, transforms=None):

        self.df = df
        self.train = train
        self.image_ids = self.df['image_id'].unique()
        self.transforms = transforms

    def __getitem__(self, idx):

        image_id = self.image_ids[idx]
        path_lookup = train_img_dict if self.train else val_img_dict
        img_path = path_lookup.get(image_id)
        if img_path is None:
            raise KeyError(f'No image path registered for image id {image_id!r}')

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None instead of raising on failure.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img = img / 255.0

        # Parse the boxes, truncate them to whole pixels and convert
        # [xmin, ymin, w, h] to [xmin, ymin, xmax, ymax].
        rows = self.df[self.df['image_id'] == image_id]['bbox'].values
        boxes = np.array([ast.literal_eval(box) for box in rows]).astype(np.int64).reshape(-1, 4)
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]

        n_boxes = len(boxes)
        target = {}
        # torchvision detection models document the target boxes as a
        # FloatTensor[N, 4].
        target['boxes'] = torch.as_tensor(boxes, dtype = torch.float32)
        target['labels'] = torch.ones((n_boxes,), dtype = torch.int64)
        target['iscrowd'] = torch.zeros((n_boxes,), dtype = torch.int64)
        target['area'] = torch.as_tensor(((boxes[:,3] - boxes[:,1]) * (boxes[:,2] - boxes[:,0])), dtype = torch.float32)
        target['image_id'] = torch.tensor([idx])

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

## Model

In [None]:
# https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
def get_model(num_classes, backbone = None):
    '''
    Model function to output network according to arguments.
    Args:
        num_classes: number of classes(total_classes+1 for background)
        backbone: None for the pretrained Faster R-CNN with resnet50-FPN, or
                  one of 'mobile_net', 'vgg19', 'resnet101' to build a
                  Faster R-CNN on top of that backbone.
    Returns:
        model.
    Raises:
        ValueError: if backbone is not one of the supported names.
    '''

    if backbone is None:

        model = fasterrcnn_resnet50_fpn(pretrained = True)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # print(in_features) = 1024
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        return model

    if backbone == 'mobile_net':
        net = mobilenet_v2(pretrained = True)
        backbone_ft = net.features
        backbone_ft.out_channels = 1280

    elif backbone == 'vgg19':
        net = vgg19(pretrained = True)
        backbone_ft = net.features
        backbone_ft.out_channels = 512

    # https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn
    elif backbone == 'resnet101':
        net = resnet101(pretrained = True)
        # Drop both the classifier (fc) and the global average pooling:
        # keeping avgpool would shrink the feature map to 1x1.
        modules = list(net.children())[:-2]
        backbone_ft = nn.Sequential(*modules)
        backbone_ft.out_channels = 2048

    else:
        raise ValueError(
            "Unsupported backbone {!r}; expected None, 'mobile_net', "
            "'vgg19' or 'resnet101'.".format(backbone))

    anchor_gen = AnchorGenerator(sizes=((32, 64, 128),))
    # featmap_names = [0] gives list index out of range error.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names = ['0'],
                                                    output_size = 7,
                                                    sampling_ratio = 2)
    model = FasterRCNN(backbone_ft,
                       num_classes,
                       rpn_anchor_generator = anchor_gen,
                       box_roi_pool = roi_pooler)

    return model

## Training and Validation Function

In [None]:
def train_fn(dataloader, epoch, model, optimizer, device):
    '''
    Training function; runs one pass over the dataloader.
    Args:
        dataloader: for loading training data batch-wise.
        epoch: current epoch number, only used in the progress message.
        model: network architecture for training.
        optimizer: optimizer used for gradient descent.
        device: computation device for training.
    Returns:
        AverageMeter holding the last and the average loss of this epoch.
    '''

    model.train()  # training mode enables dropout

    loss = AverageMeter()  # loss update/reset
    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time

    start = time.time()

    loader = tqdm(dataloader, total = len(dataloader))
    for step, (images, targets, image_id) in enumerate(loader):

        # take the list of images and targets to feed the network
        images = [image.to(device, dtype=torch.float32) for image in images]
        targets = [{k: v.to(device) for k,v in target.items()} for target in targets]
        data_time.update(time.time() - start)

        # forward + backward + optimize
        # In training mode the model returns a dict of loss tensors
        # (loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg).
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()  # zero the parameter gradients
        losses.backward()
        optimizer.step()

        batch_time.update(time.time() - start)
        # Update loss after every batch, weighted by the real batch length
        # (the last batch can be shorter than the configured batch size).
        loss.update(losses.item(), len(images))

        start = time.time()

        if step % ITER_STEP == 0:

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch, step, len(dataloader),
                                                                  batch_time=batch_time,
                                                                  data_time=data_time, loss=loss))
        # To check the loss real-time while iterating.
        loader.set_postfix(loss=loss.avg)

    return loss

In [None]:
def val_fn(dataloader, model, device, display_random=False, show_img_num=None):
    '''
    Validation function with epoch wise visualization.
    Args:
        dataloader: to load the data batch-wise.
        model: trained model for validation.
        device: device used for computation.
        display_random: if True, one randomly chosen image of every 15th
                        batch is visualized.
        show_img_num: position inside the batch of the image to visualize
                      when display_random is False (None shows nothing).
    Returns:
        Visualizations and a list of dictionary consisting of predicted box
        coordinates, corresponding scores, ground truth box coordinates and
        image ID.
    '''
    show_every = 15  # visualize only on every 15th batch

    model.eval()
    predictions = []

    with torch.no_grad():

        loader = tqdm(dataloader, total=len(dataloader))
        for step, (images, targets, image_id) in enumerate(loader):

            images = [image.to(device, dtype=torch.float32) for image in images]
            output = model(images)

            # Choose the image to display once per batch. randint's upper
            # bound is exclusive, so len(images) covers every position and
            # also works for a shorter final batch.
            if display_random:
                show_idx = int(np.random.randint(len(images)))
            else:
                show_idx = show_img_num

            for i in range(len(images)):
                # The forward pass runs under no_grad; the arrays are moved
                # to the CPU before converting them to numpy.
                boxes = output[i]['boxes'].detach().cpu().numpy()
                scores = output[i]['scores'].detach().cpu().numpy()
                gts = (targets[i]['boxes'].cpu().numpy()).astype(int)

                predictions.append({
                    'pred_boxes': (boxes).astype(int),
                    'scores': scores,
                    'gt_boxes': gts,
                    'image_id': image_id[i],
                })

                if step % show_every == 0 and i == show_idx:
                    val_show(gts, df_val, boxes, image_id[i])

    return predictions

In [None]:
# The DataLoader hands collate_fn the list of samples of one batch and
# yields whatever it returns.
# https://discuss.pytorch.org/t/how-to-use-collate-fn/27181
def collate_fn(batch):
    """Transpose a list of sample tuples into a tuple of per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

## Engine (main function)

In [None]:
def engine(device, model_path=None, init_epoch=None, resume=False):
    '''
    Main function to train and validate.
    Args:
        device: device for computation.
        model_path: path of a saved checkpoint, read when resume is True.
                    Either a state_dict (as saved by this function) or a
                    pickled model.
        init_epoch: initial epoch to resume training from (0 if None).
        resume: to resume training from last epoch.
    Return:
        final_score: list of [best score so far, epoch], one entry per epoch.
    '''

    final_score = []
    best_score = 0

    # Custom DataLoaders
    to_tensor = T.Compose([T.ToTensor()])
    train_dataset = dataset(df_train, transforms=to_tensor)
    valid_dataset = dataset(df_val, train=False, transforms=to_tensor)

    # Training batches are reshuffled every epoch; validation keeps a fixed
    # order.
    train_loader = DataLoader(train_dataset,
                              BATCH_SIZE,
                              shuffle=True,
                              num_workers=8,
                              collate_fn=collate_fn)
    val_loader = DataLoader(valid_dataset,
                            BATCH_SIZE,
                            shuffle=False,
                            num_workers=8,
                            collate_fn=collate_fn, )

    if resume:
        checkpoint = torch.load(model_path, map_location=device)
        if isinstance(checkpoint, dict):
            # Checkpoints written below are state_dicts, so the network has
            # to be rebuilt before the weights can be restored.
            model = get_model(2)
            model.load_state_dict(checkpoint)
        else:
            # Backward compatible: a whole pickled model.
            model = checkpoint
        init_epoch = 0 if init_epoch is None else init_epoch
    else:
        model = get_model(2)
        init_epoch = 0
    model.to(device)  # loading model on the computation device

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.9, weight_decay=0.0007)

    for epoch in range(init_epoch, EPOCHS):
        # Train for one epoch, then validate to see how the model performs
        # in that epoch. The predictions returned by val_fn are given to the
        # evaluation metric to get the score.
        train_loss = train_fn(train_loader, epoch, model, optimizer, device)
        prediction = val_fn(val_loader, model, device, display_random=True)
        valid_score = calculate_final_score(prediction, 0.5, 'pascal_voc')

        # Keep the weights of every epoch that improves on the best score.
        if valid_score > best_score:
            best_score = valid_score
            torch.save(model.state_dict(), f'frcnn_best_{epoch}.pth')
        final_score.append([best_score, epoch])

    return final_score

In [None]:
%%time

# Run configuration, read as globals by the functions defined above.
BATCH_SIZE = 8      # batch size of the train / validation DataLoaders in engine
ITER_STEP = 100     # train_fn prints a progress line every ITER_STEP steps
EPOCHS = 10         # upper bound of the epoch loop in engine
model_path = None   # not passed to engine in the call below
INIT_EPOCH = None   # not passed to engine in the call below
DEVICE = torch.device('cuda')

# Train from scratch; returns a list of [best score so far, epoch] entries.
final_score = engine(DEVICE)

In [None]:
def get_best_epoch():
    '''
    Print the global final_score list and find the epoch with the best score.
    Returns:
        epoch number of the first entry of final_score that reaches the
        highest score.
    Raises:
        ValueError: if final_score is empty.
    '''
    pprint(final_score)

    if not final_score:
        raise ValueError('final_score is empty; run the training first.')

    # Get the epoch number for weights with best score.
    # max() returns the first maximal entry, i.e. the epoch in which the
    # best score was first reached.
    best_entry = max(final_score, key=lambda entry: entry[0])
    epoch = best_entry[1]
    print('Best model found in Epoch {}'.format(epoch))

    return epoch

In [None]:
# Epoch whose weights are loaded for inference further below.
epoch = get_best_epoch()

## Evaluation and Submission

In [None]:
# Sample submission; its 'image_id' column provides the test image IDs.
test_df = pd.read_csv('../input/global-wheat-detection/sample_submission.csv')

In [None]:
# Dataset class for evaluation.
class eval_dataset(Dataset):
    '''
    Dataset yielding (image, image_id) pairs for inference.
    Args:
        dataframe: dataframe with an 'image_id' column.
        image_dir: directory holding '<image_id>.jpg' files.
        transforms: optional callable applied to the image.
    '''

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):

        image_id = self.image_ids[index]
        img_path = f'{self.image_dir}/{image_id}.jpg'

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None instead of raising on failure.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img /= 255.0

        if self.transforms is not None:
            img = self.transforms(img)

        return img, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

In [None]:
# Custom dataloader for test data.
# The images are converted to tensors; collate_fn keeps every batch as a
# tuple of images and a tuple of image IDs.
test_dataset = eval_dataset(test_df, test_dir, transforms=T.Compose([T.ToTensor()]))
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=8, drop_last=False, collate_fn=collate_fn)

In [None]:
def format_prediction_string(boxes, scores):
    '''
    Serialize detections into one space separated string.
    Args:
        boxes: sequence of boxes, each with four coordinates.
        scores: sequence of scores, one per box.
    Returns:
        "score c0 c1 c2 c3" groups joined by single spaces, with the score
        formatted to four decimals; an empty string when nothing is given.
    '''
    return " ".join(
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )

In [None]:
def evaluate(device, model):
    '''
    Run the model on the global test_loader, display every image with its
    detections and collect the submission rows.
    Args:
        device: device for computation; the model is moved to it.
        model: trained detection model.
    Returns:
        list of dicts with the keys 'image_id' and 'PredictionString'; the
        boxes in the string are written as xmin ymin width height.
    '''

    model.to(device)
    model.eval()
    detection_threshold = 0.5
    results = []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for images, image_ids in test_loader:

            images = list(image.to(device) for image in images)
            outputs = model(images)

            for i, image_id in enumerate(image_ids):

                boxes = outputs[i]['boxes'].cpu().numpy()
                scores = outputs[i]['scores'].cpu().numpy()

                # Boolean-mask indexing returns new arrays, which are safe to
                # modify in place further down.
                keep = scores >= detection_threshold
                boxes = boxes[keep].astype(np.int32)
                scores = scores[keep]

                img = get_image(test_dir, test_df, image_id=image_id)

                # The model outputs [xmin, ymin, xmax, ymax]; cv2.rectangle
                # draws on img in place.
                for box in boxes:
                    x_min, y_min, x_max, y_max = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (220, 0, 0), 3)

                plt.figure(figsize=(8,8))
                plt.axis('off')
                plt.title('Image ID: '+image_id, fontdict={'color':'cyan'})
                # Show img itself so images without detections work too.
                plt.imshow(img)
                plt.show()

                # The submission expects xmin ymin width height.
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]

                result = {
                    'image_id': image_id,
                    'PredictionString': format_prediction_string(boxes, scores)
                }

                results.append(result)

    return results

In [None]:
# Inference runs on the CPU with the weights of the best epoch.
DEVICE = torch.device('cpu')
weight_path = f'./frcnn_best_{epoch}.pth'

model = get_model(2)
# The weights were saved from a model on the GPU; map_location lets them be
# restored when no GPU is available.
model.load_state_dict(torch.load(weight_path, map_location=DEVICE))

results = evaluate(DEVICE, model = model)

In [None]:
# Submission table built from the prediction rows; this rebinds test_df,
# which previously held the sample submission.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.head()

In [None]:
# Write the submission file without the dataframe index.
test_df.to_csv('submission.csv', index=False)

References:  
https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train  
https://towardsdatascience.com/understanding-pytorch-with-an-example-a-step-by-step-tutorial-81fc5f8c4e8e