In [None]:
import torch
from torchvision.transforms import ToTensor, Normalize, Compose, Resize
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pathlib import Path

# Image size the predicted boxes are rescaled to, and the network input size.
ORIGIN_W, ORIGIN_H = 1280, 720
MODEL_W, MODEL_H = 640, 640

# Multipliers that map model-space coordinates back to ORIGIN_* coordinates.
w_scale, h_scale = ORIGIN_W / MODEL_W, ORIGIN_H / MODEL_H

# Default NMS IoU threshold and confidence threshold handed to InferModule.
IOU_TH = 0.65
CONFIDANCE_TH = 0.16

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Pre-processing applied to every RGB frame before inference:
#   1. ToTensor  - convert the image to a float tensor.
#   2. Resize    - bring it to the network input size. The size is taken from
#                  MODEL_H / MODEL_W (Resize expects (h, w)) so it cannot drift
#                  from the constants used for the priors and box rescaling.
#   3. Normalize - per-channel mean/std (the commonly used ImageNet statistics).
transform = Compose([
        ToTensor(),
        Resize((MODEL_H, MODEL_W)),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

In [None]:
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
import torch.nn as nn
from torch.nn.modules.utils import _pair
from torchvision.ops import batched_nms

class MlvlPointGenerator(nn.Module):
    """Grid-point (prior) generator for multi-level 2D feature maps.

    The points of every level are computed once in ``__init__``, concatenated
    in level order, converted to ``float32`` and registered as the
    ``flatten_priors`` buffer.

    Args:
        strides (list[int] | list[tuple[int, int]]): Stride of each feature
            level in (w, h) order; scalars are expanded to pairs.
        featmap_sizes (list[tuple]): Size of each feature level as (h, w).
            Must contain one entry per stride.
        offset (float): Offset of the points, expressed as a fraction of the
            corresponding stride. Defaults to 0.5.
        dtype (:obj:`dtype`): Dtype used while building the per-level points.
            Defaults to ``torch.float16``.
        with_stride (bool): Whether to append (stride_w, stride_h) to each
            point, giving rows of 4 values instead of 2.
    """

    def __init__(self, strides,
                 featmap_sizes,
                 offset=0.5,
                 dtype=torch.float16,
                 with_stride=False):
        super().__init__()
        self.strides = list(map(_pair, strides))
        self.offset = offset
        assert self.num_levels == len(featmap_sizes)

        per_level_priors = [
            self.single_level_grid_priors(
                featmap_sizes[level],
                level_idx=level,
                dtype=dtype,
                with_stride=with_stride)
            for level in range(self.num_levels)
        ]

        # Stored as float32 regardless of the dtype used to build the grid.
        self.register_buffer("flatten_priors",
                             torch.cat(per_level_priors).float())

    @property
    def num_levels(self):
        """int: Number of feature levels, i.e. the number of strides."""
        return len(self.strides)

    @property
    def num_base_priors(self):
        """list[int]: Number of priors per grid location for each level
        (always one point per location)."""
        return [1] * len(self.strides)

    def _meshgrid(self, x, y, row_major=True):
        """Build the flattened 2D grid spanned by ``x`` and ``y``.

        Returns ``(xx, yy)`` when ``row_major`` is true, ``(yy, xx)``
        otherwise; both outputs are 1-D with ``len(x) * len(y)`` entries.
        """
        grid_y, grid_x = torch.meshgrid(y, x)
        # reshape (not flatten) is used on purpose: flatten breaks ONNX export
        flat_x = grid_x.reshape(-1)
        flat_y = grid_y.reshape(-1)
        return (flat_x, flat_y) if row_major else (flat_y, flat_x)

    def single_level_grid_priors(self,
                                 featmap_size,
                                 level_idx,
                                 dtype=torch.float16,
                                 with_stride=False):
        """Generate the grid points of one feature level.

        Args:
            featmap_size (tuple[int]): Feature map size as (h, w).
            level_idx (int): Index of the feature level (selects the stride).
            dtype (:obj:`dtype`): Dtype of the returned points.
                Defaults to ``torch.float16``.
            with_stride (bool): Append the stride to every point.

        Return:
            Tensor: Shape (N, 2) with rows (coord_x, coord_y) when
            ``with_stride`` is ``False``, otherwise shape (N, 4) with rows
            (coord_x, coord_y, stride_w, stride_h); N = width * height.
        """
        feat_h, feat_w = featmap_size
        stride_w, stride_h = self.strides[level_idx]

        # Coordinates are built from tensors (not Python ints) so that the
        # computation can be exported to ONNX correctly.
        xs = ((torch.arange(0, feat_w) + self.offset) * stride_w).to(dtype)
        ys = ((torch.arange(0, feat_h) + self.offset) * stride_h).to(dtype)
        grid_x, grid_y = self._meshgrid(xs, ys)

        columns = [grid_x, grid_y]
        if with_stride:
            # `shape[0]` instead of `len(...)` for ONNX export
            columns.append(
                grid_x.new_full((grid_x.shape[0], ), stride_w).to(dtype))
            columns.append(
                grid_x.new_full((grid_y.shape[0], ), stride_h).to(dtype))
        return torch.stack(columns, dim=-1)

    def valid_flags(self, featmap_sizes, pad_shape):
        """Generate valid flags of the points of all feature levels.

        Args:
            featmap_sizes (list(tuple)): Feature map size of every level,
                each as (h, w).
            pad_shape (tuple(int)): Padded image shape as (h, w).

        Return:
            list(torch.Tensor): One boolean flag tensor per level.
        """
        assert self.num_levels == len(featmap_sizes)
        flags_per_level = []
        for level in range(self.num_levels):
            stride_w, stride_h = self.strides[level][0], self.strides[level][1]
            feat_h, feat_w = featmap_sizes[level]
            h, w = pad_shape[:2]
            # Number of grid cells that fall inside the padded image.
            valid_feat_h = min(int(np.ceil(h / stride_h)), feat_h)
            valid_feat_w = min(int(np.ceil(w / stride_w)), feat_w)
            flags_per_level.append(
                self.single_level_valid_flags((feat_h, feat_w),
                                              (valid_feat_h, valid_feat_w)))
        return flags_per_level

    def single_level_valid_flags(self,
                                 featmap_size,
                                 valid_size):
        """Generate the valid flags of the points of one feature map.

        Args:
            featmap_size (tuple[int]): Feature map size as (h, w).
            valid_size (tuple[int]): Valid region of the feature map as
                (h, w); must not exceed ``featmap_size``.

        Returns:
            torch.Tensor: Flattened boolean mask, true for points whose
            column and row both lie inside the valid region.
        """
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w

        mask_x = torch.zeros(feat_w, dtype=torch.bool)
        mask_x[:valid_w] = 1
        mask_y = torch.zeros(feat_h, dtype=torch.bool)
        mask_y[:valid_h] = 1

        grid_x, grid_y = self._meshgrid(mask_x, mask_y)
        return grid_x & grid_y

    def sparse_priors(self,
                      prior_idxs,
                      featmap_size,
                      level_idx,
                      dtype=torch.float16,
                      ):
        """Generate only the points selected by ``prior_idxs``.

        Args:
            prior_idxs (Tensor): Flat indices of the wanted points in the
                feature map.
            featmap_size (tuple[int]): Feature map size, unpacked as
                (height, width).
            level_idx (int): Index of the feature level (selects the stride).
            dtype (obj:`torch.dtype`): Dtype of the returned points.
                Defaults to ``torch.float16``.

        Returns:
            Tensor: Shape (N, 2) with rows (coord_x, coord_y), where N is the
            length of ``prior_idxs``.
        """
        height, width = featmap_size
        stride_w = self.strides[level_idx][0]
        stride_h = self.strides[level_idx][1]

        col = prior_idxs % width
        row = (prior_idxs // width) % height
        x = (col + self.offset) * stride_w
        y = (row + self.offset) * stride_h
        return torch.stack([x, y], 1).to(dtype)

# Per-level strides of the detector and the matching feature-map sizes (h, w)
# for a MODEL_H x MODEL_W network input. Integer division keeps the sizes as
# exact ints instead of going through a float.
strides = [8, 16, 32]
featmap_sizes = [torch.Size([MODEL_H // stride, MODEL_W // stride]) for stride in strides]

# offset=0 places each point on the top-left corner of its grid cell;
# with_stride=True appends (stride_w, stride_h), which the box decoding uses.
prior_generator = MlvlPointGenerator(strides, featmap_sizes, offset=0, with_stride=True)
flatten_priors = prior_generator.flatten_priors

class InferModule:
    """Runs a detection model and post-processes its raw outputs into boxes.

    The wrapped ``model`` is called as ``model(x)`` and must return three
    sequences ``(cls_scores, bbox_preds, objectnesses)`` with one 4-D tensor
    per feature level (they are permuted with ``(0, 2, 3, 1)``).

    Args:
        model: Callable detection model.
        flatten_priors (Tensor): (N, 4) priors; columns 0-1 are point
            coordinates and columns 2-3 the per-point scale used for decoding.
        iou_th (float): IoU threshold used by NMS in ``forward``.
        conf_th (float): Stored on the instance. NOTE(review): it is not used
            as the default of ``forward`` (which keeps its historical 0.25);
            pass the threshold explicitly to ``forward``.
    """

    def __init__(self, model, flatten_priors=flatten_priors, iou_th=IOU_TH, conf_th=CONFIDANCE_TH):
        self.model = model
        self.flatten_priors = flatten_priors
        self.iou_th = iou_th
        self.conf_th = conf_th

    @staticmethod
    def _bbox_decode(priors, bbox_preds):
        """Decode raw regression outputs into corner-format boxes.

        Centers are ``pred_xy * scale + point`` and sizes are
        ``exp(pred_wh) * scale``, where ``point = priors[:, :2]`` and
        ``scale = priors[:, 2:]``.

        Returns:
            Tensor: ``(..., 4)`` boxes as (tl_x, tl_y, br_x, br_y).
        """
        centers = (bbox_preds[..., :2] * priors[:, 2:]) + priors[:, :2]
        sizes = bbox_preds[..., 2:].exp() * priors[:, 2:]

        tl_x = (centers[..., 0] - sizes[..., 0] / 2)
        tl_y = (centers[..., 1] - sizes[..., 1] / 2)
        br_x = (centers[..., 0] + sizes[..., 0] / 2)
        br_y = (centers[..., 1] + sizes[..., 1] / 2)
        return torch.stack([tl_x, tl_y, br_x, br_y], -1)

    def _bboxes_nms(self, cls_scores, bboxes, score_factor, confidance_th = 0.25, iou_threshold = 0.65):
        """Filter the boxes of one image by score and apply class-wise NMS.

        Args:
            cls_scores (Tensor): Class probabilities; concatenated with their
                complement along the last dim (assumes shape (N, 1) — TODO confirm).
            bboxes (Tensor): (N, 4) corner-format boxes.
            score_factor (Tensor): (N,) objectness probabilities.
            confidance_th (float): Minimum ``objectness * class score`` to keep.
            iou_threshold (float): IoU threshold passed to ``batched_nms``.

        Returns:
            tuple[Tensor, Tensor]: Kept boxes (float32) and their scores; both
            are empty when nothing passes the confidence threshold.
        """
        # NOTE(review): adding `1 - cls_scores` as an extra class means a point
        # with a LOW class probability can still pass the threshold (labelled
        # 0) and be returned as a detection — confirm this is intended.
        cls_scores = torch.cat([1 - cls_scores, cls_scores], -1)
        max_scores, labels = torch.max(cls_scores, 1)

        combined_scores = score_factor * max_scores
        valid_mask = combined_scores >= confidance_th

        bboxes = bboxes[valid_mask].float()
        scores = combined_scores[valid_mask]
        labels = labels[valid_mask]

        if labels.numel() == 0:
            # Same (boxes, scores) contract as the non-empty path.
            return bboxes, scores

        keep = batched_nms(bboxes.double(), scores.double(), labels, iou_threshold=iou_threshold)
        return bboxes[keep], scores[keep]

    @staticmethod
    def _get_flatten_output(
                            cls_scores,
                            bbox_preds,
                            objectnesses,
                            num_classes=1,
                            priors=None
                            ):
        """Flatten per-level outputs to per-image tensors and decode the boxes.

        Args:
            cls_scores, bbox_preds, objectnesses: Sequences with one tensor per
                feature level, channel dimension second.
            num_classes (int): Channel count of each class-score map.
            priors (Tensor | None): Priors used for decoding. Falls back to the
                module-level ``flatten_priors`` when omitted, which keeps old
                call sites working.

        Returns:
            tuple: ``(cls_preds, objectness, decoded_bboxes, raw_bbox_preds)``,
            all float64 CPU tensors with the image index as first dimension.
        """
        if priors is None:
            priors = flatten_priors
        priors = priors.detach().cpu()

        num_imgs = bbox_preds[0].shape[0]

        flatten_cls_preds = [
            cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, num_classes)
            for cls_pred in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_objectness = [
            objectness.permute(0, 2, 3, 1).reshape(num_imgs, -1)
            for objectness in objectnesses
        ]

        # Post-processing runs in float64 on the CPU.
        flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1).double().detach().cpu()
        flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1).double().detach().cpu()
        flatten_objectness = torch.cat(flatten_objectness, dim=1).double().detach().cpu()
        flatten_bboxes = InferModule._bbox_decode(priors, flatten_bbox_preds).double().detach().cpu()

        return flatten_cls_preds, flatten_objectness, flatten_bboxes, flatten_bbox_preds

    def forward(self, x, confidance=0.25):
        """Run the model on a batch and return the surviving detections.

        Args:
            x (Tensor): Input batch passed unchanged to ``self.model``.
            confidance (float): Minimum ``objectness * class score`` to keep.

        Returns:
            list[list[float]]: ``[score, x_min, y_min, w, h]`` entries with the
            coordinates multiplied by the module-level ``w_scale`` /
            ``h_scale``. Detections of all images in the batch are
            concatenated into this single flat list.
        """
        # Use the model held by this instance and skip autograd bookkeeping.
        with torch.no_grad():
            cls_score, bbox_pred, objectness = self.model(x)

        flatten_cls_preds, flatten_objectness, flatten_bboxes, _ = self._get_flatten_output(
            cls_scores=cls_score,
            bbox_preds=bbox_pred,
            objectnesses=objectness,
            priors=self.flatten_priors,
        )
        flatten_cls_preds = flatten_cls_preds.sigmoid()
        flatten_objectness = flatten_objectness.sigmoid()

        model_bboxes = []
        for img_cls, img_obj, img_bboxes in zip(flatten_cls_preds, flatten_objectness, flatten_bboxes):
            bboxes, scores = self._bboxes_nms(
                cls_scores=img_cls,
                bboxes=img_bboxes,
                score_factor=img_obj,
                confidance_th=confidance,
                iou_threshold=self.iou_th,
            )
            # Convert corner boxes in model space to xywh in the scaled space.
            for box, score in zip(bboxes, scores):
                x_min = float(box[0]*w_scale)
                y_min = float(box[1]*h_scale)
                w = float(box[2]*w_scale) - x_min
                h = float(box[3]*h_scale) - y_min
                model_bboxes.append([float(score), x_min, y_min, w, h])
        return model_bboxes

In [None]:
# Load the TorchScript checkpoint onto the same device the input tensors are
# sent to, and switch it to inference mode before wrapping it.
model = torch.jit.load('../input/d/lolik228/yolox-pt/yolox.pt', map_location=device)
model.eval()
infer_module = InferModule(model)

In [None]:
import pandas as pd
import os
import cv2
from tqdm import tqdm
import random

# Collect the paths of all training frames of the three videos.
names = []
for i in range(3):
    path = '../input/tensorflow-great-barrier-reef/train_images/video_' + str(i) + '/'
    path_names = os.listdir(path)
    names.extend(path + file_name for file_name in path_names)

random.shuffle(names)

bad_names = []

# Smoke test: run the full pipeline on one randomly chosen frame.
name = np.random.choice(names)
image_np = cv2.cvtColor(cv2.imread(name), cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
tensor = transform(image_np).unsqueeze(0).half().to(device)
infer_module.forward(tensor)

In [None]:
def model_predict(img_path, confidance_th):
    """Run the detector on one image file.

    Args:
        img_path (str): Path of the image to read with OpenCV.
        confidance_th (float): Confidence threshold forwarded to
            ``infer_module.forward``.

    Returns:
        The value returned by ``infer_module.forward`` for this image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    image_bgr = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image_bgr is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')
    image_np = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    # Add the batch dimension and cast to half precision before inference.
    tensor = transform(image_np).unsqueeze(0).half().to(device)
    return infer_module.forward(tensor, confidance_th)

In [None]:
def calc_iou(bboxes1, bboxes2, bbox_mode='xywh'):
    """Pairwise IoU between two sets of boxes.

    Args:
        bboxes1: (N, 4) array of boxes.
        bboxes2: (M, 4) array of boxes.
        bbox_mode: ``'xywh'`` for (x, y, width, height) input; any other value
            treats the boxes as (x1, y1, x2, y2).

    Returns:
        (N, M) array with the IoU of every pair. Widths and heights are
        computed with a ``+ 1`` term (end coordinates counted as inclusive).
    """
    for boxes in (bboxes1, bboxes2):
        assert len(boxes.shape) == 2 and boxes.shape[1] == 4

    # Work on copies so the callers' arrays are never modified.
    first = bboxes1.copy()
    second = bboxes2.copy()

    if bbox_mode == 'xywh':
        # (x, y, w, h) -> (x1, y1, x2, y2)
        first[:, 2:] += first[:, :2]
        second[:, 2:] += second[:, :2]

    # Column vectors for the first set, row vectors for the second, so that
    # every binary operation broadcasts to an (N, M) matrix.
    ax1, ay1, ax2, ay2 = (first[:, c:c + 1] for c in range(4))
    bx1, by1, bx2, by2 = (second[:, c:c + 1].T for c in range(4))

    left = np.maximum(ax1, bx1)
    top = np.maximum(ay1, by1)
    right = np.minimum(ax2, bx2)
    bottom = np.minimum(ay2, by2)

    overlap = np.maximum((right - left + 1), 0) * np.maximum((bottom - top + 1), 0)
    area_first = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_second = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    return overlap / (area_first + area_second - overlap)

def f_beta(tp, fp, fn, beta=2):
    """F-beta score computed from true-positive, false-positive and
    false-negative counts.

    Args:
        tp, fp, fn: Counts of true positives, false positives, false negatives.
        beta: Weight of recall relative to precision. Defaults to 2.

    Returns:
        ``(1 + beta^2) * tp / ((1 + beta^2) * tp + beta^2 * fn + fp)``, or
        ``0.0`` when all three counts are zero (the ratio is undefined there
        and would otherwise raise ``ZeroDivisionError``).
    """
    beta_sq = beta ** 2
    weighted_tp = (1 + beta_sq) * tp
    denominator = weighted_tp + beta_sq * fn + fp
    if denominator == 0:
        return 0.0
    return weighted_tp / denominator

def calc_is_correct_at_iou_th(gt_bboxes, pred_bboxes, iou_th, verbose=False):
    """Greedily match predictions to ground truth at one IoU threshold.

    Predictions are visited in the given order. Each one is matched to the
    remaining ground-truth box with the highest IoU; when that IoU exceeds
    ``iou_th`` it counts as a true positive and the ground-truth box is
    consumed, otherwise it counts as a false positive.

    Args:
        gt_bboxes: (N, 4) array of ground-truth boxes in xywh format.
        pred_bboxes: (M, 5) array of predictions in conf + xywh format.
        iou_th: IoU a match must exceed.
        verbose: Unused; kept for interface compatibility.

    Returns:
        tuple: ``(tp, fp, fn)``.
    """
    # np.delete below returns a new array, so the caller's array is untouched.
    remaining_gt = gt_bboxes

    tp = 0
    fp = 0
    for k, pred_bbox in enumerate(pred_bboxes):
        # Checked before computing IoUs so that an empty ground truth (at
        # entry or after all boxes were matched) never reaches `.max()`:
        # every prediction still left is a false positive.
        if len(remaining_gt) == 0:
            fp += len(pred_bboxes) - k
            break

        ious = calc_iou(remaining_gt, pred_bbox[None, 1:])
        if ious.max() > iou_th:
            tp += 1
            remaining_gt = np.delete(remaining_gt, ious.argmax(), axis=0)
        else:
            fp += 1

    fn = len(remaining_gt)
    return tp, fp, fn

def calc_is_correct(gt_bboxes, pred_bboxes):
    """Accumulate TP / FP / FN counts for one image over all IoU thresholds.

    gt_bboxes: (N, 4) np.array in xywh format
    pred_bboxes: (N, 5) np.array in conf+xywh format

    When either input is empty the counts are returned directly (once, not
    per threshold); otherwise they are summed over the thresholds
    ``np.arange(0.3, 0.85, 0.05)``.
    """
    num_gt = len(gt_bboxes)
    num_pred = len(pred_bboxes)

    # No ground truth: every prediction (possibly none) is a false positive.
    if num_gt == 0:
        return 0, num_pred, 0
    # Ground truth but no predictions: every box is missed.
    if num_pred == 0:
        return 0, 0, num_gt

    # Highest confidence first, so confident predictions are matched first.
    order = pred_bboxes[:, 0].argsort()[::-1]
    ranked_preds = pred_bboxes[order]

    total_tp, total_fp, total_fn = 0, 0, 0
    for iou_th in np.arange(0.3, 0.85, 0.05):
        tp, fp, fn = calc_is_correct_at_iou_th(gt_bboxes, ranked_preds, iou_th)
        total_tp, total_fp, total_fn = total_tp + tp, total_fp + fp, total_fn + fn
    return total_tp, total_fp, total_fn

def calc_f2_score(gt_bboxes_list, pred_bboxes_list, verbose=False):
    """F2 score over a set of images.

    gt_bboxes_list: list of (N, 4) np.array in xywh format
    pred_bboxes_list: list of (N, 5) np.array in conf+xywh format

    The per-image counts from ``calc_is_correct`` are summed and passed to
    ``f_beta`` with ``beta=2``. With ``verbose`` set, one summary line is
    printed per image.
    """
    total_tp = total_fp = total_fn = 0
    for gt_bboxes, pred_bboxes in zip(gt_bboxes_list, pred_bboxes_list):
        tp, fp, fn = calc_is_correct(gt_bboxes, pred_bboxes)
        total_tp += tp
        total_fp += fp
        total_fn += fn
        if not verbose:
            continue
        num_gt = len(gt_bboxes)
        num_pred = len(pred_bboxes)
        print(f'num_gt:{num_gt:<3} num_pred:{num_pred:<3} tp:{tp:<3} fp:{fp:<3} fn:{fn:<3}')
    return f_beta(total_tp, total_fp, total_fn, beta=2)

In [None]:
import ast

# Validation split with one row per frame.
df = pd.read_csv('../input/d/lolik228/yolox-pt/valid_full_2_video.csv')
# The annotations are stored as the string form of a Python literal. They are
# parsed with ast.literal_eval instead of eval so that file contents can never
# execute code.
df['annotation'] = df['annotation'].apply(ast.literal_eval)

In [None]:
# path = '../input/tensorflow-great-barrier-reef/'
# confidance_greed_search = []
# for confidance in tqdm(np.arange(0.01, 0.85, 0.05)):
#     predictions = []
#     targets = []
#     for i in range(len(df)):
#         row = df.iloc[i]
#         img_path = path + row['image_path']
#         annot = row['annotation']
#         target = []
#         for an in annot:
#             target.append([an['x_min'], an['y_min'], an['x_max'] - an['x_min'], an['y_max'] - an['y_min']])

#         predictions.append(np.array(model_predict(img_path, confidance)))
#         targets.append(np.array(target))

#     confidance_greed_search.append(calc_f2_score(targets, predictions))
    
# confidance_greed_search

In [None]:
# for confidance in tqdm(np.arange(0.01, 0.85, 0.05)):
#     print(confidance)

In [None]:
# import pandas as pd
# import os
# import cv2
# from tqdm import tqdm
# import random


# # df = pd.read_csv('../input/tensorflow-great-barrier-reef/train.csv')
# names = []
# for i in range(3):
#     path = '../input/tensorflow-great-barrier-reef/train_images/video_' + str(i) + '/'
#     path_names = os.listdir(path)
#     for name in path_names:
#         names.append(path + name)
        
# random.shuffle(names)

# bad_names = []
# for name in tqdm(names):
#     image_np = cv2.imread(name)
#     image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
#     tensor = transform(image_np)
#     tensor = tensor.unsqueeze(0).half().to(device)
    
#     bboxes = infer_module.forward(tensor)
    
        

In [None]:
import greatbarrierreef

# Create the competition environment and its test iterator. The submission
# cell iterates `iter_test`, which yields (image_np, sample_prediction_df)
# pairs, and hands each prediction back through `env.predict`.
env = greatbarrierreef.make_env()   # initialize the environment
iter_test = env.iter_test()  

In [None]:
submission_dict = {'id': [], 'prediction_string': []}
# NOTE(review): `confthre` is never read; the threshold actually applied is the
# 0.65 hard-coded in the `forward` call below — confirm which one is intended.
confthre = 0.01
for (image_np, sample_prediction_df) in iter_test:
    tensor = transform(image_np).unsqueeze(0).half().to(device)

    bboxes = infer_module.forward(tensor, 0.65)
    # Each box is [score, x_min, y_min, w, h]; only the score is rounded.
    predictions = ['{:.2f} {} {} {} {}'.format(*box) for box in bboxes]

    prediction_str = ' '.join(predictions)
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)
    print('Prediction:', prediction_str)
In [None]:
# Show the first rows of `submission.csv` as a sanity check (presumably the
# file is written by the competition environment during the prediction loop —
# verify).
sub_df = pd.read_csv('submission.csv')
sub_df.head()