## References
* Evaluate the F2 score at each confidence (CONF) threshold on the validation dataset using the YOLOv5 model.
* F2 is the competition metric, averaged over IoU thresholds from 0.3 to 0.8 in steps of 0.05.


## Used model
I use steamedsheep's model:
* https://www.kaggle.com/steamedsheep/yolov5-is-all-you-need
* https://www.kaggle.com/steamedsheep/reef-baseline-fold12





If you find any bugs or mistakes, please let me know.
Thank you.

In [None]:

import numpy as np 
import pandas as pd 
import torch
from tqdm import tqdm
import sys
import cv2

import ast
# This import rebinds `tqdm` to the notebook variant, replacing the plain
# `tqdm` imported above for the rest of the notebook.
from tqdm.notebook import tqdm
# Registers `progress_apply` on pandas objects; the data-preparation cell
# relies on it.
tqdm.pandas()
# Put the competition dataset directory on the import path.
sys.path.append('../input/tensorflow-great-barrier-reef')

In [None]:
# Create the Ultralytics config directory and copy the Arial font into it.
# NOTE(review): presumably this keeps YOLOv5 from trying to download the font
# in an offline kernel — confirm.
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
# NOTE(review): this cell repeats the previous cell's commands verbatim. Both
# are safe to re-run (`mkdir -p` tolerates an existing directory and `cp`
# overwrites the file), so the duplicate appears to be redundant.
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
# Load the YOLOv5 checkpoint and define the settings shared by later cells.
%cd /kaggle/working
# Relative path, so it resolves against the working directory set above.
ckpt_path = '../input/yolov5-002/YOLOv5/yolov5-3600-sheepsame-001-fold-1/weights/best.pt'
# Passed as `size=` to every model call in this notebook.
infer_size = 5000
# Frames whose `video_id` equals this value form the validation set.
select_fold = 1
# Load the custom weights through torch.hub from a local copy of the YOLOv5
# repository (source='local') instead of fetching it from GitHub.
model = torch.hub.load('../input/yolov5-lib-ds', 
                       'custom', 
                       path=ckpt_path,
                       source='local',
                       force_reload=True)  # local repo
# NOTE(review): presumably the minimum confidence for a detection to be
# returned; kept very low so that the threshold sweep later in the notebook
# has low-confidence boxes to filter — confirm.
model.conf = 0.01

# model.iou  = 0.65 ####

In [None]:
def IOU_coco(bbox1, bbox2):
    '''
    Return the intersection-over-union of two COCO-format boxes.

    Adapted from https://stackoverflow.com/questions/25349178/calculating-percentage-of-bounding-box-overlap-for-image-detector-evaluation

    Args:
        bbox1: Box as ``[x, y, width, height]`` where ``(x, y)`` is the
            top-left corner.
        bbox2: Second box in the same format.

    Returns:
        The IoU as a float in ``[0.0, 1.0]``. ``0.0`` is returned when the
        boxes do not overlap and when the union has zero area (two
        degenerate boxes), which would otherwise divide by zero.
    '''
    x_left = max(bbox1[0], bbox2[0])
    y_top = max(bbox1[1], bbox2[1])
    x_right = min(bbox1[0] + bbox1[2], bbox2[0] + bbox2[2])
    y_bottom = min(bbox1[1] + bbox1[3], bbox2[1] + bbox2[3])
    if x_right < x_left or y_bottom < y_top:
        return 0.0
    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    bb1_area = bbox1[2] * bbox1[3]
    bb2_area = bbox2[2] * bbox2[3]
    union_area = float(bb1_area + bb2_area - intersection_area)
    if union_area == 0:
        # Both boxes are degenerate (zero area) and touch: no overlap to
        # measure, and dividing would raise ZeroDivisionError.
        return 0.0
    iou = intersection_area / union_area

    # Sanity checks on the result; these trip for malformed boxes such as
    # negative widths or heights.
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
%cd /kaggle/working

# GroupKFold is only referenced by the commented-out fold assignment further
# down in this cell.
from sklearn.model_selection import GroupKFold
# import ast

def get_bbox(annots):
    """Turn a frame's annotation dicts into a list of box value lists.

    Each dict contributes one inner list holding its values in the dict's
    own key order; an empty input yields an empty list.
    """
    boxes = []
    for annotation in annots:
        boxes.append(list(annotation.values()))
    return boxes

# Dataset root. It ends with a slash, so the paths built below contain '//'.
ROOT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'


def get_path(row):
    """Store the frame's JPEG location on ``row`` as ``image_path``.

    The path is built from ``ROOT_DIR`` and the row's ``video_id`` and
    ``video_frame`` fields. The row is modified in place and returned.
    """
    image_path = f'{ROOT_DIR}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = image_path
    return row

# Load the training metadata and derive per-frame box lists and image paths.
df = pd.read_csv("/kaggle/input/tensorflow-great-barrier-reef/train.csv")

# Don't filter for annotated frames. Include frames with no bboxes as well!
# Counts the character 'x' in the raw annotation string.
# NOTE(review): presumably every box contributes exactly one 'x' (its key), so
# this equals the number of boxes — confirm.
# This must run before the strings are parsed below: `df_train` is the same
# object as `df`, and str.count needs the unparsed string.
df["num_bbox"] = df['annotations'].apply(lambda x: str.count(x, 'x'))
df_train = df

# Annotations: parse each annotation string into Python objects, then convert
# them to per-frame box lists with get_bbox.
df_train['annotations'] = df_train['annotations'].progress_apply(lambda x: ast.literal_eval(x))
df_train['bboxes'] = df_train.annotations.progress_apply(get_bbox)

# Row-wise apply that adds the `image_path` column; the result is rebound to
# `df_train`.
df_train = df_train.progress_apply(get_path, axis=1)

# Disabled alternative: assign folds with GroupKFold grouped by `sequence`.
# kf = GroupKFold(n_splits = 5) 
# df_train = df_train.reset_index(drop=True)
# df_train['fold'] = -1
# for fold, (train_idx, val_idx) in enumerate(kf.split(df_train, y = df_train.video_id.tolist(), groups=df_train.sequence)):
#     df_train.loc[val_idx, 'fold'] = fold

df_train.head(5)


In [None]:
# select_fold = 1
df_test = df_train[df_train.video_id == select_fold]
print(len(df_test))
df_test.head()

In [None]:
# Run the model on one sample frame and display the rendered detections.
# Takes the first validation frame with more than three annotated boxes;
# `[0]` raises IndexError if no frame qualifies.
image_path = df_test[df_test.num_bbox>3].image_path.tolist()[0]
# Reverse the channel axis of the loaded image (OpenCV loads BGR; this gives RGB).
img = cv2.imread(image_path)[...,::-1]
r = model(img, size=infer_size, augment=False)
# NOTE(review): the read below assumes save() writes the rendered image as
# image0.jpg into this directory — confirm for the YOLOv5 version in use.
r.save("/kaggle/working")
bbox_img = cv2.imread("/kaggle/working/image0.jpg")[...,::-1]
from PIL import Image
# Show a 640x360 downscaled preview.
display(Image.fromarray(cv2.resize(bbox_img, dsize=(640,360))))

In [None]:
# Preview the first validation frames that have at least one annotated box.
df_test.query("num_bbox>0").head()

In [None]:
from typing import List

import numpy as np
import torch
from torchvision.ops import box_iou


def _count_greedy_matches(iou_rows, num_gt, iou_th):
    """Count one-to-one matches between predictions and ground-truth boxes.

    ``iou_rows`` holds one row per prediction with its IoU against each of the
    ``num_gt`` ground-truth boxes. Predictions are visited in row order and
    each claims the still-unclaimed ground-truth box it overlaps most, as long
    as that IoU is at least ``iou_th``.
    """
    claimed = [False] * num_gt
    matches = 0
    for row in iou_rows:
        best_gt = -1
        best_iou = iou_th
        for gt_index, iou in enumerate(row):
            if not claimed[gt_index] and iou >= best_iou:
                best_gt = gt_index
                best_iou = iou
        if best_gt >= 0:
            claimed[best_gt] = True
            matches += 1
    return matches


def calculate_score(
    preds: List[torch.Tensor],
    gts: List[torch.Tensor],
    iou_th: float
) -> float:
    """Return the F2 score of predicted boxes against ground truth at one IoU threshold.

    True positives, false positives and false negatives are accumulated over
    all frames and combined as ``5 * TP / (5 * TP + 4 * FN + FP)``.

    Within a frame every ground-truth box can be matched by at most one
    prediction, so ``TP + FP`` equals the number of predictions and
    ``TP + FN`` equals the number of ground-truth boxes. Without that, a
    single prediction overlapping several ground-truth boxes would be counted
    as several true positives and push the false-positive count below zero.

    Args:
        preds: Per-frame predicted boxes in ``(x1, y1, x2, y2)`` format; an
            empty tensor marks a frame without predictions.
            NOTE(review): matching is greedy in row order, so rows should be
            sorted by descending confidence — confirm for the caller.
        gts: Per-frame ground-truth boxes in the same format, aligned
            index-by-index with ``preds``.
        iou_th: Minimum IoU for a prediction to match a ground-truth box.

    Returns:
        The F2 score, or ``0.0`` when there are no predictions and no
        ground-truth boxes at all (the ratio would otherwise be 0 / 0).
    """
    num_tp = 0
    num_fp = 0
    num_fn = 0
    for p, gt in zip(preds, gts):
        num_pred = len(p)
        num_gt = len(gt)
        tp = 0
        if num_pred and num_gt:
            # box_iou returns a (num_pred, num_gt) matrix; plain lists are
            # cheaper to scan element by element than tensors.
            iou_rows = box_iou(p, gt).tolist()
            tp = _count_greedy_matches(iou_rows, num_gt, iou_th)
        num_tp += tp
        num_fp += num_pred - tp
        num_fn += num_gt - tp
    denominator = 5 * num_tp + 4 * num_fn + num_fp
    if denominator == 0:
        return 0.0
    return 5 * num_tp / denominator


In [None]:
import copy

# `df_sample` is another name for the validation frame table (no copy is made).
df_sample = df_test
image_paths = df_sample.image_path.tolist()
# Deep copy of the per-frame box lists, so the list can be changed without
# touching the DataFrame's own objects.
gt = copy.deepcopy(df_sample.bboxes.tolist())
# NOTE(review): `gtmem` is not read in any visible cell — presumably a
# pristine backup of `gt`; confirm before removing.
gtmem = copy.deepcopy(df_sample.bboxes.tolist())

In [None]:
%%time
%cd /kaggle/working
import cv2

# Run the detector on every validation frame and collect, frame by frame, the
# predictions and the ground-truth boxes used by the threshold sweep.
preds_list = [] # Per-frame prediction tables taken from r.pandas().xyxy[0]
gts_list = [] # Per-frame ground-truth boxes as [x1, y1, x2, y2] tensors

for i in tqdm(range(len(image_paths))):
# for i in tqdm(range(470, 500)):
# for i in tqdm(range(475, 480)):
    TEST_IMAGE_PATH = image_paths[i]
    # Reverse the channel axis of the loaded image (OpenCV loads BGR; this gives RGB).
    img = cv2.imread(TEST_IMAGE_PATH)[...,::-1]
    # NOTE(review): h and w are not used in the visible cells.
    h, w, _ = img.shape
    gts = []
    # Convert each box from [x, y, w, h] to corner format [x1, y1, x2, y2].
    for j in gt[i]: # [[x,y,w,h],...]
        gts.append([j[0], j[1], j[0]+j[2], j[1]+j[3]])
    r = model(img, size=infer_size, augment=False)
    preds_list.append(r.pandas().xyxy[0])
    # A frame without annotations yields an empty tensor.
    gts_list.append(torch.Tensor(gts))


In [None]:
%%time
# Sweep the confidence threshold and record, for each value, the F2 score
# averaged over the IoU thresholds.
F2list = []
F2max = 0.0
F2maxat = -1.0
for conf in tqdm(np.arange(0.0, 1.0, 0.01)):
    # Keep only boxes whose confidence is strictly above `conf`; a frame with
    # no detections at all becomes an empty tensor.
    preds_conf = [torch.Tensor(i.query("confidence>@conf")[['xmin','ymin','xmax','ymax']].values) if len(i)!=0 else torch.tensor([]) for i in preds_list ]
    # NOTE(review): the stop value 0.85 is presumably chosen so that 0.8 is
    # included; arange with a float step can be inexact — confirm the values.
    iou_ths = np.arange(0.3, 0.85, 0.05)
#     iou_ths = [0.65,]
    F2 = np.mean([calculate_score(preds_conf, gts_list, iou_th) for iou_th in iou_ths])
#     print(score)
    F2list.append((conf, F2))
    # Track the best score and its threshold; on ties the earliest threshold wins.
    if F2max < F2:
        F2max = F2
        F2maxat = conf


In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
# F2list holds (conf, F2) pairs; zip(*...) splits them into x and y sequences.
plt.scatter(*zip(*F2list))
plt.title("CONF vs F2 score")
plt.xlabel('CONF')
plt.ylabel('F2')
plt.show()
# Report the best mean F2 found by the sweep and the threshold that gave it.
print(f'F2 max is {F2max} at CONF = {F2maxat}')