In [None]:
import ast
import numpy as np
import pandas as pd
from typing import List

import torch
from torchvision.ops import box_iou

In [None]:
# F2 score for box predictions vs. ground truth; boxes are [x, y, width, height].

def _xywh_to_xyxy(boxes: torch.Tensor) -> torch.Tensor:
    """Return a float copy of ``boxes`` converted from [x, y, w, h] to [x1, y1, x2, y2]."""
    # Never touch the caller's tensor; integer boxes are promoted to float.
    xyxy = boxes.clone() if boxes.is_floating_point() else boxes.float()
    xyxy[:, 2] += xyxy[:, 0]
    xyxy[:, 3] += xyxy[:, 1]
    return xyxy


def _count_greedy_matches(iou_matrix: torch.Tensor, iou_th: float) -> int:
    """Count one-to-one (prediction, ground-truth) matches with IoU >= ``iou_th``.

    Rows are predictions, columns are ground-truth boxes. Predictions are
    visited in row order and each one claims the still-unclaimed ground-truth
    box it overlaps most, provided that overlap reaches the threshold.
    """
    remaining = iou_matrix.clone()
    matched = 0
    for pred_idx in range(remaining.shape[0]):
        best_iou, best_gt = remaining[pred_idx].max(0)
        if best_iou.item() >= iou_th:
            matched += 1
            # A ground-truth box can satisfy only one prediction.
            remaining[:, best_gt] = -1.0
    return matched


def calculate_score(
    preds: List[torch.Tensor],
    gts: List[torch.Tensor],
    iou_th: float
) -> float:
    """Compute the F2 score of box predictions at a single IoU threshold.

    Args:
        preds: Per-image predicted boxes, each an ``(N, 4)`` tensor in
            ``[x, y, width, height]`` form. An image without predictions may
            be an empty tensor or an empty list.
        gts: Per-image ground-truth boxes in the same format, aligned with
            ``preds`` by position.
        iou_th: Minimum IoU for a prediction to count as a true positive.

    Returns:
        ``5 * TP / (5 * TP + 4 * FN + FP)`` accumulated over all images, or
        ``0.0`` when there are no predictions and no ground-truth boxes at all
        (the score is undefined there; previously this raised
        ``ZeroDivisionError``).

    Matching is one-to-one and greedy in prediction order, so every prediction
    and every ground-truth box is counted at most once. This keeps FP and FN
    non-negative and the score inside ``[0, 1]``. Pass predictions sorted by
    descending confidence if confidence-ranked matching is wanted.
    """
    iou_th = float(iou_th)  # accept numpy scalars such as np.float64
    num_tp = 0
    num_fp = 0
    num_fn = 0
    for p, GT in zip(preds, gts):
        n_pred, n_gt = len(p), len(GT)
        if n_pred and n_gt:
            iou_matrix = box_iou(_xywh_to_xyxy(p), _xywh_to_xyxy(GT))
            tp = _count_greedy_matches(iou_matrix, iou_th)
        else:
            # Nothing to match: every box on the non-empty side is an error.
            tp = 0
        num_tp += tp
        num_fp += n_pred - tp
        num_fn += n_gt - tp
    denominator = 5 * num_tp + 4 * num_fn + num_fp
    if denominator == 0:
        return 0.0
    return 5 * num_tp / denominator

In [None]:

# Load the training table from the competition's input directory.
df = pd.read_csv(
    '../input/tensorflow-great-barrier-reef/train.csv'
)

def get_bbox(annots):
    """Convert a sequence of annotation dicts into a list of value lists.

    Each dict contributes one inner list containing its values in the dict's
    own iteration order; the keys are discarded.
    """
    boxes = []
    for annotation in annots:
        boxes.append(list(annotation.values()))
    return boxes

# Parse the stringified annotations into Python objects, then derive one
# list of box values per row.
df['annotations'] = df['annotations'].apply(ast.literal_eval)
df['bboxes'] = df['annotations'].apply(get_bbox)

In [None]:
# Sanity check: use the ground-truth boxes themselves as the predictions.
df['preds'] = df.bboxes
df

In [None]:
# Turn each row's box list into a tensor; rows without boxes (a float cell
# or an empty list) are represented by an empty list.
predictions = [
    torch.tensor(cell) if type(cell) is not float and len(cell) > 0 else []
    for cell in df['preds']
]
gts = [
    torch.tensor(cell) if type(cell) is not float and len(cell) > 0 else []
    for cell in df['bboxes']
]

# Score at every IoU threshold in the range, then average the results.
iou_ths = np.arange(0.3, 0.85, 0.05)
scores = []
for threshold in iou_ths:
    scores.append(calculate_score(predictions, gts, threshold))
np.mean(scores)

So F2 = 1.0 when the predictions are identical to the ground truth.

Using this metric, we reach F2 = 0.74+ in 3-fold cross-validation (split by video_id).

If you are interested, you can use this implementation to compare your CV score with ours.

Thanks!