In [7]:
import ast
import glob
import json
import os
import re
import shutil

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from IPython.core.interactiveshell import InteractiveShell
from PIL import Image
from torchvision.ops import box_iou
from tqdm import tqdm

InteractiveShell.ast_node_interactivity = "all"
import seaborn as sns
from IPython.display import clear_output

In [2]:
def show_img(img, bbox, pred=None):
    """Show an image with reference boxes in blue and predicted boxes in red.

    Args:
        img: Image array; ``img.shape[:2]`` is read as ``(height, width)``.
        bbox: Sequence of ``(x_c, y_c, w, h)`` boxes. Each value is multiplied
            by the image width or height before drawing, i.e. the boxes are
            treated as fractions of the image size.
        pred: Optional sequence of ``(x_c, y_c, w, h)`` boxes. These are drawn
            exactly as given, without any scaling.
    """
    height_px, width_px = img.shape[:2]

    _, axes = plt.subplots(1, 1, figsize=(15, 8))
    axes.imshow(img)

    # Reference boxes: centre/size -> top-left corner, scaled to the image.
    for k in range(len(bbox)):
        cx, cy, bw, bh = bbox[k]
        top_left = [(cx - bw / 2) * width_px, (cy - bh / 2) * height_px]
        axes.add_patch(
            plt.Rectangle(
                top_left, bw * width_px, bh * height_px, ec="b", fc="none", lw=2.0
            )
        )

    # Predicted boxes: centre/size -> top-left corner, no scaling applied.
    for k in range(0 if pred is None else len(pred)):
        cx, cy, bw, bh = pred[k]
        top_left = [cx - bw / 2, cy - bh / 2]
        axes.add_patch(plt.Rectangle(top_left, bw, bh, ec="r", fc="none", lw=2.0))

    plt.show()

In [3]:
# Path of the training CSV that is read into ``df`` with ``pd.read_csv``.
TRAIN_DF_PART = "/app/_data/tensorflow-great-barrier-reef/train.csv"

In [4]:
# Load the training table and derive the per-frame helper columns.
df = pd.read_csv(TRAIN_DF_PART)
# Source image location, built from the video id and frame number.
df["img_path"] = (
    "/app/_data/tensorflow-great-barrier-reef/train_images/video_"
    + df.video_id.astype("str")
    + "/"
    + df.video_frame.astype("str")
    + ".jpg"
)
# The CSV stores annotations as text; parse each cell into a Python object.
df["annotations"] = df["annotations"].apply(ast.literal_eval)
df["len_annotation"] = df["annotations"].str.len()
# Literal character replacement, so no regular expression is needed.
df["image_id"] = df["image_id"].str.replace("-", "_", regex=False)
df["new_img_path"] = "/app/_data/images/" + df["image_id"] + ".jpg"
# 1 when the frame has at least one annotation, otherwise 0.
df["label"] = (df["len_annotation"] != 0).astype("int64")

In [5]:
def _count_matches(iou_matrix, pred_order, iou_thr):
    """Greedily pair predictions with ground-truth boxes, one-to-one.

    Predictions are visited in ``pred_order``. Each one claims the still
    unclaimed ground-truth box it overlaps most, provided that IoU is at
    least ``iou_thr``. Returns the number of pairs formed.
    """
    gt_free = np.ones(iou_matrix.shape[0], dtype=bool)
    matches = 0
    for p in pred_order:
        if not gt_free.any():
            break
        # Already claimed boxes get -1 so they can never be picked again.
        overlaps = np.where(gt_free, iou_matrix[:, p], -1.0)
        best = int(np.argmax(overlaps))
        if overlaps[best] >= iou_thr:
            gt_free[best] = False
            matches += 1
    return matches


def tp_fp_fn(gt, prediction, conf_thr, iou_thresholds=None):
    """Count true positives, false positives and false negatives for one image.

    Counts are produced for every IoU threshold at once. Predictions and
    ground-truth boxes are matched one-to-one, highest confidence first, so a
    single prediction can never be credited for several ground-truth boxes
    (which previously inflated TP and could make FP negative).

    Args:
        gt: Array of ground-truth boxes as ``(x_min, y_min, x_max, y_max)``
            rows, or an empty array when the image has no objects.
        prediction: 2-D NumPy array whose rows start with
            ``(x_center, y_center, width, height, confidence)``; any further
            columns are ignored.
        conf_thr: Predictions with confidence ``<= conf_thr`` are discarded.
        iou_thresholds: Optional 1-D sequence of IoU thresholds. Defaults to
            ``np.arange(0.3, 0.81, 0.05)``.

    Returns:
        Tuple ``(TP, FP, FN)`` of ``int16`` arrays with one entry per IoU
        threshold.
    """
    if iou_thresholds is None:
        iou_thresholds = np.arange(0.3, 0.81, 0.05)
    iou_thresholds = np.asarray(iou_thresholds, dtype=float)
    n_thr = iou_thresholds.shape[0]
    TP = np.zeros(n_thr, "int16")
    FP = np.zeros(n_thr, "int16")
    FN = np.zeros(n_thr, "int16")

    gt = np.asarray(gt)
    kept = prediction[prediction[:, 4] > conf_thr]
    n_pred = kept.shape[0]
    n_gt = gt.shape[0] if gt.size else 0

    if n_pred == 0 or n_gt == 0:
        # Nothing can be matched: every prediction is a false positive and
        # every ground-truth box is a miss.
        FP[:] = n_pred
        FN[:] = n_gt
        return TP, FP, FN

    # Centre/size -> corner coordinates. Values are cast to whole pixels and
    # each assignment into the integer array truncates again, which is the
    # same arithmetic this function has always used.
    xywh = kept[:, :4].astype("int")
    boxes = np.empty_like(xywh)
    boxes[:, 0] = xywh[:, 0] - xywh[:, 2] / 2
    boxes[:, 1] = xywh[:, 1] - xywh[:, 3] / 2
    boxes[:, 2] = boxes[:, 0] + xywh[:, 2]
    boxes[:, 3] = boxes[:, 1] + xywh[:, 3]

    # Rows index ground-truth boxes, columns index predictions.
    iou_matrix = box_iou(torch.Tensor(gt), torch.Tensor(boxes)).numpy()
    by_confidence = np.argsort(-kept[:, 4], kind="stable")

    for n, iou_thr in enumerate(iou_thresholds):
        tp = _count_matches(iou_matrix, by_confidence, iou_thr)
        TP[n] = tp
        FP[n] = n_pred - tp
        FN[n] = n_gt - tp
    return TP, FP, FN

In [6]:
# Read the JSON results file into ``res_dict``.
with open("/app/f2_results.json", "r") as results_file:
    res_dict = json.loads(results_file.read())

In [8]:
# Augmentation pipeline: one geometric transform followed by two colour
# transforms, each configured with probability 1.
_geometric_jitter = A.ShiftScaleRotate(
    shift_limit=0.2, scale_limit=[0.1, 0.2], rotate_limit=45, p=1.0,
)
_colour_jitter = A.HueSaturationValue(
    hue_shift_limit=3, sat_shift_limit=3, val_shift_limit=5, p=1
)
_exposure_jitter = A.RandomBrightnessContrast(
    brightness_limit=0.01, contrast_limit=0.05, brightness_by_max=True, p=1
)
# Boxes are supplied in "yolo" format with their labels under "class_labels".
_bbox_settings = A.BboxParams(
    format="yolo", min_visibility=0.5, label_fields=["class_labels"]
)
transform = A.Compose(
    [_geometric_jitter, _colour_jitter, _exposure_jitter],
    bbox_params=_bbox_settings,
)

In [None]:
# Confidence thresholds to sweep and IoU thresholds to score at.
conf_thres = np.arange(0.1, 0.61, 0.01)
ious = np.arange(0.3, 0.81, 0.05)
# res[c, k, j] accumulates counts for confidence threshold c and IoU
# threshold j, with k = 0 (TP), 1 (FP), 2 (FN).
res = np.zeros([conf_thres.shape[0], 3, ious.shape[0]])

# NOTE(review): NAME and IMG_SIZE are not assigned anywhere in the visible
# notebook; they must be defined before this cell runs on a fresh kernel.
# The former `IMG_SIZE = IMG_SIZE` line was a no-op and has been removed.
path = f"/app/_data/yolov5_f2/runs/train/{NAME}/weights/best.pt"
model = torch.hub.load(
    "/app/_data/yolov5", "custom", path=path, source="local", force_reload=True
)
# Set well below the lowest swept threshold (0.1) so the sweep does the filtering.
model.conf = 0.01


In [None]:
# Frame size in pixels, used to normalise the annotations and to scale the
# augmented boxes back to pixels.
img_w = 1280
img_h = 720
# NOTE(review): df_test, model, IMG_SIZE, conf_thres and res come from other
# cells; df_test is only assigned in a later cell of the visible notebook.
for ix in tqdm(df_test.index):
    annotations = df_test.loc[ix, "annotations"]
    # Only annotated frames are augmented and scored, so skip the others
    # before paying for the image decode.
    if not len(annotations):
        continue
    image_path = df_test.loc[ix, "img_path"]
    img = np.array(Image.open(image_path))

    # One row per box: (class, x_center, y_center, width, height), all
    # expressed as fractions of the frame size.
    bboxes = np.zeros([len(annotations), 5])
    for i, annotation in enumerate(annotations):
        xmin = annotation["x"] / img_w
        ymin = annotation["y"] / img_h
        width = annotation["width"] / img_w
        height = annotation["height"] / img_h
        # Clip boxes that extend past the right or bottom edge.
        width = width if (width + xmin) <= 1 else (1 - xmin)
        height = height if (height + ymin) <= 1 else (1 - ymin)
        x_center = xmin + width / 2
        y_center = ymin + height / 2
        # Write row i only (the old `bboxes[i:, k]` also overwrote later rows).
        bboxes[i] = [0, x_center, y_center, width, height]

    # Five random augmentations per annotated frame.
    for _ in range(5):
        transformed = transform(
            image=img,
            bboxes=bboxes[:, 1:],
            class_labels=bboxes[:, 0],
        )
        a_img, a_bbox = transformed["image"], transformed["bboxes"]
        prediction = model(a_img, size=IMG_SIZE, augment=True).xywh[0].cpu().numpy()
        prediction = prediction[prediction[:, 4] > 0.1]
        gt = np.array(a_bbox)
        if gt.size:
            # Normalised centre/size -> pixel corner coordinates.
            gt[:, 0] *= img_w
            gt[:, 1] *= img_h
            gt[:, 2] *= img_w
            gt[:, 3] *= img_h
            gt[:, 0] = gt[:, 0] - gt[:, 2] / 2
            gt[:, 1] = gt[:, 1] - gt[:, 3] / 2
            gt[:, 2] = gt[:, 0] + gt[:, 2]
            gt[:, 3] = gt[:, 1] + gt[:, 3]

        for n, c_th in enumerate(conf_thres):
            TP, FP, FN = tp_fp_fn(gt, prediction, c_th)
            res[n, 0, :] += TP
            res[n, 1, :] += FP
            res[n, 2, :] += FN


In [None]:
# Confidence thresholds to sweep and IoU thresholds to score at.
conf_thres = np.arange(0.1, 0.61, 0.01)
ious = np.arange(0.3, 0.81, 0.05)
# res[c, k, j] accumulates counts for confidence threshold c and IoU
# threshold j, with k = 0 (TP), 1 (FP), 2 (FN).
res = np.zeros([conf_thres.shape[0], 3, ious.shape[0]])

# NOTE(review): NAME, IMG_SIZE and val are not assigned anywhere in the
# visible notebook; they must be defined before this cell runs on a fresh
# kernel. The former `IMG_SIZE = IMG_SIZE` line was a no-op and has been
# removed.
path = f"/app/_data/yolov5_f2/runs/train/{NAME}/weights/best.pt"
model = torch.hub.load(
    "/app/_data/yolov5", "custom", path=path, source="local", force_reload=True
)
# Set well below the lowest swept threshold (0.1) so the sweep does the filtering.
model.conf = 0.01
# choose validation set
df_test = val.copy()
# computing f2 score
for ix in tqdm(df_test.index.tolist()):
    img = np.array(Image.open(df_test.loc[ix, "img_path"]))
    prediction = model(img, size=IMG_SIZE, augment=True).xywh[0].cpu().numpy()
    prediction = prediction[prediction[:, 4] > 0.1]
    # Read the box fields by name rather than relying on dict value order.
    gt = np.array(
        [
            [box["x"], box["y"], box["width"], box["height"]]
            for box in df_test.loc[ix, "annotations"]
        ]
    )
    if gt.size:
        # (x, y, width, height) -> (x_min, y_min, x_max, y_max)
        gt[:, 2] = gt[:, 2] + gt[:, 0]
        gt[:, 3] = gt[:, 3] + gt[:, 1]
    for n, c_th in enumerate(conf_thres):
        TP, FP, FN = tp_fp_fn(gt, prediction, c_th)
        res[n, 0, :] += TP
        res[n, 1, :] += FP
        res[n, 2, :] += FN

YOLOv5 🚀 v6.0-193-gdb1f83b torch 1.9.1+cu111 CUDA:0 (NVIDIA GeForce RTX 3090, 24265MiB)

Fusing layers... 
Model Summary: 476 layers, 76118664 parameters, 0 gradients, 110.0 GFLOPs
Adding AutoShape... 
 37% 3207/8561 [10:20<17:17,  5.16it/s]

In [None]:
# Accumulated counts, shape (confidence thresholds, IoU thresholds).
tp_total, fp_total, fn_total = res[:, 0, :], res[:, 1, :], res[:, 2, :]

# F2 = 5*P*R / (4*P + R) written directly in counts:
#     F2 = 5*TP / (5*TP + 4*FN + FP)
# Precision or recall is 0/0 when a threshold has no predictions or no ground
# truth; computing them separately yields NaN, and np.argmax would then pick
# that NaN entry as the "best" threshold. Such entries score 0 here.
f2_denominator = 5 * tp_total + 4 * fn_total + fp_total
f2_per_iou = np.divide(
    5 * tp_total,
    f2_denominator,
    out=np.zeros_like(f2_denominator, dtype=float),
    where=f2_denominator > 0,
)
# One score per confidence threshold: the mean over the IoU thresholds.
F2 = f2_per_iou.mean(axis=1)

best_idx = int(np.argmax(F2))
# Create the per-checkpoint entry on first use, then store or overwrite the
# result for this image size.
res_dict.setdefault(path, {})[IMG_SIZE] = {
    "best": [
        np.round(conf_thres[best_idx], 2),
        np.round(F2[best_idx], 4),
    ],
    "all": list(np.round(F2, 4)),
}

In [None]:
# Display the entry stored under this checkpoint path and image size.
res_dict[path][IMG_SIZE]