## 1. Imports

In [None]:
import os
import cv2
import ast
import yaml
import torch
import shutil
import random
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from sklearn.model_selection import GroupKFold

from tqdm.notebook import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append('../input/yolov5ds')
sys.path.append('../input/hyperparameters-for-yolov5')
sys.path.append('../input/tensorflow-great-barrier-reef')

import utils
tqdm.pandas()

%pip install -q wandb
%pip install wandb --upgrade
import wandb
wandb.login(key="f04c0b8d3b383666c2518b204435adcb3f9532e9")

## 2. Configuration

In [None]:
def random_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds NumPy, Python's ``random`` module and PyTorch (CPU and CUDA), and
    exports ``PYTHONHASHSEED``. When CUDA is available, all devices are seeded
    and cuDNN is switched to deterministic mode.
    """
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


random_seed(1702)

In [None]:
# Video id held out for validation; every other video is used for training.
FOLD = 2
# Frame size in pixels, used to normalise box coordinates for the YOLO labels.
IMAGE_WIDTH = 1280
IMAGE_HEIGHT = 720
# NOTE(review): not referenced anywhere in the visible notebook — confirm whether it is still needed.
REMOVE_NOBBOX = True 
# Number of annotation-free (background) frames kept in the dataset.
NUM_BACKGROUND_IMG = 983 # ~20% 

# Output directory for train.txt, val.txt and starfish.yaml.
CWD = '/kaggle/working/'
# Destination folder for the copied training images.
IMAGE_DIR = '/kaggle/images' 
# Destination folder for the generated YOLO label files.
LABEL_DIR = '/kaggle/labels' 
# Root of the competition dataset (train.csv and train_images/ are read from here).
ROOT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'

In [None]:
# Create the image/label output folders from Python rather than through the
# shell: no dependency on shell interpolation, and exist_ok=True keeps the
# cell safe to re-run.
os.makedirs(IMAGE_DIR, exist_ok=True)
os.makedirs(LABEL_DIR, exist_ok=True)

## 3. Helpers

I did not write any of the code in this section.

In [None]:
def coco2yolo(image_width, image_height, bboxes):
    """Convert ``[x, y, width, height]`` boxes to normalised centre format.

    Columns 0 and 2 are divided by ``image_width``, columns 1 and 3 by
    ``image_height``; columns 0 and 1 are then shifted by half of columns 2
    and 3 so they hold the box centre. ``bboxes`` must be a NumPy array whose
    last axis holds the four values; a float copy is returned and the input
    is left untouched.
    """
    yolo = bboxes.copy().astype(float)

    # scale horizontal quantities by the width, vertical ones by the height
    yolo[..., 0] = yolo[..., 0] / image_width
    yolo[..., 2] = yolo[..., 2] / image_width
    yolo[..., 1] = yolo[..., 1] / image_height
    yolo[..., 3] = yolo[..., 3] / image_height

    # move the reference point from the box corner to the box centre
    yolo[..., 0] = yolo[..., 0] + yolo[..., 2] / 2
    yolo[..., 1] = yolo[..., 1] + yolo[..., 3] / 2

    return yolo

def get_bbox(annots):
    """Turn a list of annotation dicts into a list of value lists.

    Each dict contributes one list of its values in insertion order; per the
    original author's note the resulting format is [x, y, width, height].
    """
    bboxes = []
    for annot in annots:
        bboxes.append(list(annot.values()))
    return bboxes

## 4. Dataset

In [None]:
def get_path(row):
    """Attach source, destination and label paths to one row of train.csv.

    Adds ``old_image_path`` (frame inside ROOT_DIR), ``image_path`` (copy
    inside IMAGE_DIR) and ``label_path`` (label file inside LABEL_DIR), all
    derived from the row's ``video_id`` and ``video_frame``. Returns the row.
    """
    vid, frame = row.video_id, row.video_frame

    row['old_image_path'] = f'{ROOT_DIR}/train_images/video_{vid}/{frame}.jpg'
    row['image_path'] = f'{IMAGE_DIR}/video_{vid}_{frame}.jpg'
    row['label_path'] = f'{LABEL_DIR}/video_{vid}_{frame}.txt'

    return row

# Load the competition metadata and add the path columns to every row.
df = pd.read_csv(f'{ROOT_DIR}/train.csv').progress_apply(get_path, axis=1)

# 'annotations' is stored as a string literal: parse it, count the boxes and
# flatten each annotation dict into a list of values.
df['annotations'] = df['annotations'].progress_apply(ast.literal_eval)
df['num_bbox'] = df['annotations'].progress_apply(len)
df['bboxes'] = df['annotations'].progress_apply(get_bbox)

df.head(2)

In [None]:
# Background images: shuffle the annotation-free frames and keep the first
# NUM_BACKGROUND_IMG of them.
df_empty_bbox = (
    df[df["num_bbox"] == 0]
    .sample(frac=1)
    .reset_index(drop=True)
    .head(NUM_BACKGROUND_IMG)
)

# Final dataset: every frame that has at least one box, followed by the
# sampled background frames.
df = pd.concat([df[df["num_bbox"] > 0], df_empty_bbox], ignore_index=True)

In [None]:
# Copy every frame out of the input dataset into IMAGE_DIR.
# (Original author's note: the model requires a folder it can write to.)

def make_copy(path):
    """Copy one frame into IMAGE_DIR as ``<parent folder>_<file name>``.

    The parent folder and file name are the last two '/'-separated parts of
    ``path``.
    """
    parts = path.split('/')
    video_id, filename = parts[-2], parts[-1]
    shutil.copy(path, os.path.join(IMAGE_DIR, f'{video_id}_{filename}'))

# Copy with a thread pool (joblib Parallel) to speed things up.
image_paths = df.old_image_path.tolist()
copy_jobs = (delayed(make_copy)(src) for src in tqdm(image_paths))
_ = Parallel(n_jobs=-1, backend='threading')(copy_jobs)

In [None]:
# Hold out the video whose id equals FOLD for validation; train on the rest.
is_valid_fold = df.video_id == FOLD
train_df = df[~is_valid_fold]
valid_df = df[is_valid_fold]

len(train_df), len(valid_df)

## 5. Files required for YOLOv5

More details about the requirements can be found [here](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data).

In [None]:
# dataset.yaml for YOLOv5
#
# Writes three files into CWD:
#   train.txt / val.txt - one image path per line for each split
#   starfish.yaml       - dataset description that points at the two lists

train_txt_path = os.path.join(CWD, 'train.txt')
val_txt_path = os.path.join(CWD, 'val.txt')
yaml_path = os.path.join(CWD, 'starfish.yaml')

with open(train_txt_path, 'w') as f:
    f.writelines(path + '\n' for path in train_df.image_path.tolist())

with open(val_txt_path, 'w') as f:
    f.writelines(path + '\n' for path in valid_df.image_path.tolist())

data = dict(
    path  = CWD,
    train = train_txt_path,
    val   = val_txt_path,
    nc    = 1,
    names = ['cots'],
    )

with open(yaml_path, 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Echo the generated file; the context manager closes the handle, which the
# previous bare open() never did.
with open(yaml_path, 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# labels for YOLOv5
#
# One text file per image, one line per box: "<class> <xc> <yc> <w> <h>" with
# all coordinates normalised by the image size. Images without boxes get an
# empty file so they act as background samples.
#
# Boxes are clipped to the image in pixel space *before* the conversion.
# Clipping x/y/w/h independently afterwards keeps every number inside [0, 1]
# but does not stop a box from extending past the image border.

for label_path, row_bboxes in tqdm(zip(df.label_path, df.bboxes), total=df.shape[0]):
    lines = []

    if len(row_bboxes) > 0:
        bboxes_coco = np.asarray(row_bboxes, dtype=np.float64).reshape(-1, 4)

        # corner coordinates, limited to the image
        x_min = np.clip(bboxes_coco[:, 0], 0, IMAGE_WIDTH)
        y_min = np.clip(bboxes_coco[:, 1], 0, IMAGE_HEIGHT)
        x_max = np.clip(bboxes_coco[:, 0] + bboxes_coco[:, 2], 0, IMAGE_WIDTH)
        y_max = np.clip(bboxes_coco[:, 1] + bboxes_coco[:, 3], 0, IMAGE_HEIGHT)
        bboxes_coco = np.stack([x_min, y_min, x_max - x_min, y_max - y_min], axis=1)

        # drop boxes that have no area left after clipping
        bboxes_coco = bboxes_coco[(bboxes_coco[:, 2] > 0) & (bboxes_coco[:, 3] > 0)]

        if len(bboxes_coco) > 0:
            bboxes_yolo = coco2yolo(IMAGE_WIDTH, IMAGE_HEIGHT, bboxes_coco)
            bboxes_yolo = np.clip(bboxes_yolo, 0, 1)  # guards against float round-off only
            # single class dataset -> class index is always 0
            lines = ['0 ' + ' '.join(f'{value:.6f}' for value in bbox) for bbox in bboxes_yolo]

    with open(label_path, 'w') as f:
        f.write('\n'.join(lines))

## 6. Training

In [None]:
%cd /kaggle/working
!rm -r /kaggle/working/yolov5
!cp -r ../input/yolov5ds /kaggle/working/yolov5
%cd yolov5

!python train.py --img 1280\
--batch 10\
--epochs 20\
--data /kaggle/working/starfish.yaml\
--weights yolov5m.pt --workers 0\
--adam\
--save-period 1

In [None]:
def voc2yolo(bboxes, image_height=720, image_width=1280):
    """Convert ``[x_min, y_min, x_max, y_max]`` boxes to normalised
    ``[x_center, y_center, width, height]``.

    Columns 0 and 2 are divided by ``image_width`` and columns 1 and 3 by
    ``image_height`` before the centre and size are derived. Works on a float
    copy; the input array is not modified.
    """
    boxes = bboxes.copy().astype(float)

    # normalise corners to the image size
    boxes[..., [0, 2]] = boxes[..., [0, 2]] / image_width
    boxes[..., [1, 3]] = boxes[..., [1, 3]] / image_height

    # box size from the two corners
    box_w = boxes[..., 2] - boxes[..., 0]
    box_h = boxes[..., 3] - boxes[..., 1]

    # centre = min corner + half size; size replaces the max corner
    boxes[..., 0] = boxes[..., 0] + box_w / 2
    boxes[..., 1] = boxes[..., 1] + box_h / 2
    boxes[..., 2] = box_w
    boxes[..., 3] = box_h

    return boxes

def yolo2coco(bboxes, image_height=720, image_width=1280):
    """Convert normalised ``[x_center, y_center, width, height]`` boxes to
    ``[x_min, y_min, width, height]`` scaled by the image size.

    Works on a float copy; the input array is not modified.
    """
    boxes = bboxes.copy().astype(float)

    # scale back up: columns 0/2 by the width, columns 1/3 by the height
    boxes[..., 0] = boxes[..., 0] * image_width
    boxes[..., 2] = boxes[..., 2] * image_width
    boxes[..., 1] = boxes[..., 1] * image_height
    boxes[..., 3] = boxes[..., 3] * image_height

    # centre -> top-left corner
    boxes[..., 0] = boxes[..., 0] - boxes[..., 2] / 2
    boxes[..., 1] = boxes[..., 1] - boxes[..., 3] / 2

    return boxes

def voc2coco(bboxes, image_height=720, image_width=1280):
    """Convert boxes by chaining ``voc2yolo`` and then ``yolo2coco`` with the
    same image size."""
    return yolo2coco(
        voc2yolo(bboxes, image_height, image_width),
        image_height,
        image_width,
    )

def predict(model, img, size=768, augment=False):
    """Run ``model`` on one image and return ``(bboxes, confs)``.

    ``img`` must expose ``.shape`` (height and width are read from it).
    The detections' xmin/ymin/xmax/ymax columns are passed through
    ``voc2coco`` and truncated to integers; ``confs`` holds the matching
    ``confidence`` values. Two empty lists are returned when nothing is
    detected.
    """
    height, width = img.shape[:2]
    detections = model(img, size=size, augment=augment).pandas().xyxy[0]
    corners = detections[['xmin','ymin','xmax','ymax']].values

    # nothing detected: callers get two empty lists
    if not len(corners):
        return [],[]

    coco_boxes = voc2coco(corners,height,width).astype(int)
    return coco_boxes, detections.confidence.values
    
def format_prediction(bboxes, confs):
    """Serialise detections as ``'conf xmin ymin w h conf xmin ymin w h ...'``.

    Returns an empty string when there are no boxes.
    """
    if len(bboxes) == 0:
        return ''

    chunks = []
    for idx, (xmin, ymin, w, h) in enumerate(bboxes):
        chunks.append(f'{confs[idx]} {xmin} {ymin} {w} {h}')

    return ' '.join(chunks).strip(' ')

In [None]:
def calc_iou(bboxes1, bboxes2, bbox_mode='xywh'):
    """Pairwise IoU between two sets of boxes.

    Both inputs must be 2-D arrays with four columns. With
    ``bbox_mode='xywh'`` columns 2-3 are treated as sizes and turned into
    max corners by adding columns 0-1; any other mode uses the columns as
    corners directly. Every extent is computed as ``max - min + 1``.
    Returns an array of shape ``(len(bboxes1), len(bboxes2))``.
    The inputs are copied and not modified.
    """
    assert len(bboxes1.shape) == 2 and bboxes1.shape[1] == 4
    assert len(bboxes2.shape) == 2 and bboxes2.shape[1] == 4

    boxes_a = bboxes1.copy()
    boxes_b = bboxes2.copy()

    if bbox_mode == 'xywh':
        # size -> max corner
        for boxes in (boxes_a, boxes_b):
            boxes[:, 2:] += boxes[:, :2]

    # first set as column vectors, second set as row vectors, so every
    # operation below broadcasts to the full pairwise matrix
    ax1, ay1, ax2, ay2 = (boxes_a[:, i:i + 1] for i in range(4))
    bx1, by1, bx2, by2 = (boxes_b[:, i:i + 1].T for i in range(4))

    left = np.maximum(ax1, bx1)
    top = np.maximum(ay1, by1)
    right = np.minimum(ax2, bx2)
    bottom = np.minimum(ay2, by2)

    inter_area = np.maximum((right - left + 1), 0) * np.maximum((bottom - top + 1), 0)
    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)

    return inter_area / (area_a + area_b - inter_area)

def f_beta(tp, fp, fn, beta=2):
    """F-beta score from true-positive, false-positive and false-negative
    counts: ``(1 + beta^2) * tp / ((1 + beta^2) * tp + beta^2 * fn + fp)``."""
    beta_sq = beta**2
    weighted_tp = (1+beta_sq)*tp
    return weighted_tp / (weighted_tp + beta_sq*fn+fp)

def imagewise_f2_score_at_iou_th(gt_bboxes, pred_bboxes, iou_th, verbose=False):
    """F2 score of one image at a single IoU threshold.

    Predictions are visited in the order given. Each one is compared (with
    its first column dropped) against the ground-truth boxes that are still
    unmatched: if the best IoU exceeds ``iou_th`` it counts as a true
    positive and that ground-truth box is removed, otherwise it is a false
    positive. Once no ground truth is left, every remaining prediction is a
    false positive. Ground-truth boxes still unmatched at the end are the
    false negatives. ``verbose`` is accepted but not used.
    """
    unmatched_gt = gt_bboxes.copy()
    preds = pred_bboxes.copy()
    n_preds = len(preds)

    tp = 0
    fp = 0
    for seen, pred in enumerate(preds, start=1):
        ious = calc_iou(unmatched_gt, pred[None, 1:])
        if ious.max() > iou_th:
            tp += 1
            unmatched_gt = np.delete(unmatched_gt, ious.argmax(), axis=0)
        else:
            fp += 1

        if len(unmatched_gt) == 0:
            # nothing left to match: the rest of the predictions are all misses
            fp += n_preds - seen
            break

    return f_beta(tp, fp, len(unmatched_gt), beta=2)



def imagewise_f2_score(gt_bboxes, pred_bboxes, verbose=False):
    """
    Mean F2 score of one image over the IoU thresholds
    ``np.arange(0.3, 0.85, 0.05)``.

    gt_bboxes: (N, 4) np.array in xywh format
    pred_bboxes: (N, 5) np.array in conf+xywh format

    Corner cases: returns 1.0 when both inputs are empty and 0.0 when
    exactly one of them is empty.
    """
    n_gt, n_pred = len(gt_bboxes), len(pred_bboxes)
    if n_gt == 0 and n_pred == 0:
        return 1.0
    if n_gt == 0 or n_pred == 0:
        return 0.0

    # highest confidence first
    by_conf_desc = pred_bboxes[:,0].argsort()[::-1]
    ranked_preds = pred_bboxes[by_conf_desc]

    return np.mean([
        imagewise_f2_score_at_iou_th(gt_bboxes, ranked_preds, iou_th, verbose)
        for iou_th in np.arange(0.3, 0.85, 0.05)
    ])

In [None]:
# Place Arial.ttf inside the Ultralytics config directory.
# NOTE(review): presumably so YOLOv5 does not try to download the font at
# runtime — confirm.
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
# Thresholds handed to load_model(): IOU is assigned to model.iou, CONF to model.conf.
IOU = 0.35
CONF = 0.15
# Passed to predict() as `size`.
IMG_SIZE = 1280
# Passed to predict() as `augment`.
AUGMENT = False

# Checkpoint that is loaded for evaluation.
CKPT_PATH = '../input/starfish/epoch19.pt'
# Same value as the ROOT_DIR assigned in the configuration section.
ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/'

In [None]:
def load_model(ckpt_path, conf=0.25, iou=0.50):
    """Load a custom checkpoint through ``torch.hub`` from the local
    ``/kaggle/input/yolov5-lib-ds`` repository and configure it.

    ``conf`` and ``iou`` are stored on the model as ``model.conf`` and
    ``model.iou``; ``classes``, ``multi_label`` and ``max_det`` are set to
    ``None``, ``False`` and ``1000``. Returns the configured model.
    """
    model = torch.hub.load(
        '/kaggle/input/yolov5-lib-ds',
        'custom',
        path=ckpt_path,
        source='local',
        force_reload=True,
    )

    inference_settings = {
        'conf': conf,
        'iou': iou,
        'classes': None,
        'multi_label': False,
        'max_det': 1000,
    }
    for attr_name, attr_value in inference_settings.items():
        setattr(model, attr_name, attr_value)

    return model

# Run the trained model over the validation frames and collect, per image,
# the ground-truth boxes as an (N, 4) xywh array and the predictions as an
# (M, 5) conf+xywh array - the shapes imagewise_f2_score() documents.
#
# Fixes compared with the previous version of this cell:
#   * predict() reads img.shape, so it is given the decoded image, not a path
#   * the undefined names pred_df / bbox / pred_bboxes are gone
#   * predictions are stacked into a 2-D array instead of a flat split string
model = load_model(CKPT_PATH, conf=CONF, iou=IOU)
gt_bboxes_list = []
pred_bboxes_list = []

for idx in range(valid_df.shape[0]):
    row = valid_df.iloc[idx]

    img = cv2.imread(row["old_image_path"])
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        raise FileNotFoundError(f'could not read image: {row["old_image_path"]}')
    img = img[..., ::-1]  # OpenCV loads BGR; the hub model expects RGB arrays

    bboxes, confs = predict(model, img, size=IMG_SIZE, augment=AUGMENT)

    gt_bboxes = np.asarray(row["bboxes"], dtype="float64").reshape(-1, 4)
    if len(bboxes) > 0:
        pred_bboxes = np.column_stack([confs, bboxes]).astype("float64")
    else:
        pred_bboxes = np.empty((0, 5), dtype="float64")

    gt_bboxes_list.append(gt_bboxes)
    pred_bboxes_list.append(pred_bboxes)
    