# Importy

In [None]:
import warnings
import ast
import pandas as pd
from tqdm.notebook import tqdm
from shutil import copyfile
from IPython.display import display
import numpy as np
import cv2
from PIL import Image
import os
import matplotlib.pyplot as plt
import random
%matplotlib inline
tqdm.pandas()

# Przygotowanie środowiska

In [None]:
# Working directories: images and their label files are written side by side
# into images_train / images_val under DATASET_PATH.
DATASET_PATH = '/kaggle/working/dataset'
IMAGES_TRAIN = f"{DATASET_PATH}/images_train"
IMAGES_VAL = f"{DATASET_PATH}/images_val"

# Project name passed to train.py via --project (training cell is commented out).
PROJECT   = 'great-barrier-reef'

# Index of the fold used for validation; all other folds form the training set.
FOLD = 1
# Base architecture name; NOTE: MODEL is reassigned later in the notebook to a
# checkpoint path, but NAME below is built from this initial value.
MODEL     = 'yolov5s6'
OPTMIZER  = 'Adam'
# Inference (and training) image size handed to YOLOv5.
IMGSIZE = 2000
BATCH     = 4
EPOCHS    = 18
NAME      = f'{MODEL}-imgSize{IMGSIZE}-epochs{EPOCHS}-fold{FOLD}'
# Module-level drawing colour read by show_img(); None means "use green".
colors = None

# NMS confidence and IoU thresholds passed to load_model().
CONF = 0.3
IOU = 0.4

# Plain mkdir (no -p): the parent is created first; re-running the cell
# prints "File exists" errors but does not stop the notebook.
!mkdir {DATASET_PATH}
!mkdir {IMAGES_VAL}
!mkdir {IMAGES_TRAIN}

## Ładowanie wcześniej wytrenowanego modelu

In [None]:

# Place Arial.ttf in the Ultralytics config directory
# (presumably so YOLOv5 does not try to download the font offline — confirm).
!mkdir -p /root/.config/Ultralytics/ && cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/
# Checkpoint path WITHOUT extension; ".pt" is appended where the model is loaded.
# This overrides the architecture name assigned to MODEL earlier.
MODEL =  f"/kaggle/input/starfishmodel/best"

# Funkcje pomocnicze

## Bbox-utils (awsaf49)

In [None]:
#https://github.com/awsaf49/bbox/blob/main/bbox/utils.py

def coco2yolo(bboxes, height=720, width=1280):
    """Convert COCO-style boxes to YOLO-style boxes, in place.

    Columns 0 and 2 are divided by ``width``, columns 1 and 3 by ``height``;
    then the first two columns are shifted by half of the last two, turning
    a top-left corner into a box centre.

    Args:
        bboxes: array whose last axis holds four box values. It is modified
            in place, so it must have a floating-point dtype.
        height: image height used for normalisation.
        width: image width used for normalisation.

    Returns:
        The same ``bboxes`` object after modification.
    """
    x_values = bboxes[..., 0::2]
    y_values = bboxes[..., 1::2]
    x_values /= width
    y_values /= height

    # top-left corner -> centre
    origin = bboxes[..., 0:2]
    origin += bboxes[..., 2:4] / 2
    return bboxes

def voc2coco(bboxes, height=720, width=1280):
    """Turn corner boxes ``[x1, y1, x2, y2]`` into ``[x1, y1, w, h]`` in place.

    ``height`` and ``width`` are accepted for signature parity with the
    other converters but are not used.

    Returns:
        The same ``bboxes`` object after modification.
    """
    far_corner = bboxes[..., 2:4]
    far_corner -= bboxes[..., 0:2]
    return bboxes

def coco2voc(bboxes, height=720, width=1280):
    """Turn ``[x1, y1, w, h]`` boxes into corner boxes ``[x1, y1, x2, y2]`` in place.

    ``height`` and ``width`` are accepted for signature parity with the
    other converters but are not used.

    Returns:
        The same ``bboxes`` object after modification.
    """
    extent = bboxes[..., 2:4]
    extent += bboxes[..., 0:2]
    return bboxes

def clip_bbox(bboxes_voc, height=720, width=1280):
    """Clamp corner boxes to the image rectangle, in place.

    Even columns (x values) are limited to ``[0, width]`` and odd columns
    (y values) to ``[0, height]``.

    Returns:
        The same ``bboxes_voc`` object after modification.
    """
    for first_column, upper_bound in ((0, width), (1, height)):
        coords = bboxes_voc[..., first_column::2]
        # write the clipped copy back through the view
        coords[...] = coords.clip(0, upper_bound)
    return bboxes_voc

def voc2yolo(bboxes, height=720, width=1280):
    """Convert corner boxes ``[x1, y1, x2, y2]`` to normalised ``[xc, yc, w, h]`` in place.

    Steps: scale x columns by ``1/width`` and y columns by ``1/height``,
    replace the far corner by the box size, then move the near corner to the
    box centre.

    Args:
        bboxes: array whose last axis holds the four corner values. It is
            modified in place; pass a floating-point array (callers that hold
            integer boxes should convert and copy first).
        height: image height used for normalisation.
        width: image width used for normalisation.

    Returns:
        The same ``bboxes`` object after modification.
    """
    x_values = bboxes[..., 0::2]
    y_values = bboxes[..., 1::2]
    x_values /= width
    y_values /= height

    # far corner -> (w, h)
    size = bboxes[..., 2:4]
    size -= bboxes[..., 0:2]

    # near corner -> centre
    near = bboxes[..., 0:2]
    near += bboxes[..., 2:4] / 2

    return bboxes

def str2annot(data):
    """Parse label text into an ``(n, 5)`` float array.

    The text is a whitespace-separated list of numbers, five per annotation.
    Any run of whitespace (spaces, tabs, newlines, blank lines) separates two
    values, so trailing newlines, doubled spaces and empty label files are
    accepted.

    Args:
        data: label text, e.g. the contents of one label file.

    Returns:
        ``numpy.ndarray`` of shape ``(n, 5)`` and dtype float; ``(0, 5)`` for
        empty or whitespace-only input.

    Raises:
        ValueError: if a token is not a number or the number of values is not
            a multiple of five.
    """
    # str.split() with no argument never yields empty tokens, unlike split(' ').
    tokens = data.split()
    return np.array(tokens).astype(float).reshape(-1, 5)

def annot2str(data):
    """Serialise a 2-D annotation array to text.

    Every row becomes one line with its values separated by single spaces;
    lines are joined with ``\\n`` and there is no trailing newline.

    Args:
        data: 2-D array; each element is converted with ``astype(str)``.

    Returns:
        The resulting string.
    """
    rows = data.astype(str)
    return '\n'.join(' '.join(fields) for fields in rows)

def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, optionally with a text label, onto ``img``.

    Args:
        x: box as ``(x1, y1, x2, y2)``; each value is converted with ``int``.
        img: image array that is drawn on in place.
        color: colour for the box and the label background; a random
            three-value colour is picked when falsy.
        label: text placed on a filled rectangle above the top-left corner;
            nothing is written when falsy.
        line_thickness: line width; derived from the image size when falsy.

    Returns:
        None. ``img`` is modified in place.
    """
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Filled background sized to the text, sitting just above the box corner.
    label_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    cv2.rectangle(img, top_left, label_corner, color, -1, cv2.LINE_AA)
    cv2.putText(
        img,
        label,
        (top_left[0], top_left[1] - 2),
        0,
        font_scale,
        [225, 255, 255],
        thickness=font_thickness,
        lineType=cv2.LINE_AA,
    )

def _bbox_to_corners(bbox, bbox_format, img_height, img_width):
    """Return integer ``(x1, y1, x2, y2)`` pixel corners for one box."""
    if bbox_format == 'yolo':
        # normalised centre / size -> pixel centre and half extents
        center_x = round(float(bbox[0]) * img_width)
        center_y = round(float(bbox[1]) * img_height)
        half_w = round(float(bbox[2]) * img_width / 2)
        half_h = round(float(bbox[3]) * img_height / 2)
        return (center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h)

    x1 = int(round(bbox[0]))
    y1 = int(round(bbox[1]))
    if bbox_format == 'coco':
        # pixel top-left corner plus width / height
        return (x1, y1, x1 + int(round(bbox[2])), y1 + int(round(bbox[3])))
    # 'voc': pixel corners already
    return (x1, y1, int(round(bbox[2])), int(round(bbox[3])))


def draw_bboxes(img, bboxes, classes, class_ids, colors=None, show_classes=None,
                bbox_format='yolo', class_name=False, line_thickness=2):
    """Return a copy of ``img`` with the given boxes drawn on it.

    Args:
        img: image array; it is copied, the input is not modified.
        bboxes: sequence of boxes, each holding four values in ``bbox_format``.
        classes: class name of each box (parallel to ``bboxes``).
        class_ids: class id of each box (parallel to ``bboxes``).
        colors: a single colour, or a list of colours indexed by class id;
            green ``(0, 255, 0)`` when None.
        show_classes: class names to draw; all of ``classes`` when None.
        bbox_format: ``'yolo'`` (normalised centre x, centre y, width, height),
            ``'coco'`` (pixel x, y, width, height) or ``'voc'`` (pixel
            x1, y1, x2, y2).
        class_name: label boxes with the class name when True, otherwise with
            the class id.
        line_thickness: line width passed to ``plot_one_box``.

    Returns:
        The annotated copy of ``img``.

    Raises:
        ValueError: if ``bbox_format`` is not one of the supported formats.
    """
    if bbox_format not in ('yolo', 'coco', 'voc'):
        raise ValueError('wrong bbox format')

    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255, 0) if colors is None else colors
    img_height, img_width = image.shape[0], image.shape[1]

    for idx, bbox in enumerate(bboxes):
        cls = classes[idx]
        if cls not in show_classes:
            continue
        cls_id = class_ids[idx]
        color = colors[cls_id] if isinstance(colors, list) else colors
        # Every format labels with the class id when class_name is False; the
        # 'yolo' path previously called an undefined get_label() helper here.
        plot_one_box(_bbox_to_corners(bbox, bbox_format, img_height, img_width),
                     image,
                     color=color,
                     label=cls if class_name else str(cls_id),
                     line_thickness=line_thickness)
    return image

## Funkcje z Great-Barrier-Reef: YOLOv5 (awsaf49)

In [None]:
def load_model(ckpt_path, conf=0.25, iou=0.50):
    """Load custom YOLOv5 weights from the local repository copy and set inference options.

    Args:
        ckpt_path: path to the checkpoint file handed to ``torch.hub.load``.
        conf: NMS confidence threshold.
        iou: NMS IoU threshold.

    Returns:
        The loaded model with its inference attributes set.
    """
    hub_options = dict(path=ckpt_path, source='local', force_reload=True)
    model = torch.hub.load('/kaggle/input/yolov5-lib-ds', 'custom', **hub_options)

    # NMS thresholds
    model.conf = conf
    model.iou = iou
    # No class filter, a single label per box, at most 1000 detections per image.
    model.classes = None
    model.multi_label = False
    model.max_det = 1000
    return model

def predict(model, img, size=768, augment=False):
    """Run the detector on one image.

    Args:
        model: callable model; its result must expose ``.pandas().xyxy``.
        img: image array; only its first two shape entries are read here.
        size: inference size forwarded to the model.
        augment: forwarded to the model.

    Returns:
        ``(bboxes, confs)``: an integer array of ``[xmin, ymin, w, h]`` boxes
        and the matching confidence values, or ``([], [])`` when nothing was
        detected.
    """
    img_height, img_width = img.shape[:2]
    detections = model(img, size=size, augment=augment).pandas().xyxy[0]
    corners = detections[['xmin','ymin','xmax','ymax']].values
    if not len(corners):
        return [],[]
    boxes = voc2coco(corners, img_height, img_width).astype(int)
    return boxes, detections.confidence.values
    
def format_prediction(bboxes, confs):
    """Build the prediction string for a set of boxes.

    Each box contributes ``"<conf> <xmin> <ymin> <w> <h>"``; entries are
    separated by single spaces.

    Args:
        bboxes: sequence of four-value boxes.
        confs: confidence of each box (parallel to ``bboxes``).

    Returns:
        The joined string, or ``''`` when there are no boxes.
    """
    if not len(bboxes) > 0:
        return ''
    entries = (
        f'{confs[idx]} {xmin} {ymin} {w} {h}'
        for idx, (xmin, ymin, w, h) in enumerate(bboxes)
    )
    return ' '.join(entries).strip(' ')

def show_img(img, bboxes, bbox_format='yolo'):
    """Draw ``bboxes`` on ``img`` as 'starfish' boxes and return a resized PIL image.

    Reads the module-level ``colors``; when it is None it is set (globally)
    to green before drawing.

    Args:
        img: image array to annotate (it is copied by ``draw_bboxes``).
        bboxes: boxes in ``bbox_format``.
        bbox_format: format name understood by ``draw_bboxes``.

    Returns:
        ``PIL.Image`` of the annotated picture resized to 800x400.
    """
    global colors
    box_count = len(bboxes)
    if colors is None:
        colors = (0, 255 ,0)
    annotated = draw_bboxes(
        img=img,
        bboxes=bboxes,
        classes=['starfish'] * box_count,
        class_ids=[0] * box_count,
        class_name=True,
        colors=colors,
        bbox_format=bbox_format,
        line_thickness=2,
    )
    return Image.fromarray(annotated).resize((800, 400))

def get_bbox(annots):
    """Turn a list of annotation dicts into a list of value lists.

    Each dict contributes ``list(d.values())`` in the dict's own key order.
    """
    bboxes = []
    for annotation in annots:
        bboxes.append(list(annotation.values()))
    return bboxes

## Własne

In [None]:
def get_path(row):
    """Build the image path of a row from ``video_id`` and ``video_frame``.

    The module-level ``dataset_root`` is used as a prefix as-is, so it must
    end with a path separator.
    """
    return '{}train_images/video_{}/{}.jpg'.format(dataset_root, row.video_id, row.video_frame)

def extractLables(df, dest_path):
    """Write one label text file per row of ``df`` into ``dest_path``.

    The file is named ``<image_id>.txt``. Rows whose ``annotations`` equal
    ``"[]"`` get an empty file; otherwise ``row.bboxes`` is converted
    COCO -> VOC -> clipped -> YOLO and written with class label ``0`` in front
    of every box.

    Args:
        df: frame with ``image_id``, ``annotations``, ``bboxes``, ``height``
            and ``width`` columns.
        dest_path: existing directory that receives the label files.
    """
    for _, row in df.iterrows():
        with open(f"{dest_path}/{row.image_id}.txt", "w") as label_file:
            if row.annotations == "[]":
                label_file.write('')
            else:
                # float32 copy: the converters below work in place
                boxes = np.array(row.bboxes).astype(np.float32).copy()
                boxes = coco2voc(boxes, row.height, row.width)
                boxes = clip_bbox(boxes, row.height, row.width)
                boxes_text = voc2yolo(boxes, row.height, row.width).astype(str)

                class_column = np.zeros((len(boxes), 1), dtype=int).astype(str)
                rows = np.concatenate([class_column, boxes_text], axis=1)
                label_file.write(annot2str(rows))
            
def fancy_rhea_augumentation(path):
    """Load an image and apply one random augmentation (or all four).

    A random integer 0..4 selects the augmentation: 0 contrast, 1 sharpening,
    2 saturation, 3 multiplicative Gaussian noise; 4 applies all of them in
    that order.

    Args:
        path: path of the image file to read.

    Returns:
        The augmented BGR image as a ``uint8`` array.

    Raises:
        OSError: if the image cannot be read.
    """
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"cv2.imread could not read image: {path}")

    i = random.randint(0, 4)
    if i in (0, 4):
        # contrast
        alpha = random.uniform(0.3, 1.2)
        img = cv2.convertScaleAbs(img, alpha=alpha)
    if i in (1, 4):
        # sharpness
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        img = cv2.filter2D(img, -1, kernel)
    if i in (2, 4):
        # saturation
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(img_hsv)
        rand = random.choice([2, 3, 0.5])
        if rand == 0.5:
            # Widen before doubling: uint8 * 2 wraps around instead of
            # saturating, which made the clip below a no-op.
            s = s.astype(np.uint16) * 2
        else:
            s = s // rand
        s = np.clip(s, 0, 255).astype(np.uint8)
        img = cv2.cvtColor(cv2.merge([h, s, v]), cv2.COLOR_HSV2BGR)
    if i in (3, 4):
        # noise: img + img * gauss, computed in floating point and clipped.
        # Casting the Gaussian samples to uint8 (as before) discarded the
        # fractional noise and wrapped negative values.
        # NOTE(review): sigma=1 is kept from the original and is very strong
        # for multiplicative noise — confirm the intended strength.
        gauss = np.random.normal(0, 1, img.shape)
        noisy = img.astype(np.float32) * (1.0 + gauss)
        img = np.clip(noisy, 0, 255).astype(np.uint8)
    return img

# Wczytanie i preprocesowanie danych

In [None]:
# Root of the competition data; keep the trailing '/' because get_path()
# concatenates onto it directly.
dataset_root = '/kaggle/input/tensorflow-great-barrier-reef/'

df = pd.read_csv(f'{dataset_root}train.csv')

# Fixed frame size, stored per row and later used to normalise boxes.
df["width"] = 1280
df["height"] = 720

# Keep only annotated frames. .copy() makes the filtered frame independent so
# the column assignments below do not operate on a slice of the original.
df = df[df['annotations'] != "[]"].copy()
# The annotations column holds Python-literal text; literal_eval parses it
# without executing arbitrary code (unlike eval).
df['bboxes'] = df['annotations'].progress_apply(ast.literal_eval)
df['bboxes'] = df["bboxes"].progress_apply(get_bbox)
df["img_path"] = df.progress_apply(get_path, axis=1)
df = df.reset_index(drop=True)
df.head(5)

## Fold

In [None]:
from sklearn.model_selection import GroupKFold

# Three folds grouped by video_id, so the split is made per video rather
# than per frame.
kf = GroupKFold(n_splits = 3)
# The split yields positional indices; a fresh 0..n-1 index makes them valid
# labels for df.loc below.
df = df.reset_index(drop=True)
df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(kf.split(df, groups=df.video_id.tolist())):
    # Each row is tagged with the fold in which it is a validation row.
    df.loc[val_idx, 'fold'] = fold
display(df.fold.value_counts())

In [None]:
# Rows of fold FOLD form the validation set; every other fold is training data.
train_df = df.query("fold!=@FOLD")
valid_df = df.query("fold==@FOLD")
# Cell output: (number of training rows, number of validation rows).
len(train_df), len(valid_df)

## Przenoszenie zdjęć + augmentacja

In [None]:
# Number of entries in the validation / training folders before copying.
%ls {IMAGES_VAL} | wc -l
%ls {IMAGES_TRAIN} | wc -l

In [None]:
# Copy every training frame into IMAGES_TRAIN as <image_id>.jpg.
for i in range(len(train_df)):
    row = train_df.iloc[i]
    copyfile(f'{row.img_path}', f'{IMAGES_TRAIN}/{row.image_id}.jpg')
    # Disabled: write an augmented duplicate next to each training image.
#     img = fancy_rhea_augumentation(f'{IMAGES_TRAIN}/{row.image_id}.jpg')
#     cv2.imwrite(f'{IMAGES_TRAIN}/{row.image_id}_2.jpg', img)
    
# Copy every validation frame into IMAGES_VAL as <image_id>.jpg.
for i in range(len(valid_df)):
    row = valid_df.iloc[i]
    copyfile(f'{row.img_path}', f'{IMAGES_VAL}/{row.image_id}.jpg')

In [None]:
# Number of entries in both folders after the images were copied.
%ls {IMAGES_VAL} | wc -l
%ls {IMAGES_TRAIN} | wc -l

## Zapisywanie labeli do osobnych plików

In [None]:
# Label files are written into the same folders as the images.
extractLables(train_df, IMAGES_TRAIN)
extractLables(valid_df, IMAGES_VAL)

# Disabled: duplicate each training label file as <name>_2.txt
# (counterpart of the disabled "_2.jpg" augmented images).
# for file_name in os.listdir(IMAGES_TRAIN):
#     if (".txt" not in file_name or "_2" in file_name):
#         continue
#     source = IMAGES_TRAIN + "/" + file_name
#     copyfile(source, source.replace(".txt", "_2.txt"))

In [None]:
# Number of entries in both folders after the label files were added.
%ls {IMAGES_VAL} | wc -l
%ls {IMAGES_TRAIN} | wc -l

# YOLOv5

In [None]:
# torch is imported here; load_model() (defined in an earlier cell) relies on
# it at call time.
import torch

%cd /kaggle/working
# Remove any previous copy, then take YOLOv5 from the attached dataset
# instead of cloning it.
!rm -r /kaggle/working/yolov5
# !git clone https://github.com/ultralytics/yolov5 # clone
!cp -r /kaggle/input/yolov5-lib-ds /kaggle/working/yolov5
# Subsequent commands (requirements install, train.py) run from the repo root.
%cd yolov5
%pip install -qr requirements.txt 

from yolov5 import utils
# display = utils.notebook_init()  # check

## Pliki YAML

In [None]:
%%writefile {DATASET_PATH}/yolo.yaml
# Dataset definition: root folder, train/val image folders relative to it,
# and a single 'starfish' class.
path: /kaggle/working/dataset/
train: images_train
val: images_val

nc: 1
names: ['starfish']

In [None]:
%%writefile {DATASET_PATH}/hyp.yaml
# NOTE(review): lr0 below is the SGD-style 1E-2, while the notebook sets
# OPTMIZER = 'Adam', for which the inline hint suggests 1E-3 — confirm.
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 4.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.10  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.5  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.5 # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)

## Trenowanie

In [None]:
# from kaggle_secrets import UserSecretsClient
# import wandb
# user_secrets = UserSecretsClient()
# wandb.login(key=user_secrets.get_secret("wandb"))

In [None]:
# !python train.py --img {IMGSIZE}\
# --batch {BATCH}\
# --epochs {EPOCHS}\
# --optimizer {OPTMIZER}\
# --data {DATASET_PATH}/yolo.yaml\
# --hyp {DATASET_PATH}/hyp.yaml\
# --weights {MODEL}.pt\
# --project {PROJECT}\
# --name {NAME}\
# --exist-ok

In [None]:
# OUTPUT_DIR = '{}/{}'.format(PROJECT, NAME)
# MODEL = f"/kaggle/working/yolov5/{OUTPUT_DIR}/weights/best"

# Predykcja

In [None]:
# Visual check on training frames: load the checkpoint and show predictions.
model = load_model(MODEL + ".pt", conf=CONF, iou=IOU)
# Random sample of 100 annotated training frames (only the first few are shown).
image_paths = train_df[train_df['annotations'] != "[]"].sample(100).img_path.tolist()
for idx, path in enumerate(image_paths):
    # Reverse the channel axis of the image read by OpenCV (BGR -> RGB).
    img = cv2.imread(path)[...,::-1]
    bboxes, confis = predict(model, img, size=IMGSIZE, augment=False)
    # predict() returns [xmin, ymin, w, h] boxes, hence the 'coco' format.
    display(show_img(img, bboxes, bbox_format='coco'))
    # Stop after seven images (idx 0..6).
    if idx>5:
        break

In [None]:
# Second visual check on training frames with a fresh random sample;
# the model is loaded again.
model = load_model(MODEL + ".pt", conf=CONF, iou=IOU)
image_paths = train_df[train_df['annotations'] != "[]"].sample(100).img_path.tolist()
for idx, path in enumerate(image_paths):
    # Reverse the channel axis of the image read by OpenCV (BGR -> RGB).
    img = cv2.imread(path)[...,::-1]
    bboxes, confis = predict(model, img, size=IMGSIZE, augment=False)
    display(show_img(img, bboxes, bbox_format='coco'))
    # Stop after seven images (idx 0..6).
    if idx>5:
        break

In [None]:
# Visual check on validation frames (fold FOLD).
model = load_model(MODEL + ".pt", conf=CONF, iou=IOU)
image_paths = valid_df[valid_df['annotations'] != "[]"].sample(100).img_path.tolist()
for idx, path in enumerate(image_paths):
    # Reverse the channel axis of the image read by OpenCV (BGR -> RGB).
    img = cv2.imread(path)[...,::-1]
    bboxes, confis = predict(model, img, size=IMGSIZE, augment=False)
    display(show_img(img, bboxes, bbox_format='coco'))
    # Stop after seven images (idx 0..6).
    if idx>5:
        break

## Predykcja do konkursu

In [None]:
# Competition API: the environment hands out test frames one at a time and
# collects the predictions.
import greatbarrierreef
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

In [None]:
# Predict every test frame and register the result with the environment.
model = load_model(MODEL + ".pt", conf=CONF, iou=IOU)
for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
    bboxes, confs  = predict(model, img, size=IMGSIZE, augment=False)
    # "<conf> <xmin> <ymin> <w> <h>" entries joined by spaces; '' if no boxes.
    annot          = format_prediction(bboxes, confs)
    pred_df['annotations'] = annot
    env.predict(pred_df)
    # Show the first three frames as a sanity check.
    if idx<3:
        display(show_img(img, bboxes, bbox_format='coco'))

In [None]:
# Read back the submission.csv from the working directory
# (presumably written by the competition environment — confirm) and preview it.
sub_df = pd.read_csv('submission.csv')
sub_df.head()