In [None]:
!pip install -qU wandb
!pip install -qU bbox-utility # check https://github.com/awsaf49/bbox

In [None]:
import numpy as np
import pandas as pd
from os import listdir
from tqdm.notebook import tqdm

from bbox.utils import coco2yolo, annot2str

In [None]:
import wandb

# Read the Weights & Biases API key from the Kaggle secret stored under the
# label "WANDB", so the key itself is never written into the notebook.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("WANDB")
wandb.login(key=api_key)
# NOTE(review): `anonymous` is not read by any cell visible here — confirm it is still needed.
anonymous = None

In [None]:
# Working folders: images are copied into the first, label .txt files are
# written into the second.
images_directory, labels_directory = (
    '/kaggle/working/images',
    '/kaggle/working/labels',
)

In [None]:
!rm -r '/kaggle/working/images'
!rm -r '/kaggle/working/labels'

In [None]:
!mkdir -p '/kaggle/working/images'
!mkdir -p '/kaggle/working/labels'

In [None]:
import ast

# load data with paths to images and labels
df = pd.read_csv('/kaggle/input/tensorflow-great-barrier-reef/train.csv')

# where each frame lives in the read-only competition input
df['old_image_path'] = '/kaggle/input/tensorflow-great-barrier-reef/train_images/video_' \
                            + df.video_id.astype(str) + '/' \
                            + df.video_frame.astype(str)+'.jpg'

# where the copied image and its label file will live in the working directory
df['image_path']  = images_directory + '/' + df.image_id + '.jpg'
df['label_path']  = labels_directory + '/' + df.image_id + '.txt'


def _annotations_to_yolo(annotations):
    """Turn one `annotations` cell (text form of a list of box dicts) into YOLO-format boxes.

    The text is parsed with ast.literal_eval, which only accepts Python
    literals, so content of the CSV can never be executed as code (unlike eval).
    """
    # Relies on every dict listing its values in the order coco2yolo expects
    # (presumably x, y, width, height — verify against train.csv).
    boxes = [list(annot.values()) for annot in ast.literal_eval(annotations)]
    return coco2yolo(np.array(boxes, dtype=float))


# annotations to yolo-format bboxes
df['bboxes'] = df['annotations'].apply(_annotations_to_yolo)
df = df.drop(columns=['annotations'])

# find non-empty images
df['num_bbox'] = df['bboxes'].apply(len)
df = df.query("num_bbox>0")


print('df.shape:', df.shape)
df.head(2)

In [None]:
# Optional: keep only a random sample of 200 rows (fixed seed 42).
# Left disabled, so every row of df is used; uncomment the line below to enable it.

#df = df.sample(n=200, ignore_index=True, random_state=42)

In [None]:
# copy images to new folder

import shutil
from joblib import Parallel, delayed


def make_copy(row):
    """Copy the file at `row.old_image_path` to `row.image_path`."""
    shutil.copyfile(row.old_image_path, row.image_path)


image_paths = df.old_image_path.tolist()

# One delayed copy per dataframe row; tqdm wraps the row iterator so the bar
# advances as tasks are handed to the thread pool.
copy_tasks = (
    delayed(make_copy)(row)
    for _, row in tqdm(df.iterrows(), total=len(df))
)
_ = Parallel(n_jobs=-1, backend='threading')(copy_tasks)

print(len(listdir(images_directory)), ' elements in images directory')

In [None]:
# create labels files

count = 0          # rows for which an empty label file was written
all_bboxes = []    # every box of every row, flattened
bboxes_info = []   # [image_id, video_id, sequence] repeated once per box

for row_idx in tqdm(range(len(df))):
    row = df.iloc[row_idx]
    bboxes_yolo = row.bboxes
    num_bbox = row.num_bbox
    names = ['cots'] * num_bbox
    # column of "0" strings: the class id that prefixes every box line
    labels = np.array([0] * num_bbox)[..., None].astype(str)

    # the label file is created for every row, even when there is nothing to write
    with open(row.label_path, 'w') as f:
        if num_bbox >= 1:
            all_bboxes.extend(bboxes_yolo.astype(float))
            bboxes_info.extend(
                [[row.image_id, row.video_id, row.sequence]] * len(bboxes_yolo)
            )
            # one row per box: class id followed by the box values
            annots = np.concatenate([labels, bboxes_yolo], axis=1)
            string = annot2str(annots)
            f.write(string)
        else:
            f.write('')
            count += 1

print('Missing:',count)
print(len(listdir(labels_directory)), ' elements in labels directory')

In [None]:
# Group fold by video_id

from sklearn.model_selection import GroupKFold

kf = GroupKFold(n_splits = 3)
# kf.split yields positional indices; with a fresh 0..n-1 index they can be
# used as labels in df.loc below.
df = df.reset_index(drop=True)

# Drop a 'fold' column left over from an earlier run of this cell.
# errors='ignore' covers the first run without a bare except hiding other failures.
df = df.drop(columns=['fold'], errors='ignore')

df['fold'] = -1

# every row receives the number of the split in which it is a validation row
for fold, (train_idx, val_idx) in enumerate(kf.split(df, groups=df.video_id.tolist())):
    df.loc[val_idx, 'fold'] = fold

df.fold.value_counts()

In [None]:
# define train and validation datasets
FOLD = 1  # rows with this fold number form the validation set, all others train

train_df = df.query("fold!=@FOLD")
valid_df = df.query("fold==@FOLD")

# distinct image paths of each subset
train_files = list(train_df.image_path.unique())
val_files = list(valid_df.image_path.unique())

print('train_files length:', len(train_files)) 
print('val_files length:', len(val_files))

In [None]:
import yaml

cwd = '/kaggle/working/'

# train.txt / val.txt: one image path per line
with open('/kaggle/working/train.txt', 'w') as f:
    for path in train_df.image_path.tolist():
        f.write(path+'\n')
            
with open('/kaggle/working/val.txt', 'w') as f:
    for path in valid_df.image_path.tolist():
        f.write(path+'\n')

# dataset description handed to train.py through --data
data = dict(
    path  = '/kaggle/working',
    train =  '/kaggle/working/train.txt',
    val   =  '/kaggle/working/val.txt',
    nc    = 1,
    names = ['cots'],
    )

with open('/kaggle/working/gbr.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Echo the written file back; the with-block closes the handle, which the
# previous bare open() never did.
with open('/kaggle/working/gbr.yaml', 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
%%writefile /kaggle/working/hyp.yaml
# Hyperparameter file passed to train.py through --hyp.
# NOTE(review): the training cell selects the Adam optimizer while lr0 below is 0.01;
# the inline hint on lr0 lists 1E-3 for Adam — confirm the value is intended.
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.10  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.5  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.5 # image mixup (probability)
copy_paste: 0.0  # segment copy-paste (probability)

In [None]:
# get yolov5
%cd /kaggle/working
!rm -r /kaggle/working/yolov5
!git clone https://github.com/ultralytics/yolov5
    
#!cp -r /kaggle/input/yolov5-lib-ds /kaggle/working/yolov5
%cd yolov5
%pip install -qr requirements.txt

from yolov5 import utils
display = utils.notebook_init()

In [None]:
# --- settings forwarded to train.py ---
# NOTE(review): 3000 is not a multiple of 32 — confirm how train.py treats such an --img value.
DIM = 3000
MODEL = 'yolov5n'
BATCH = 32
EPOCHS = 10
# (sic) spelling kept: the training cell reads this exact name
OPTMIZER = 'Adam'

# --- run naming ---
PROJECT = 'great-barrier-reef-public' # w&b in yolov5
NAME = f'{MODEL}-dim{DIM}-fold{FOLD}' # w&b for yolov5

#REMOVE_NOBBOX = True # remove images with no bbox
ROOT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
# NOTE(review): these two differ from images_directory / labels_directory
# ('/kaggle/working/...') used by the earlier cells, and no visible cell reads them — confirm.
IMAGE_DIR = '/kaggle/images' # directory to save images
LABEL_DIR = '/kaggle/labels' # directory to save labels

In [None]:
!python train.py --img {DIM}\
--batch {BATCH}\
--epochs {EPOCHS}\
--optimizer {OPTMIZER}\
--data /kaggle/working/gbr.yaml\
--hyp /kaggle/working/hyp.yaml\
--weights {MODEL}.pt\
--project {PROJECT} --name {NAME}\
--exist-ok