### CHECK PYTORCH VERSION

In [None]:
import torch

# Environment sanity check: report the PyTorch build and which accelerator (if any) is visible.
cuda_ready = torch.cuda.is_available()
print("PyTorch version: ", torch.__version__)
print("GPU: ", cuda_ready)
# Device index 0 is only queried when CUDA is usable; otherwise the literal "CPU" is shown.
device_label = torch.cuda.get_device_properties(0).name if cuda_ready else "CPU"
print("Type: ", device_label)

### IMPORT LIBRARIES

In [None]:
import ast
import glob
import os
import yaml

import numpy as np
import pandas as pd


from IPython.display import Image, display
from IPython.core.magic import register_line_cell_magic
from shutil import copyfile
from tqdm import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Root folder under which this notebook creates the YOLO dataset and the YOLOv5 checkout.
HOME_DIR = "/kaggle/working"
# Location of the competition data; frames are read from its `train_images/` sub-folder.
DATASET_PATH = "/kaggle/input/tensorflow-great-barrier-reef"

### 1. PREPARE DATASET

In [None]:
# Uses the pre-split dataset by @julian3833 - "Reef - A CV strategy: subsequences!"
# https://www.kaggle.com/julian3833/reef-a-cv-strategy-subsequences
#
# The CSV is addressed with an absolute path: this notebook changes the working
# directory later on (`%cd yolov5`), and a relative `../input/...` path would stop
# resolving if this cell were re-run after that point.
df = pd.read_csv("/kaggle/input/reef-cv-strategy-subsequences-dataframes/train-validation-split/train-0.1.csv")
df.head(3)

In [None]:
def add_path(row):
    """Return the source JPEG location for one row of the frame table.

    The path is assembled from ``DATASET_PATH`` together with the row's
    ``video_id`` and ``video_frame`` attributes.
    """
    video_dir = f'video_{row.video_id}'
    frame_file = f'{row.video_frame}.jpg'
    return f'{DATASET_PATH}/train_images/{video_dir}/{frame_file}'

def num_boxes(annotations):
    """Count the entries of a stringified annotation list.

    ``annotations`` is the text form of a Python literal; it is parsed with
    ``ast.literal_eval`` and the length of the parsed object is returned.
    """
    return len(ast.literal_eval(annotations))

# Derive two helper columns: the source image path (row-wise) and the number of
# annotated boxes (parsed from the `annotations` string column).
df['path'] = df.apply(add_path, axis=1)
df['num_bbox'] = df['annotations'].apply(num_boxes)
print("New path and annotations preprocessing completed")

df.head(3)

In [None]:
# Keep only the frames that carry at least one bounding box.
# `.copy()` makes the result an independent frame, so the column assignments in
# later cells operate on it unambiguously instead of on a slice of the original
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df.num_bbox > 0].copy()

print(f'Dataset images with annotations: {len(df)}')
df.head(3)

In [None]:
def add_new_path(row):
    """Return the destination path of a row's image inside the YOLO dataset tree.

    Rows whose ``is_train`` is truthy go to ``images/train``; all others go to
    ``images/valid``. The file is named after the row's ``image_id``.
    """
    split_dir = "train" if row.is_train else "valid"
    return f"{HOME_DIR}/yolo_dataset/images/{split_dir}/{row.image_id}.jpg"
    
# Destination of every image inside the train/valid folder layout.
df['new_path'] = df.apply(add_new_path, axis=1)
print("New image path for train/valid created")
df.head(3)

In [None]:
# Spot-check the path columns for the row labelled 16.
# NOTE(review): relies on index label 16 being present after the filtering step and
# on an `image_path` column already being part of the loaded CSV — confirm.
for column_name in ('path', 'new_path', 'image_path'):
    print(df.loc[16, column_name])

### 2. CREATE DATASET FILE STRUCTURE

In [None]:
# Create the images/labels x train/valid folder layout used for the YOLO dataset.
# `exist_ok=True` keeps the cell re-runnable: without it a second execution raises
# FileExistsError because the folders are already there.
for content_kind in ("images", "labels"):
    for split_name in ("train", "valid"):
        os.makedirs(f"{HOME_DIR}/yolo_dataset/{content_kind}/{split_name}", exist_ok=True)
print("Directory structure for Yolov5 created")

In [None]:
def copy_file(row):
    """Copy one image from ``row.path`` to ``row.new_path``; returns nothing."""
    source, destination = row.path, row.new_path
    copyfile(source, destination)
    
# Copy every selected frame to its train/valid destination, with a progress bar.
# The per-row return values are all None, so the result is discarded.
_ = df.progress_apply(copy_file, axis=1)

print("Sucessfully copy file for train and valid")

### 3. CREATE YOLOv5 ANNOTATIONS

In [None]:
# Frame size passed to `get_yolo_format_bbox` when the labels are generated.
IMG_WIDTH = 1280
IMG_HEIGHT = 720

def get_yolo_format_bbox(bbox, img_w, img_h):
    """Convert one box into a normalised ``[xc, yc, w, h]`` list.

    Args:
        bbox: mapping with the keys ``'x'``, ``'y'``, ``'width'`` and ``'height'``.
            ``x``/``y`` are treated as the left/top edge of the box.
        img_w: image width, used for clipping and for normalising x-values.
        img_h: image height, used for clipping and for normalising y-values.

    Returns:
        ``[xc / img_w, yc / img_h, w / img_w, h / img_h]`` where ``w``/``h`` are the
        box extents after clipping at the right/bottom image border and ``xc``/``yc``
        are the left/top edge plus half the clipped extent, rounded with ``np.round``.
    """
    left, top = bbox['x'], bbox['y']

    # Shrink the extent when the box would stick out past the right / bottom border.
    box_w = img_w - left if left + bbox['width'] > img_w else bbox['width']
    box_h = img_h - top if top + bbox['height'] > img_h else bbox['height']

    # Centre = edge + half extent; the half extent is rounded to a whole number first.
    center_x = left + int(np.round(box_w / 2))
    center_y = top + int(np.round(box_h / 2))

    # Normalise by the image size.
    return [center_x / img_w, center_y / img_h, box_w / img_w, box_h / img_h]

# Write one YOLO label file per image: one line per box, "<class> <xc> <yc> <w> <h>".

# Label folder per split, keyed by the truthiness of `is_train`.
label_dirs = {
    True: f"{HOME_DIR}/yolo_dataset/labels/train",
    False: f"{HOME_DIR}/yolo_dataset/labels/valid",
}
# Create the folders once up front instead of re-checking them for every row.
for label_dir in label_dirs.values():
    os.makedirs(label_dir, exist_ok=True)

CLASS_ID = 0  # the only class index written to the label files

# `total` lets tqdm show a real progress bar; a bare `iterrows()` generator has no length.
for _, row in tqdm(df.iterrows(), total=len(df)):
    boxes = [
        get_yolo_format_bbox(annotation, IMG_WIDTH, IMG_HEIGHT)
        for annotation in ast.literal_eval(row.annotations)
    ]

    label_path = f"{label_dirs[bool(row.is_train)]}/{row.image_id}.txt"
    with open(label_path, 'w') as f:
        for box in boxes:
            f.write(" ".join(str(value) for value in [CLASS_ID] + box))
            f.write("\n")

print("Annotations in Yolov5 format for all images created.")

### CHECK GENERATED YOLOv5 ANNOTATION FILES

In [None]:
# Count the label files that were written for each split.
labels_root = f"{HOME_DIR}/yolo_dataset/labels"

train_data = os.listdir(f"{labels_root}/train")
num_train_file = len(train_data)
print("Number of txt file in train folder: ", num_train_file)

valid_data = os.listdir(f"{labels_root}/valid")
num_valid_file = len(valid_data)
print("Number of txt file in valid foler: ", num_valid_file)

In [None]:
# Print the contents of one generated label file (the entry at position 10 of `train_data`).
# NOTE(review): raises IndexError if the train folder holds fewer than 11 files.
%cat '/kaggle/working/yolo_dataset/labels/train/{train_data[10]}'

### 4. INSTALL YOLOv5

4A. GET YOLOv5 SOURCE CODE

In [None]:
# Get the YOLOv5 sources. Alternatives that are currently disabled:
#   !git clone https://github.com/ultralytics/yolov5
#   !cp -r ../input/yolov5 ./
# Active choice: recursively copy /kaggle/input/yolov5 into /kaggle/working/.
!cp -r /kaggle/input/yolov5 /kaggle/working/
# List the current directory contents.
!ls

In [None]:
!pip install torchvision --upgrade -q
!pip install wandb --upgrade

In [None]:
%cd yolov5

# Install dependencies
!pip install -qr requirements.txt

4B. CREATE YOLOv5 DATASET CONFIGURATION FILE

In [None]:
data_yaml = dict(
    train = f"{HOME_DIR}/yolo_dataset/images/train",
    val = f"{HOME_DIR}/yolo_dataset/images/valid",
    nc = 1, # number of class
    names = ['cots'] # classes
)

with open(f'{HOME_DIR}/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=False)

print("Dataset configuration file for YOLOv5 is created")

%cat /kaggle/working/yolov5/data/data.yaml


In [None]:
# List YOLOv5's data folder; the data.yaml written above should appear here.
!ls '/kaggle/working/yolov5/data'

In [None]:
# change directory
%cd ..

4C. TRAIN YOLOv5 WITH W&B

In [None]:
# more about Secrets -> https://www.kaggle.com/product-feedback/114053
# These imports stay in this cell: `wandb` is installed earlier in the notebook and
# `kaggle_secrets` is only importable inside a Kaggle session.
import wandb
from kaggle_secrets import UserSecretsClient

# Log in with the API key stored as the Kaggle secret "wandb_api". Anonymous login
# is only a fallback: requesting `anonymous='must'` unconditionally would send the
# runs to an anonymous account even when a valid key is available.
try:
    user_secrets = UserSecretsClient()
    wandb_api = user_secrets.get_secret("wandb_api")
    wandb.login(key=wandb_api)
except Exception as err:
    # Deliberately broad: a missing secret, a secrets-service failure and a rejected
    # key should all end in the same fallback.
    print(f"W&B login with the 'wandb_api' secret failed ({err}); falling back to anonymous mode.")
    wandb.login(anonymous='must')

In [None]:
%cd yolov5

In [None]:
# Training settings interpolated into the `train.py` command line:
# batch size, number of epochs and input image size.
BATCH_SIZE, EPOCHS, IMG_SIZE = 4, 20, 1280
# Selected_Fold=4  #0..4

All training results are saved to runs/train/ with incrementing run directories, e.g. runs/train/exp2, runs/train/exp3, etc.

In [None]:
#best_weights = '/kaggle/input/nfl-weights/yolov5/kaggle-reef/exp/weights/best.pt' --weights {best_weights} \
!python train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights yolov5l6.pt \
                 --project kaggle-Reef \
                 --device 0 \
#                  --evolve