# Download the dataset with the Kaggle API
kaggle  
https://www.kaggle.com/c/tensorflow-great-barrier-reef/data  

Kaggle API usage in Colab  
https://colab.research.google.com/github/corrieann/kaggle/blob/master/kaggle_api_in_colab.ipynb

In [None]:
# Install the Kaggle CLI, then force a clean reinstall of the package itself
# (--no-deps leaves its already-installed dependencies untouched).
!pip install kaggle
!pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the competition archive with the Kaggle CLI
# (uses the credentials placed in ~/.kaggle/kaggle.json).
!kaggle competitions download -c tensorflow-great-barrier-reef

In [None]:
# Extract the downloaded archive into the current directory (-q: quiet).
!unzip -q /content/tensorflow-great-barrier-reef.zip 

In [5]:
# create hierarchy
import os

# Builds /content/Datasets/{train,val,test}/{images,labels}.
# exist_ok=True keeps the cell re-runnable: without it a second run raises
# FileExistsError as soon as the first directory is found.
for f1 in 'train', 'val', 'test':
  for f2 in 'images', 'labels':
    os.makedirs(f'/content/Datasets/{f1}/{f2}', exist_ok=True)

In [None]:
import pandas as pd

# Load the competition metadata tables; the paths are relative to the
# current working directory.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
df_train  # bare expression so the notebook displays the table

In [None]:
def annotation2YoloFormat(_annotation, width=1280, height=720):
  """Convert a stringified list of bounding boxes into YOLO label lines.

  Args:
    _annotation: String holding a Python-literal list of dicts with the keys
      'x', 'y', 'width' and 'height' (top-left corner and box size), e.g.
      "[{'x': 559, 'y': 213, 'width': 50, 'height': 32}]". A single dict is
      accepted as well. Any non-string value yields an empty result.
    width: Image width used to normalise x and the box width.
    height: Image height used to normalise y and the box height.

  Returns:
    One newline-terminated line per box in the form
    "0 <x_center> <y_center> <w> <h>", each value divided by the image size
    and printed with six decimals (the class id is always 0). The result is
    an empty string when there is no box or the text cannot be parsed.
  """
  import ast

  if not isinstance(_annotation, str):
    return ''

  # Parse with the real literal parser instead of scanning for substrings:
  # the result no longer depends on key order or on exact spacing.
  try:
    boxes = ast.literal_eval(_annotation)
  except (ValueError, SyntaxError):
    # Keep the historical best-effort contract: unparseable text -> no labels.
    return ''

  if isinstance(boxes, dict):
    boxes = [boxes]
  if not isinstance(boxes, (list, tuple)):
    return ''

  lines = []
  for box in boxes:
    try:
      x = float(box['x'])/width
      y = float(box['y'])/height
      w = float(box['width'])/width
      h = float(box['height'])/height
    except (KeyError, TypeError, ValueError):
      # Entry is not a complete box description; skip it.
      continue

    # YOLO stores the box centre, the source stores the top-left corner.
    lines.append('0 %.6f %.6f %.6f %.6f\n'%(x+w/2, y+h/2, w, h))

  return ''.join(lines)

In [8]:
# change image name to image id
# split images by train:val:test = .64:.16:.2
import shutil
import os
import random

%cd train_images

TEST_DIRECTORY = "../Datasets/test"
TRAIN_DIRECTORY = "../Datasets/train"
VAL_DIRECTORY = "../Datasets/val"

for dir in os.listdir():
  # print(dir)
  for image in os.listdir(dir):
    random_key = random.random()
    if random_key < 0.64:
      random_folder = TRAIN_DIRECTORY
    elif random_key < 0.8:
      random_folder = VAL_DIRECTORY
    else:
      random_folder = TEST_DIRECTORY
    shutil.move(f'{dir}/{image}', f"../Datasets/{random_folder}/images/{dir[-1]}-{image}")

    # might add label as well while in iteration
    # as YOLO format [{xywh},{},...]
    f = open(f"../Datasets/{random_folder}/labels/{dir[-1]}-{image[:-4]}.txt", 'w')
    f.write(annotation2YoloFormat(df_train.loc[df_train['image_id']==f'{dir[-1]}-{image[:-4]}', 'annotations'].iloc[0]))
    f.close()

%cd ..
!rm -rf train_images/

/content/train_images
/content


In [None]:
# Fetch the YOLOv5 repository, switch into it and install its requirements.
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

import torch
# NOTE(review): this import runs after `%cd yolov5`, so it can only resolve
# if the parent directory of the clone is on sys.path - confirm it works, and
# compare with the import used in the upstream YOLOv5 tutorial.
from yolov5 import utils
display = utils.notebook_init()  # checks

In [None]:
# create the dataset .yaml file and save it to /data (you're at /content/yolov5)

In [None]:
# Train YOLOv5s on COCO128 for 3 epochs
# NOTE(review): the command below still points at coco128.yaml; presumably
# --data should name the custom dataset .yaml once it exists - confirm.
# --img : image size (size,size)
# --batch : batch size
# --epochs : number of training epochs
# --data : dataset .yaml file name
# --weights : pretrained weights ('' --cfg yolov5s.yaml if from scratch)
!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt

In [None]:
# visualize