# 1. Installing YOLOX Libraries

Credit to Remek Kinas for the following code:

Code: https://www.kaggle.com/remekkinas/yolox-inference-on-kaggle-for-cots-lb-0-507?scriptVersionId=81625924

In [None]:
import pandas as pd
import numpy as np
import os
import ast
from sklearn.model_selection import GroupKFold
from string import Template
import json
import torch
from shutil import copyfile
import greatbarrierreef
import importlib
import random
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display

In [None]:
# Fetch the YOLOX sources into ./YOLOX (-q keeps git quiet).
!git clone https://github.com/Megvii-BaseDetection/YOLOX -q

# Work from inside the clone from here on; later cells write files such as
# cots_config.py and script.sh with paths relative to the current directory.
%cd YOLOX
# Upgrade pip, then install the packages listed in requirements.txt.
!pip install -U pip && pip install -r requirements.txt
# Install YOLOX itself in editable mode (-e) with verbose output (-v).
!pip install -v -e . 

In [None]:
# Install the COCO Python API straight from the PythonAPI subdirectory of the
# cocoapi repository.
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
# Number of cross-validation folds, and the fold held out for validation.
# The held-out fold is drawn at random on every run, so it is printed to
# make the run traceable.
N_SPLITS = 5
FOLD = random.randrange(N_SPLITS)
print(FOLD)

# 2. Splitting Data Into Training/Validation

**EDIT Version 12**: Because we drop every frame that does not contain any starfish, we use `GroupKFold` (grouped by video sequence) to create our folds.

In [None]:
# Load the competition's training metadata and preview the first rows.
train = pd.read_csv("/kaggle/input/tensorflow-great-barrier-reef/train.csv")
train.head()

In [None]:
# The annotations column is read as text; turn each entry back into a Python
# list of dicts.
train['annotations'] = train['annotations'].map(ast.literal_eval)
# One list of values per annotation dict, in the dict's own key order.
train['box'] = train['annotations'].map(
    lambda anns: [list(ann.values()) for ann in anns]
)
# Path of each frame relative to the train_images directory.
train['image_path'] = (
    "video_" + train['video_id'].astype(str)
    + "/" + train['video_frame'].astype(str) + ".jpg"
)
# Placeholder until the folds are assigned.
train['fold'] = -1
# Keep only frames with at least one annotation and renumber the rows from 0.
has_annotations = train['annotations'].map(len) > 0
train = train[has_annotations].reset_index(drop=True)

In [None]:
# Assign every row to exactly one validation fold, keeping all rows of a
# sequence in the same fold.
kf = GroupKFold(n_splits=N_SPLITS)
fold_splits = kf.split(
    train,
    y=train['video_id'].tolist(),
    groups=train['sequence'],
)
for fold, (_train_rows, val_rows) in enumerate(fold_splits):
    # Positional indices equal index labels here because the index was reset.
    train.loc[val_rows, 'fold'] = fold
train['fold'] = train['fold'].astype(int)
train.head()

In [None]:
!mkdir /kaggle/working/dataset
!mkdir /kaggle/working/dataset/images
!mkdir /kaggle/working/dataset/images/train2017
!mkdir /kaggle/working/dataset/images/val2017
!mkdir /kaggle/working/dataset/images/annotations

In [None]:
# Copy every annotated frame into the split directory it belongs to: the
# held-out fold goes to val2017, everything else to train2017. Files are
# renamed to "<image_id>.jpg" so names stay unique across videos.
for row in train.itertuples(index=False):
    split_dir = 'val2017' if row.fold == FOLD else 'train2017'
    copyfile(
        f'/kaggle/input/tensorflow-great-barrier-reef/train_images/{row.image_path}',
        f'/kaggle/working/dataset/images/{split_dir}/{row.image_id}.jpg',
    )

# 3. Convert Dataset to YOLOX Format

There are two kinds of evaluators that can be used, the COCO evaluator and the VOC evaluator, and each requires its own annotation format. Only the COCO format is generated for the time being.

In [None]:
#COCO:
def datasetToCOCO(dataset, img_width=1280, img_height=720):
    """Convert an annotated frame table into a COCO-style dictionary.

    Args:
        dataset: Object whose ``itertuples()`` yields rows carrying the row
            index at position 0 plus ``image_id`` and ``box`` attributes
            (for example a pandas DataFrame). ``box`` is an iterable of
            4-element sequences treated as ``[x, y, width, height]``.
        img_width: Width recorded for every image and used as the right-hand
            clipping limit for boxes. Defaults to 1280.
        img_height: Height recorded for every image and used as the bottom
            clipping limit for boxes. Defaults to 720.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Each image uses the row index as its
        ``id``; annotation ids count up from 0 across the whole dataset.
    """
    # Named ``coco`` rather than ``json`` so the json module is not shadowed.
    coco = {
        "info": [
            {
                "year": "2021",
                "version": "1",
                "description": "COTS dataset - COCO format",
                "contributor": "",
                "url": "https://kaggle.com",
                "date_created": "2021-11-30T15:01:26+00:00",
            }
        ],
        "licenses": [
            {
                "id": 1,
                "url": "",
                "name": "Unknown",
            }
        ],
        "categories": [
            {
                "id": 0,
                "name": "starfish",
                "supercategory": "none",
            }
        ],
        "images": [],
        "annotations": [],
    }

    next_annotation_id = 0
    for row in dataset.itertuples():
        image_id = row[0]  # position 0 of an itertuples row is the index
        coco["images"].append(
            {
                "id": image_id,
                "license": 1,
                "file_name": row.image_id + '.jpg',
                "height": img_height,
                "width": img_width,
                "date_captured": "2021-11-30T15:01:26+00:00",
            }
        )
        for box in row.box:
            x, y = box[0], box[1]
            # Shrink boxes that extend past the right or bottom image edge.
            width = min(box[2], img_width - x)
            height = min(box[3], img_height - y)
            coco["annotations"].append(
                {
                    "id": next_annotation_id,
                    "image_id": image_id,
                    "category_id": 0,
                    "bbox": [x, y, width, height],
                    # Area of the clipped box, so it always agrees with "bbox".
                    "area": width * height,
                    "segmentation": [],
                    "iscrowd": 0,
                }
            )
            next_annotation_id += 1
    return coco

In [None]:
# Build the COCO dictionaries for both splits, then write each to the
# annotations directory used by the experiment config.
train_coco = datasetToCOCO(train[train['fold'] != FOLD])
val_coco = datasetToCOCO(train[train['fold'] == FOLD])
for annotation_path, coco_dict in (
    ("/kaggle/working/dataset/images/annotations/train.json", train_coco),
    ("/kaggle/working/dataset/images/annotations/val.json", val_coco),
):
    with open(annotation_path, 'w') as annotation_file:
        json.dump(coco_dict, annotation_file)

# 4. Apply Evaluator

There are multiple kinds of YOLOX models, but for the time being, we will just focus on YOLOX-s.

In [None]:
# Source text of the YOLOX experiment file that is written out as
# cots_config.py. "$max_epoch" is a string.Template placeholder that is
# substituted before the file is written. The string begins with a newline,
# so the "#!" line ends up on the second line of the generated file.
# NOTE(review): depth/width are presumably the YOLOX-s multipliers - confirm
# against the YOLOX model definitions.
template = '''
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import os

from yolox.exp import Exp as MyExp


class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 0.33
        self.width = 0.50
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.data_dir = "/kaggle/working/dataset/images/"
        self.train_ann = 'train.json'
        self.val_ann = 'val.json'
        self.max_epoch = $max_epoch
        #self.eval_interval = 1
        #self.data_num_workers = 2
        #self.input_size = (960, 960)
        #self.test_size = (960, 960)
        #self.no_aug_epochs = 2
        #self.mosaic_scale = (0.5, 1.5)
        #self.random_size = (10, 20)
'''

In [None]:
# Fill in the $max_epoch placeholder and save the experiment file in the
# current working directory.
pipeline = Template(template).substitute({'max_epoch': 20})
with open('cots_config.py', 'w') as config_file:
    config_file.write(pipeline)

In [None]:
#VERSION 14: Added voc_cls to see if any differences arise.
# Overwrite YOLOX's class-name modules so both list the single "starfish" class.
voc_cls = '''
VOC_CLASSES = (
  "starfish",
)
'''

coco_cls = '''
COCO_CLASSES = (
  "starfish",
)
'''

for class_file_path, class_source in (
    ('/kaggle/working/YOLOX/yolox/data/datasets/voc_classes.py', voc_cls),
    ('/kaggle/working/YOLOX/yolox/data/datasets/coco_classes.py', coco_cls),
):
    with open(class_file_path, 'w') as class_file:
        class_file.write(class_source)

# 5. Weights

Now download the pretrained weights file for YOLOX-s.

In [None]:
sh = 'wget https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth'
MODEL = 'yolox_s.pth'
with open('script.sh', 'w') as file:
    file.write(sh)
!bash script.sh

# 6. Training

In [None]:
# Copy YOLOX's training script into the current directory so it can be
# launched as "train.py" next to cots_config.py.
!cp /kaggle/working/YOLOX/tools/train.py ./

In [None]:
# Launch training with the generated experiment file, starting from the
# downloaded checkpoint (IPython expands {MODEL} to the Python variable).
# NOTE(review): flag meanings come from YOLOX's tools/train.py - presumably
# -f experiment file, -d number of devices, -b batch size, --fp16 mixed
# precision, -o occupy GPU memory up front, -c checkpoint; confirm against
# the cloned version.
!python train.py -f cots_config.py -d 1 -b 32 --fp16 -o -c {MODEL}

# 7. Evaluation

The following was taken from Remek Kinas's excellent training pipeline notebook: https://www.kaggle.com/remekkinas/yolox-training-pipeline-cots-dataset-lb-0-507

In [None]:
from yolox.utils import postprocess
from yolox.data.data_augment import ValTransform

In [None]:
# Import the generated experiment module by name; this relies on
# cots_config.py being importable from the current import path.
current_exp = importlib.import_module('cots_config')
exp = current_exp.Exp()
# Inference settings. test_size is passed to yolox_inference() explicitly;
# num_classes, confthre and nmsthre are read there as globals.
test_size = (640, 640)
num_classes = 1
confthre = 0.01
nmsthre = 0.65
# Build the network from the experiment, move it to the GPU and switch it to
# evaluation mode.
model = exp.get_model()
model.cuda()
model.eval()
# Load the checkpoint onto the CPU, then copy its "model" entry into the network.
ckpt = torch.load("/kaggle/working/YOLOX/YOLOX_outputs/cots_config/best_ckpt.pth", map_location="cpu")
model.load_state_dict(ckpt["model"])

In [None]:
# Return to /kaggle/working; the final cell reads 'submission.csv' relative to
# the current directory.
%cd /kaggle/working/
def yolox_inference(img, model, test_size):
    """Run ``model`` on a single image and return its detections.

    Post-processing reads the module-level ``num_classes``, ``confthre`` and
    ``nmsthre`` settings.

    Args:
        img: Image array. Only ``img.shape[0]`` and ``img.shape[1]`` are read
            here (presumably height and width - confirm against the caller).
        model: Network to call; it receives a CUDA float tensor.
        test_size: Two-element size handed to the preprocessing transform and
            used to compute the rescaling factor.

    Returns:
        A ``(bboxes, bbclasses, scores)`` triple. Three empty lists when
        post-processing yields nothing for the image; otherwise CPU tensor
        slices of the detections: columns 0-3 divided by the resize ratio,
        column 6, and the product of columns 4 and 5.
    """
    transform = ValTransform(legacy=False)
    resized, _ = transform(img, None, test_size)
    # Add a batch dimension and move the input to the GPU as float.
    batch = torch.from_numpy(resized).unsqueeze(0).float().cuda()

    with torch.no_grad():
        detections = postprocess(
            model(batch), num_classes, confthre, nmsthre, class_agnostic=True
        )[0]

    if detections is None:
        return [], [], []

    detections = detections.cpu()

    # Undo the resize applied during preprocessing so the box coordinates
    # refer to the original image again.
    resize_ratio = min(test_size[0] / img.shape[0], test_size[1] / img.shape[1])
    bboxes = detections[:, 0:4]
    bboxes /= resize_ratio

    return bboxes, detections[:, 6], detections[:, 4] * detections[:, 5]

In [None]:
# Set up the competition's inference API: `env` receives the predictions and
# `iter_test` yields the test data one item at a time.
env = greatbarrierreef.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test set and sample submission

In [None]:
submission_dict = {
    'id': [],
    'prediction_string': [],
}

# Predict on every test item and hand the formatted result back to the
# competition environment.
for image_np, sample_prediction_df in iter_test:
    bboxes, _classes, scores = yolox_inference(image_np, model, test_size)

    predictions = []
    for box, score in zip(bboxes, scores):
        if score < confthre:
            continue
        # Boxes arrive as corner coordinates; the prediction string carries
        # the top-left corner followed by width and height.
        x_min, y_min, x_max, y_max = (int(coord) for coord in box[:4])
        predictions.append(
            '{:.2f} {} {} {} {}'.format(
                score, x_min, y_min, x_max - x_min, y_max - y_min
            )
        )

    prediction_str = ' '.join(predictions)
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)

    print('Prediction:', prediction_str)

In [None]:
# Read submission.csv from the current directory and preview its first rows
# as a sanity check.
sub_df = pd.read_csv('submission.csv')
sub_df.head()