# YOLOXの準備

In [None]:
# Fetch the YOLOX sources (quiet clone) into the current working directory.
!git clone https://github.com/Megvii-BaseDetection/YOLOX -q

# The installs below are run from inside the cloned repository.
%cd YOLOX
# Upgrade pip, then install the packages listed in the repository's requirements.txt.
!pip install -U pip && pip install -r requirements.txt
# Install the repository itself in editable (-e) mode with verbose (-v) output.
!pip install -v -e . 
# Install the COCO Python API from the PythonAPI subdirectory of the cocoapi repository.
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# NOTE(review): Cython is installed after the packages above — confirm whether it is
# meant to be available before they are built.
!pip install Cython

# データセットの準備

In [None]:
# Switch to the Kaggle working directory; later cells write their outputs below it.
%cd /kaggle/working

from tqdm import tqdm
# Register tqdm with pandas (progress_apply is used later in this notebook).
tqdm.pandas()
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

import ast
import os
import json
import pandas as pd
import torch
import importlib
import cv2 

from shutil import copyfile
# This import rebinds `tqdm` to the notebook variant, replacing the name imported
# above; tqdm.pandas() is then called once more on the new binding.
from tqdm.notebook import tqdm
tqdm.pandas()
from sklearn.model_selection import GroupKFold
from PIL import Image
from string import Template
from IPython.display import display

In [None]:
def get_bbox(annots):
    """Convert a sequence of annotation dicts into a list of value lists.

    Args:
        annots: iterable of dicts, one per annotation.

    Returns:
        A list with one entry per annotation, each being ``list(d.values())``
        in the dict's own key order.
    """
    boxes = []
    for annotation in annots:
        boxes.append(list(annotation.values()))
    return boxes

def get_path(row):
    """Attach the source image path to a row and return the row.

    The path is built from the module-level ``TRAIN_PATH`` and the row's
    ``video_id`` and ``video_frame`` attributes, and stored under the
    ``'image_path'`` key of ``row`` (the row is modified in place).
    """
    frame_file = f'{TRAIN_PATH}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = frame_file
    return row

## train.csvの読み出し

In [None]:
import pandas as pd
# Load the competition's train.csv into `df`. Later cells read its 'annotations',
# 'video_id', 'video_frame' and 'image_id' columns.
df = pd.read_csv("/kaggle/input/tensorflow-great-barrier-reef/train.csv")
# Preview the first rows.
df.head()

## アノテーションがある部分のみ抽出

In [None]:
# Keep only the frames that carry at least one annotation.
# The box count is taken as the number of 'x' characters in the raw annotation
# string — presumably one per serialized box (its 'x' key); verify against train.csv.
df["num_bbox"] = df['annotations'].apply(lambda x: str.count(x, 'x'))

# Take an explicit copy: the columns added below must land on an independent
# frame rather than on a filtered slice of `df` (the SettingWithCopy pattern,
# whose warning is hidden by the blanket warnings filter in this notebook).
df_train = df[df["num_bbox"] > 0].copy()

# Parse the annotation strings into Python objects, then flatten every
# annotation dict into a list of its values.
df_train['annotations'] = df_train['annotations'].progress_apply(ast.literal_eval)
df_train['bboxes'] = df_train['annotations'].progress_apply(get_bbox)

# Image resolution, stored per row (every frame gets the same values).
df_train["width"] = 1280
df_train["height"] = 720

# Root of the competition data; get_path reads this global to build image paths.
TRAIN_PATH = '/kaggle/input/tensorflow-great-barrier-reef'
df_train = df_train.progress_apply(get_path, axis=1)

## ヒトデのカット&ペースト

In [None]:
# video_id held out for validation: its frames are left unaugmented and copied to
# val2017, while the frames of every other video are used for training.
SELECTED_VIDEO = 2

In [None]:

!rm -r /kaggle/working/cut_paste/
!mkdir -p /kaggle/working/cut_paste/video_0/
!mkdir -p /kaggle/working/cut_paste/video_1/
!mkdir -p /kaggle/working/cut_paste/video_2/

import cv2
import random
import torchvision.ops.boxes as bops
import copy
# Cut & paste augmentation.
# For every frame that does NOT belong to SELECTED_VIDEO, up to PASTE_NUM times:
# pick one of the frame's original boxes, copy its pixels to a random location
# that overlaps no box already present, and record the new box. The augmented
# frame is written under /kaggle/working/cut_paste/. Frames of SELECTED_VIDEO are
# passed through unchanged. The results are collected in `bboxes_list` and
# `img_path`, in the same row order as `df_train`.
PASTE_NUM = 20          # paste attempts per training frame
CUT_PASTE_SEED = 42     # fixed seed so the generated dataset is reproducible
random.seed(CUT_PASTE_SEED)

bboxes_list = []
img_path = []
for row_idx in tqdm(range(len(df_train))):
    row = df_train.iloc[row_idx]

    if row['video_id'] == SELECTED_VIDEO:
        # Held-out video: keep the original boxes and the original image path.
        bboxes_list.append(row['bboxes'])
        img_path.append(row['image_path'])
        continue

    # `bboxes` stays fixed as the pool of paste sources; `new_bbox_list` grows
    # with every successful paste.
    bboxes = copy.deepcopy(row['bboxes'])
    new_bbox_list = copy.deepcopy(bboxes)
    video_id = row['video_id']
    video_frame = row['video_frame']

    img = cv2.imread(row['image_path'])
    if img is None:
        # cv2.imread signals failure by returning None; fail with a clear message.
        raise FileNotFoundError(f"could not read image: {row['image_path']}")
    img_h, img_w = img.shape[:2]

    for _ in range(PASTE_NUM):
        bbox = random.choice(bboxes)
        x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]

        cut_img = img[y:y + h, x:x + w]
        if cut_img.shape[:2] != (h, w):
            # The source box reaches outside the frame, so the patch is truncated.
            # Skip it explicitly instead of relying on the assignment to raise
            # (a 1-pixel-wide/high patch would otherwise be broadcast silently).
            continue

        # Random top-left corner such that the pasted patch stays inside the frame.
        px = random.randint(0, img_w - w)
        py = random.randint(0, img_h - h)

        # Reject the location if it touches any existing box (originals included,
        # which also guarantees the source patch is never overwritten).
        existing = torch.tensor(
            [[b[0], b[1], b[0] + b[2], b[1] + b[3]] for b in new_bbox_list]
        )
        candidate = torch.tensor([[px, py, px + w, py + h]])
        if torch.sum(bops.box_iou(existing, candidate)) > 0:
            continue

        img[py:py + h, px:px + w] = cut_img
        new_bbox_list.append([px, py, w, h])

    bboxes_list.append(new_bbox_list)

    out_path = f'/kaggle/working/cut_paste/video_{video_id}/{video_frame}.jpg'
    cv2.imwrite(out_path, img)
    img_path.append(out_path)

In [None]:
# Swap in the per-row box lists and image paths collected by the cut & paste
# loop (both lists follow the row order of df_train), then display the frame.
df_train = df_train.assign(bboxes=bboxes_list, image_path=img_path)
df_train

## COCOフォーマットに変更

In [None]:
HOME_DIR = '/kaggle/working/' 
DATASET_PATH = f'kaggle_dataset/images'

!rm -r {HOME_DIR}{DATASET_PATH}

!mkdir {HOME_DIR}kaggle_dataset
!mkdir {HOME_DIR}{DATASET_PATH}
!mkdir {HOME_DIR}{DATASET_PATH}/train2017
!mkdir {HOME_DIR}{DATASET_PATH}/val2017
!mkdir {HOME_DIR}{DATASET_PATH}/annotations


for i in tqdm(range(len(df_train))):
    row = df_train.iloc[i]
    if row.video_id != SELECTED_VIDEO:
        copyfile(f'{row.image_path}', f'{HOME_DIR}{DATASET_PATH}/train2017/{row.image_id}.jpg')
    else:
        copyfile(f'{row.image_path}', f'{HOME_DIR}{DATASET_PATH}/val2017/{row.image_id}.jpg') 
print(f'FOLD {SELECTED_VIDEO} Number of training files: {len(os.listdir(f"{HOME_DIR}{DATASET_PATH}/train2017/"))}')
print(f'FOLD {SELECTED_VIDEO} Number of validation files: {len(os.listdir(f"{HOME_DIR}{DATASET_PATH}/val2017/"))}')

In [None]:
def save_annot_json(json_annotation, filename):
    """Write ``json_annotation`` to ``filename`` as JSON text.

    The file is opened in text write mode, so any existing content is replaced;
    the object is serialized with ``json.dumps`` using its default settings.
    """
    with open(filename, 'w') as out_file:
        out_file.write(json.dumps(json_annotation))


# Running annotation id; dataset2coco reads and increments it through `global`.
annotion_id = 0

In [None]:
def dataset2coco(df, dest_path):
    """Convert a frame of per-image boxes into a COCO-style detection dictionary.

    Args:
        df: DataFrame with the columns ``image_id`` (str), ``width``, ``height``
            and ``bboxes`` (list of ``[x, y, width, height]`` lists). The frame's
            index value is used as the COCO image id.
        dest_path: unused; kept so existing callers keep working.

    Returns:
        dict with the keys ``info``, ``licenses``, ``categories``, ``images``
        and ``annotations``. Every annotation uses category id 0 ("starfish").

    Side effects:
        Annotation ids are drawn from the module-level counter ``annotion_id``,
        which keeps increasing across calls so that ids stay unique between the
        training and validation files. If the counter has not been defined yet
        it starts at 0. A completion message is printed.
    """
    global annotion_id
    # Tolerate a fresh kernel where the counter cell has not been executed.
    annotion_id = globals().get("annotion_id", 0)

    annotations_json = {
        "info": [],
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": []
    }

    info = {
        "year": "2021",
        "version": "1",
        "description": "COTS dataset - COCO format",
        "contributor": "",
        "url": "https://kaggle.com",
        "date_created": "2021-11-30T15:01:26+00:00"
    }
    annotations_json["info"].append(info)

    lic = {
        "id": 1,
        "url": "",
        "name": "Unknown"
    }
    annotations_json["licenses"].append(lic)

    classes = {"id": 0, "name": "starfish", "supercategory": "none"}
    annotations_json["categories"].append(classes)

    for ann_row in df.itertuples():
        # ann_row[0] is the frame's index value and doubles as the image id.
        image_id = ann_row[0]
        img_w = ann_row.width
        img_h = ann_row.height

        images = {
            "id": image_id,
            "license": 1,
            "file_name": ann_row.image_id + '.jpg',
            "height": img_h,
            "width": img_w,
            "date_captured": "2021-11-30T15:01:26+00:00"
        }
        annotations_json["images"].append(images)

        for bbox in ann_row.bboxes:
            x, y = bbox[0], bbox[1]
            b_width = bbox[2]
            b_height = bbox[3]

            # Some boxes in COTS reach past the right/bottom image border:
            # shrink them so they end exactly at the border. (The remaining
            # extent is image_size - origin; the reverse subtraction would
            # produce a negative size.)
            if x + b_width > img_w:
                b_width = img_w - x
            if y + b_height > img_h:
                b_height = img_h - y

            image_annotations = {
                "id": annotion_id,
                "image_id": image_id,
                "category_id": 0,
                "bbox": [x, y, b_width, b_height],
                # Area of the box as stored, i.e. after clipping.
                "area": b_width * b_height,
                "segmentation": [],
                "iscrowd": 0
            }

            annotion_id += 1
            annotations_json["annotations"].append(image_annotations)

    print(f"Dataset COTS annotation to COCO json format completed! Files: {len(df)}")
    return annotations_json

In [None]:

# Build the COCO dictionaries for both splits. Training is converted first, so
# its annotations receive the lower ids from the shared counter.
val_mask = df_train.video_id == SELECTED_VIDEO
train_annot_json = dataset2coco(df_train[~val_mask], f"{HOME_DIR}{DATASET_PATH}/train2017/")
val_annot_json = dataset2coco(df_train[val_mask], f"{HOME_DIR}{DATASET_PATH}/val2017/")

# Write both dictionaries into the annotations folder of the dataset tree.
annotations_dir = f"{HOME_DIR}{DATASET_PATH}/annotations"
save_annot_json(train_annot_json, f"{annotations_dir}/train.json")
save_annot_json(val_annot_json, f"{annotations_dir}/valid.json")

# データの分布を確認(Valは重要)

In [None]:
import matplotlib.pyplot as plt
import random
import numpy as np
# Compare the box-size distribution of the training videos with that of the
# held-out video. "Size" is sqrt(width * height) of each box.
hist_list = []
for k in [SELECTED_VIDEO]:
    train_area_list, val_area_list = [], []
    for i in tqdm(range(len(df_train))):
        row = df_train.iloc[i]
        sizes = [np.sqrt(box[2] * box[3]) for box in row.bboxes]
        if row.video_id == k:
            val_area_list.extend(sizes)
        else:
            train_area_list.extend(sizes)

    # Summary statistics for both splits, followed by their differences.
    train_mean, train_std = np.mean(train_area_list), np.std(train_area_list)
    val_mean, val_std = np.mean(val_area_list), np.std(val_area_list)
    print(f'train: mean {train_mean} std {train_std}')
    print(f'val: mean {val_mean} std {val_std}')
    print(train_mean - val_mean, train_std - val_std)

    # Draw both histograms on the same axes.
    plt.hist(train_area_list, range=(0, 200), bins=100);
    plt.hist(val_area_list, range=(0, 200), bins=100);

# YOLOXの学習設定

In [None]:
def make_config(fold):
    """Return the source text of a YOLOX experiment file as a template string.

    The text contains a single ``$max_epoch`` placeholder intended for
    ``string.Template.substitute``. ``fold`` is accepted but does not influence
    the returned text.
    """
    # Plain (non-f) literal: the text has no {} fields, only the $ placeholder.
    return '''

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import os

from yolox.exp import Exp as MyExp


class Exp(MyExp):
  def __init__(self):
    super(Exp, self).__init__()
    self.depth = 1
    self.width = 1
    self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
    self.output_dir = "/kaggle/working"

    # Define yourself dataset path
    self.data_dir = "/kaggle/working/kaggle_dataset/images"
    self.train_ann = "train.json"
    self.val_ann = "valid.json"
    self.basic_lr_per_img = 1e-2 / 64.0 
    self.num_classes = 1

    self.max_epoch = $max_epoch
    self.data_num_workers = 2
    self.eval_interval = 1

    self.mosaic_prob = 0.5
    self.mixup_prob = 0.5
    self.hsv_prob = 1.0
    self.flip_prob = 0.5
    self.no_aug_epochs = 2

    self.input_size = (1280, 1280)
    self.mosaic_scale = (1.0, 1.5)
    self.mixup_scale = (1.0 , 1.5)
    self.test_size = (1280, 1280)
'''

# 学習済みモデルのダウンロード

In [None]:
# Download the yolox_l.pth checkpoint from the YOLOX 0.1.1rc0 release into the
# current directory; the training cell passes /kaggle/working/yolox_l.pth via -c.
!wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.pth

# CV出力用のパッチコードを移植

In [None]:
%cd /kaggle/working/YOLOX
# Copy the training script from tools/ to the repository root, where the training
# cell invokes it as `python train.py`.
!cp ./tools/train.py ./
# Copy the `yolox` directory of the yolox-cots-cv-patch input dataset into the
# cloned repository directory.
!cp  -r /kaggle/input/yolox-cots-cv-patch/yolox /kaggle/working/YOLOX/

# 学習

In [None]:
%cd /kaggle/working/YOLOX

#!rm -r /content/kaggle_dataset_{SELECTED_FOLD}/images/img_resized_cache_train2017.array
# Render the experiment template and write it out as the config file for this run.
config_file_template = make_config(SELECTED_VIDEO)
PIPELINE_CONFIG_PATH=f'./cots_config_video_id_{SELECTED_VIDEO}_epoch_20_yolox_l_size_1280_data_mizumashi_0114.py'

# NOTE(review): max_epoch is substituted as 0 although the config file name says
# "epoch_20" — confirm the intended number of epochs.
pipeline = Template(config_file_template).substitute(max_epoch = 0)

with open(PIPELINE_CONFIG_PATH, 'w') as f:
    f.write(pipeline)
    # ./yolox/data/datasets/voc_classes.py

# Overwrite the VOC class list with the single "starfish" class.
voc_cls = '''VOC_CLASSES = ("starfish",)'''
with open('/kaggle/working/YOLOX/yolox/data/datasets/voc_classes.py', 'w') as f:
    f.write(voc_cls)

# ./yolox/data/datasets/coco_classes.py

# Overwrite the COCO class list with the single "starfish" class.
coco_cls = '''COCO_CLASSES = ("starfish",)'''

with open('/kaggle/working/YOLOX/yolox/data/datasets/coco_classes.py', 'w') as f:
    f.write(coco_cls)

# check if everything is ok    
!more /kaggle/working/YOLOX/yolox/data/datasets/coco_classes.py
# Launch training with the generated config file (-f) and the downloaded
# yolox_l.pth file (-c). The command is one shell line continued with backslashes.
!python train.py \
  -f {PIPELINE_CONFIG_PATH} \
  -d 1 \
  -b 4\
  --fp16 \
  --cache\
  -o \
  -c /kaggle/working/yolox_l.pth\
  