**GitHub: [IceVision Framework](https://github.com/airctic/icevision)**

**Documentation: [IceVision Docs](https://airctic.com/dev/)**

# **Installation**

In [None]:


import torch
# Major component of the CUDA version string reported by the installed torch build.
# NOTE(review): this value is not referenced again in this cell, and the expression
# would raise if `torch.version.cuda` is None (presumably the case on CPU-only builds) — confirm it is still needed.
cuda_version_major = int(torch.version.cuda.split('.')[0])

# Shallow-clone IceVision and install it in editable mode with the `all` and `dev` extras.
# `%cd` changes the kernel's working directory, so the following commands run inside the clone.
!git clone --depth=1 https://github.com/airctic/icevision.git
%cd icevision
!pip install -e .[all,dev]
!pre-commit install

# Install a pinned mmcv-full, using the given URL as an extra place to look for wheels, then mmdet (unpinned).
# NOTE(review): the index path `dist/11/1.9.1/` does not obviously encode a CUDA/torch pair — verify the URL resolves.
!pip install mmcv-full=="1.3.17" -f https://download.openmmlab.com/mmcv/dist/11/1.9.1/index.html --upgrade
!pip install mmdet


In [None]:
pwd

In [None]:
import IPython
# Ask the running kernel to shut down; the `True` argument requests a restart.
# NOTE(review): presumably done so the packages installed above are importable in a
# fresh process. A restart at this point also interrupts "Run All", so the cells
# after this one have to be started again manually — confirm this is intended.
IPython.Application.instance().kernel.do_shutdown(True)

# **Imports**

In [None]:
from icevision.all import *

import pandas as pd
import os
import ast
import numpy as np

import greatbarrierreef

# **Prepare Training dataset**

In [None]:
# Competition data root and the sub-folder that holds the extracted video frames.
INPUT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
INPUT_DIR_IMG = '/kaggle/input/tensorflow-great-barrier-reef/train_images/'

# Put both folders on the module search path (data root first, image folder second).
for _input_folder in (INPUT_DIR, INPUT_DIR_IMG):
    sys.path.append(_input_folder)

In [None]:
TRAINING_RATIO = 0.8
SEED = 42  # fixed seed so the two shuffles below give the same order on every run

data_df = pd.read_csv(os.path.join(INPUT_DIR, 'train.csv'))

# Split the dataset so that no sequence is leaked from the training dataset into the validation dataset.
# Start at the nominal ratio and move the boundary forward until the rows on either
# side of it belong to different sequences. The bounds check stops the scan at the
# end of the frame (instead of raising IndexError) when the last sequence straddles
# the nominal split point; in that case the validation set ends up empty.
# NOTE(review): assumes the rows of each sequence are contiguous in train.csv — confirm.
split_index = int(TRAINING_RATIO * len(data_df))
while (0 < split_index < len(data_df)
       and data_df.iloc[split_index - 1].sequence == data_df.iloc[split_index].sequence):
    split_index += 1

# Shuffle both the training and validation datasets (seeded for reproducibility).
train_data_df = data_df.iloc[:split_index].sample(frac=1, random_state=SEED).reset_index(drop=True)
val_data_df = data_df.iloc[split_index:].sample(frac=1, random_state=SEED).reset_index(drop=True)

# Rows whose annotation string is not the empty list contain at least one box.
train_positive_count = len(train_data_df[train_data_df.annotations != '[]'])
val_positive_count = len(val_data_df[val_data_df.annotations != '[]'])

# Report how close the sequence-aligned split landed to the requested ratio.
print('Training ratio (all samples):', 
      float(len(train_data_df)) / (len(train_data_df) + len(val_data_df)))
print('Training ratio (positive samples):', 
      float(train_positive_count) / (train_positive_count + val_positive_count))

In [None]:
# Take only the positive images for training and validation.
# `drop=True` discards the old row labels instead of keeping them as an extra
# "index" column that would otherwise be carried into every later frame.
train_data_df = train_data_df[train_data_df.annotations != '[]'].reset_index(drop=True)
print('Number of positive images used for training:', len(train_data_df))
val_data_df = val_data_df[val_data_df.annotations != '[]'].reset_index(drop=True)
print('Number of positive images used for validation:', len(val_data_df))

# Turn the annotation strings into Python objects (only done for the training frame here).
train_data_df["annotations"] = train_data_df["annotations"].map(ast.literal_eval)

# Image path relative to the image root: video_<video_id>/<video_frame>.jpg
train_data_df["filepath"] = train_data_df.apply(lambda x : f"video_{x.video_id}/{x.video_frame}.jpg", axis=1)

train_data_df.head(3)

In [None]:
# Preview the first three rows of the training frame.
train_data_df.head(3)

In [None]:
# Image size recorded for every frame, and the resize targets used by the transforms.
HEIGHT, WIDTH = 720, 1280
presize = 512
size = 384

# One row per bounding box: each element of the per-image annotation list gets its own row.
df = train_data_df.explode("annotations").reset_index(drop=True)

# Expand the per-box dicts (keys "x", "y", "width", "height") into columns in one pass
# instead of running a Python lambda over every row for each coordinate.
boxes = pd.DataFrame(df["annotations"].tolist(), index=df.index)

# Constant columns; note that these "width"/"height" are the image size, not the box size.
df["width"] = WIDTH
df["height"] = HEIGHT
df["label"] = "starfish"

# Corner coordinates; the far corner is clipped to the image bounds using the
# WIDTH/HEIGHT constants rather than repeating the literals.
df["xmin"] = boxes["x"]
df["ymin"] = boxes["y"]
df["xmax"] = (boxes["x"] + boxes["width"]).clip(upper=WIDTH)
df["ymax"] = (boxes["y"] + boxes["height"]).clip(upper=HEIGHT)

# Drop the columns that are not used after this point.
df = df.drop(["video_id","sequence","video_frame","sequence_frame",
              "image_id","annotations"], axis=1)

df = df.reset_index(drop=True)
df.head(3)

In [None]:
# Preview the first ten rows of the per-box frame.
df.head(10)

# **Data Parser**

### Creating a [custom parser](https://airctic.com/dev/custom_parser/) for the COTS dataset

In [None]:
# Create an empty object-detection record to serve as the parser's template, then
# ask IceVision to generate the class template for a custom parser from it.
template_record = ObjectDetectionRecord()
Parser.generate_template(template_record)

In [None]:
class COTSParser(Parser):
    """Custom IceVision parser fed by a DataFrame with one bounding box per row.

    The frame must provide the columns read below: ``filepath``, ``width``,
    ``height``, ``label``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
    """

    def __init__(self, template_record, data_dir, df):
        """Store the image root and the box frame, and build the class map.

        Args:
            template_record: Record template forwarded to the base ``Parser``.
            data_dir: Directory that each row's ``filepath`` is joined onto.
            df: DataFrame with one row per bounding box.
        """
        super().__init__(template_record=template_record)

        self.data_dir = data_dir
        self.df = df
        # The class map is built from the distinct values of the "label" column.
        unique_labels = self.df['label'].unique()
        self.class_map = ClassMap(list(unique_labels))

    def __iter__(self) -> Any:
        """Yield every row of the frame as a named tuple."""
        yield from self.df.itertuples()

    def __len__(self) -> int:
        """Return the number of rows (boxes) in the frame."""
        return self.df.shape[0]

    def record_id(self, o) -> Hashable:
        """Use the row's image path as the record identifier."""
        return o.filepath

    def parse_fields(self, o, record, is_new):
        """Copy one row into ``record``.

        Image-level fields (path, size, class map) are written only when
        ``is_new`` is true; the row's box and label are appended every time.
        """
        if is_new:
            image_path = os.path.join(self.data_dir, o.filepath)
            image_size = ImgSize(width=o.width, height=o.height)
            record.set_filepath(image_path)
            record.set_img_size(image_size)
            record.detection.set_class_map(self.class_map)

        box = BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)
        record.detection.add_bboxes([box])
        record.detection.add_labels([o.label])

# **Data Loader**

In [None]:
# Parse the per-box frame into IceVision records, resolving image paths against INPUT_DIR_IMG.
parser = COTSParser(template_record, INPUT_DIR_IMG, df)

# NOTE(review): `parse()` is called without arguments, so the train/valid split is
# whatever the base Parser does by default. `df` is derived from `train_data_df`
# only, and `val_data_df` built earlier is not used here — confirm this is intended.
train_records, valid_records = parser.parse()
print(parser.class_map)

In [None]:
# Augmentations using albumentations.
# Training: IceVision's augmentation preset followed by normalisation.
train_pipeline = [*tfms.A.aug_tfms(size=size, presize=presize), tfms.A.Normalize()]
train_tfms = tfms.A.Adapter(train_pipeline)

# Validation: resize and pad to the target size, then normalise.
valid_pipeline = [*tfms.A.resize_and_pad(size), tfms.A.Normalize()]
valid_tfms = tfms.A.Adapter(valid_pipeline)

train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

# Fetch the first training item five times and display the results in a 3-column grid.
samples = []
for _ in range(5):
    samples.append(train_ds[0])
show_samples(samples, ncols=3)

# **Defining Model**

In [None]:
# Checkpoint to resume from; only read when `fine_tune` is True.
PATH = "../input/yolo-x-training-using-icevision/COTS_yolo-sx2.pth"
fine_tune = True #set false if running this for the first time

model_type = models.mmdet.yolox #selecting the model
backbone = model_type.backbones.yolox_tiny_8x8(pretrained=True) #selecting backbone for the model
# One output class per entry in the parser's class map.
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map)) #instantiate the model

if fine_tune:
    # Load the tensors onto the CPU first so a checkpoint saved from a GPU session can
    # still be restored when no GPU is available; load_state_dict then copies the
    # values into the model's existing parameters.
    state_dict = torch.load(PATH, map_location="cpu")
    model.load_state_dict(state_dict)

In [None]:
# Model-specific dataloaders; both use the same batch size and only the training loader shuffles.
BATCH_SIZE = 1
train_dl = model_type.train_dl(train_ds, batch_size=BATCH_SIZE, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Metrics passed to the learner: a single COCO metric configured for bounding boxes.
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

# **Model Training**

In [None]:
# Build the fastai learner from the two dataloaders, the model and the metrics,
# then run the learning-rate finder to look for the best probable learning rate.
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
learn.lr_find()

In [None]:
# Begin training: `fine_tune` is called with 5 epochs and freeze_epochs=1.
# NOTE(review): the learning rate is a hard-coded literal, presumably copied from an
# earlier `lr_find` run — confirm it still suits the current data and checkpoint.
learn.fine_tune(5, 6.309573450380412e-07, freeze_epochs=1)

In [None]:
# Run inference on a few hand-picked validation items and plot the predictions.
preview_indices = [0, 4, 7, 9]
preview_items = [valid_ds[idx] for idx in preview_indices]
infer_dl = model_type.infer_dl(preview_items, batch_size=1, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)
show_preds(preds=preds)

In [None]:
# Save the model's weights (state dict only) to the working directory; the file
# name matches the last component of the checkpoint path loaded via PATH.
torch.save(model.state_dict(), "COTS_yolo-sx2.pth")