In [None]:
# Fetch the detectron2 sources from GitHub into ./detectron2.
!git clone https://github.com/facebookresearch/detectron2.git
# NOTE: %cd changes the kernel's working directory, so relative paths used by
# later cells (e.g. "train.json", "test.json") resolve inside the clone.
%cd detectron2
# Editable install of the checkout that was just cloned.
!python -m pip install -e ./

# Initialize Templates

In [None]:
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from PIL import ImageDraw
from PIL import Image
import pandas as pd
import numpy as np
import json
import copy
import os
import cv2
import ast

# functions
def get_templates():
    """Build fresh, unfilled COCO building blocks.

    Returns:
        A 4-tuple ``(coco_json, image, annotation, category)`` of newly
        created dicts:

        * ``coco_json`` - top-level container with empty ``info``, ``images``,
          ``licenses``, ``annotations`` and ``categories`` entries.
        * ``image`` - a single image record with placeholder values.
        * ``annotation`` - a single annotation record with placeholder values.
        * ``category`` - a single category record whose supercategory is
          ``'Coral_creatures'``.

        Every call returns new objects, so callers may mutate them freely.
    """
    coco_skeleton = dict(
        info={},
        images=[],
        licenses=[],
        annotations=[],
        categories=[],
    )

    image_entry = dict(
        license=0,
        file_name='',
        coco_url=None,
        height=None,
        width=None,
        date_captured=None,
        flickr_url=None,
        id=None,
    )

    annotation_entry = dict(
        segmentation=[[]],
        area=None,
        iscrowd=0,
        image_id=None,
        bbox=[],
        category_id=None,
        id=None,
    )

    category_entry = dict(
        supercategory='Coral_creatures',
        id=None,
        name='',
    )

    return coco_skeleton, image_entry, annotation_entry, category_entry

In [None]:
def to_json(df, base_dir, draw=False):
    """Convert annotated video frames into a COCO-style detection dictionary.

    Args:
        df: DataFrame whose rows provide ``video_id``, ``video_frame`` and
            ``annotations``. ``annotations`` is parsed as a Python literal and
            must evaluate to a list of dicts with ``x``, ``y``, ``width`` and
            ``height`` keys (an empty list means "no boxes").
        base_dir: Directory that contains ``video_<video_id>/<video_frame>.jpg``.
        draw: When True, the boxes of every annotated frame are drawn onto the
            image and the result is saved as
            ``output/video_<video_id>_<video_frame>.jpg``.

    Returns:
        The dict produced by ``get_templates()`` with ``images``,
        ``annotations`` and ``categories`` filled in. Image and annotation ids
        are 1-based; each annotation's ``image_id`` is the id of the frame it
        was read from.
    """
    coco_json, image_template, annotation_template, category_template = get_templates()

    # Single-class dataset: every box gets category id 1 ("starfish").
    category_ = copy.deepcopy(category_template)
    category_["id"] = 1
    category_["name"] = "starfish"
    coco_json['categories'].append(category_)

    annot_id = 1
    rows = tqdm(df.iterrows(), total=len(df))
    # enumerate() hands out the image id once per row, so the image record and
    # all of its annotations are guaranteed to share the same id.
    for image_id, (_, row) in enumerate(rows, start=1):
        image_name = os.path.join(f"video_{row['video_id']}", f"{row['video_frame']}.jpg")

        # literal_eval only accepts Python literals; unlike eval it cannot
        # execute arbitrary code embedded in the CSV cell.
        annotations = ast.literal_eval(row["annotations"])
        bboxes = [
            [annotation["x"], annotation["y"], annotation["width"], annotation["height"]]
            for annotation in annotations
        ]

        # The context manager releases the file handle after each frame.
        with Image.open(os.path.join(base_dir, image_name)) as img_:
            img_width, img_height = img_.size

            if draw and bboxes:
                draw_handle = ImageDraw.Draw(img_)
                for x, y, w, h in bboxes:
                    # COCO boxes are [x, y, width, height]; PIL wants two corners.
                    draw_handle.rectangle(
                        [(int(x), int(y)), (int(x) + int(w), int(y) + int(h))],
                        width=5,
                    )
                os.makedirs("output", exist_ok=True)
                img_.save(
                    os.path.join("output", f"video_{row['video_id']}_{row['video_frame']}.jpg")
                )

        # image record
        image_prop = copy.deepcopy(image_template)
        image_prop['file_name'] = image_name
        image_prop['height'] = img_height
        image_prop['width'] = img_width
        image_prop['id'] = image_id
        coco_json['images'].append(image_prop)

        # one annotation record per bounding box
        for bbox in bboxes:
            image_annot = copy.deepcopy(annotation_template)
            image_annot['segmentation'] = []
            image_annot["area"] = bbox[2] * bbox[3]
            image_annot['image_id'] = image_id
            image_annot['bbox'] = bbox
            image_annot['category_id'] = 1
            image_annot['id'] = annot_id
            annot_id += 1
            coco_json['annotations'].append(image_annot)

    return coco_json

# Split the data and convert to json

In [None]:
from sklearn.model_selection import train_test_split

# Load the competition's frame-level annotation table.
df = pd.read_csv("/kaggle/input/tensorflow-great-barrier-reef/train.csv")
# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=43)

# Report the size of each split (the first line previously said "test set").
print(f"total images in train set :- {len(train_df)}")
print(f"total images in test set :- {len(test_df)}")

In [None]:
# Root folder of the frames that the annotation table refers to.
IMG_BASE_DIR = "/kaggle/input/tensorflow-great-barrier-reef/train_images"

# Training split -> COCO dict -> train.json in the current working directory.
train_json = to_json(train_df, IMG_BASE_DIR)
with open("train.json", "w") as file:
    file.write(json.dumps(train_json))

# Held-out split -> COCO dict -> test.json in the current working directory.
test_json = to_json(test_df, IMG_BASE_DIR)
with open("test.json", "w") as file:
    file.write(json.dumps(test_json))

# Register datasets

In [None]:
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import detection_utils as utils
from detectron2.utils.visualizer import Visualizer
import matplotlib.pyplot as plt
import random

# Register each split only when it is missing from the catalog. This keeps the
# cell safe to re-run, still registers the test split when only the train split
# already exists, and no longer hides unrelated assertion failures behind a
# blanket ``except AssertionError``.
for dataset_name, json_file in (
    ("Coral_starfish_train", "train.json"),
    ("Coral_starfish_test", "test.json"),
):
    if dataset_name in DatasetCatalog.list():
        print("dataset already created")
    else:
        register_coco_instances(dataset_name, {}, json_file, IMG_BASE_DIR)

# Visualize train set

In [None]:
# Number of randomly chosen annotated training images to display.
n = 5
damage_metadata = MetadataCatalog.get("Coral_starfish_train")
dataset_dicts = DatasetCatalog.get("Coral_starfish_train")
# Keep only the records that carry at least one annotation.
images_with_annot = [d for d in dataset_dicts if len(d["annotations"])!=0]
print(f"images with atleast one annotation in train Set :- {len(images_with_annot)}")
# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of annotated images.
for d in random.sample(images_with_annot, min(n, len(images_with_annot))):
    print(d)
    # Draw ground truths
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read image: {d['file_name']}")
    # OpenCV loads BGR; reverse the channel axis before handing it to Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=damage_metadata, scale=1)
    vis = visualizer.draw_dataset_dict(d)
    gt_image = vis.get_image()

    plt.figure(figsize=(16,9))
    plt.imshow(gt_image)
    plt.show()

# Visualize test set

In [None]:
# Number of randomly chosen annotated test images to display.
n = 5
damage_metadata = MetadataCatalog.get("Coral_starfish_test")
dataset_dicts = DatasetCatalog.get("Coral_starfish_test")
# Keep only the records that carry at least one annotation.
images_with_annot = [d for d in dataset_dicts if len(d["annotations"])!=0]
print(f"images with atleast one annotation in test Set :- {len(images_with_annot)}")
# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of annotated images.
for d in random.sample(images_with_annot, min(n, len(images_with_annot))):
    print(d)
    # Draw ground truths
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read image: {d['file_name']}")
    # OpenCV loads BGR; reverse the channel axis before handing it to Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=damage_metadata, scale=1)
    vis = visualizer.draw_dataset_dict(d)
    gt_image = vis.get_image()

    plt.figure(figsize=(16,9))
    plt.imshow(gt_image)
    plt.show()