*** IMPORTANT *** 
This code was written for training purposes. The code is shared only for knowledge sharing purposes. The code should not be used in any production environments.

## Create a coco dataset from a csv file with x,y coordinates of the segment polygon.


You can run this notebook to generate COCO-format JSON for a dataset that has `x,y` coordinates for the mask. 

Then you can run the viwer.ipynb notebook to visualize the coco annotations. 

This code is based on the git repo https://github.com/Tony607/voc2coco

In [None]:
import cv2
from matplotlib import pyplot as plt
import pandas as pd
import os
import json
import glob

# Initial value of the annotation id counter used by convert().
START_BOUNDING_BOX_ID = 1
# Maps each category name to the id written into the "categories" section.
PRE_DEFINE_CATEGORIES = {"cavity":0}

In [None]:
def get_df(fpath):
    """Load a header-less annotation CSV file into a DataFrame.

    Args:
        fpath: Location of the CSV file; it is converted to a string before
            being handed to pandas.

    Returns:
        The parsed DataFrame, with integer column labels because the file is
        read with ``header=None``.
    """
    csv_path = str(fpath)
    return pd.read_csv(csv_path, header=None)
# annotation_csv.to_numpy().flatten().tolist()

In [None]:
# This method gets the segmentation for polygons. It should not be used to return RLE values, because the assumption is
# that the annotated data does not contain crowds but instead contains individual elements.

def get_segmentation(df):
    """Flatten a table of polygon coordinates into a list of polygons.

    Args:
        df: DataFrame holding the polygon coordinates.

    Returns:
        A list with a single element: the flat list of every value in ``df``,
        taken row by row.
    """
    flat_polygon = df.to_numpy().ravel().tolist()
    return [flat_polygon]

In [None]:
def get_image(csv_file):
    """Locate and load the frame image that a polygon CSV file annotates.

    The CSV base name is expected to look like ``<folder>_frame<N>.csv``. The
    image is then read from
    ``../../opencv-tutorial/data/split_data/<folder>/frame<N>.jpg``.

    Args:
        csv_file: Path to the annotation CSV file.

    Returns:
        A ``(filename, folder_path, image)`` tuple: the image file name
        (``frame<N>.jpg``), the folder name parsed from the CSV name, and the
        value returned by ``cv2.imread`` (``None`` when the image could not
        be read, so callers should check it before use).

    Raises:
        IndexError: If the CSV base name does not contain ``frame``.
    """
    # os.path.basename understands the platform's path separators, unlike
    # a hard-coded split('/').
    stem = os.path.basename(csv_file).split('.')[0]
    # Split on the LAST 'frame' so that folder names which themselves contain
    # 'frame' (e.g. 'frameset_frame12') are still parsed correctly.
    prefix, marker, suffix = stem.rpartition('frame')
    if not marker:
        raise IndexError(f"no 'frame' marker in CSV file name: {csv_file}")
    folder_path = prefix.rstrip('_')
    filename = 'frame' + suffix + '.jpg'
    img_path = '../../opencv-tutorial/data/split_data/' + folder_path + '/' + filename
    image = cv2.imread(img_path)
    return filename, folder_path, image
    

In [None]:
def convert(csv_files, json_output):
    """Build a COCO-style JSON annotation file from polygon CSV files.

    Each CSV file holds the vertices of one segmentation polygon (x in column
    0, y in column 1, no header row) and produces one image entry and one
    annotation entry.

    Args:
        csv_files: Iterable of paths to the polygon CSV files.
        json_output: Path of the JSON file to write. Missing parent
            directories are created.

    Returns:
        None. The result is written to ``json_output``.

    Raises:
        FileNotFoundError: If the image belonging to a CSV file cannot be read.
    """
    if PRE_DEFINE_CATEGORIES is None:
        print("You must define the category")
        return None

    categories = PRE_DEFINE_CATEGORIES
    category_id = categories["cavity"]
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    bnd_id = START_BOUNDING_BOX_ID

    for image_id, csv_file in enumerate(csv_files):
        filename, folder_path, img = get_image(csv_file)
        if img is None:
            # cv2.imread signals an unreadable file with None; fail with a
            # clear message instead of an AttributeError on img.shape.
            raise FileNotFoundError(
                f"could not read image {folder_path}/{filename} for {csv_file}"
            )
        annotation_csv = pd.read_csv(csv_file, header=None)

        height, width = img.shape[:2]
        json_dict["images"].append({
            "file_name": folder_path + '_' + filename,
            "height": height,
            "width": width,
            "id": image_id,  # TODO This id is not universal.
        })

        # Bounding box of the polygon, padded by one pixel on every side.
        col_min = annotation_csv.min(0)
        col_max = annotation_csv.max(0)
        xmin = int(col_min[0]) - 1
        ymin = int(col_min[1]) - 1
        xmax = int(col_max[0]) + 1
        ymax = int(col_max[1]) + 1
        o_width = xmax - xmin
        o_height = ymax - ymin
        o_segmentation = get_segmentation(annotation_csv)

        if o_segmentation:
            json_dict["annotations"].append({
                # Area of the padded bounding box, not of the polygon itself.
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": o_segmentation,
            })
            # Every annotation needs its own id; without this increment all
            # annotations would share START_BOUNDING_BOX_ID.
            bnd_id += 1

    for cate, cid in categories.items():
        json_dict["categories"].append(
            {"supercategory": "none", "id": cid, "name": cate}
        )

    # Write to the path the caller passed in, not to a module-level global.
    out_dir = os.path.dirname(json_output)
    if out_dir:
        # os.makedirs('') raises, so only create a directory when there is one.
        os.makedirs(out_dir, exist_ok=True)
    with open(json_output, "w") as json_fp:
        json_fp.write(json.dumps(json_dict))

    
    

In [None]:
# Sort the matches: glob returns them in arbitrary order, and convert() assigns
# image ids in list order, so sorting keeps the ids stable between runs.
csv_files = sorted(glob.glob(os.path.join('./test-data/csv', "*.csv")))
json_file = "./test-data/csv/output.json"

convert(csv_files, json_file)
