# Conversion from VGG to COCO Annotations

This Python script converts annotations from the VGG Image Annotator format to the Common Objects in Context (COCO) format. The conversion is primarily used for object detection tasks and is helpful when transitioning between different annotation formats.

### Dependencies:
- `skimage`: The scikit-image library for image processing.
- `math`: Standard Python math library.
- `itertools.chain`: Used for flattening lists.
- `numpy`: The NumPy library for numerical operations.
- `json`: JSON library for handling JSON data.
- `os`: The operating system module for file path operations.

In [None]:
import skimage 
import math
from itertools import chain
import numpy as np
import json
import os

def _polygon_area(xs, ys):
    """Return the area enclosed by the polygon with vertices (xs[k], ys[k]) (shoelace formula)."""
    xs, ys = list(xs), list(ys)
    if len(xs) < 3:
        # Fewer than three vertices cannot enclose any area.
        return 0.0
    # Pair every vertex with its successor, wrapping the last one back to the first.
    twice_area = sum(
        x0 * y1 - x1 * y0
        for x0, y0, x1, y1 in zip(xs, ys, xs[1:] + xs[:1], ys[1:] + ys[:1])
    )
    return abs(twice_area) / 2.0


def _iter_regions(image_entry):
    """Return the regions of one VIA image entry, whether stored as a list or as a dict."""
    regions = image_entry.get("regions") or ()
    # VIA 1.x exports keep regions in a dict keyed by index; VIA 2.x exports use a list.
    return regions.values() if isinstance(regions, dict) else regions


def vgg_to_coco(dataset_dir, vgg_path, outfile, class_keyword):
    """
    Convert annotations from VGG Image Annotator (VIA) format to COCO format.

    Parameters:
    - dataset_dir (str): Directory containing the images named in the VIA file.
    - vgg_path (str): Path to the VIA annotation file (JSON).
    - outfile (str or None): Path of the COCO JSON file to write. When None, the
      output is written next to ``vgg_path`` with the extension replaced by
      "_coco.json" (e.g. "train.json" -> "train_coco.json").
    - class_keyword (str): Key in each region's "region_attributes" that holds the
      class label. Regions without this key are skipped.

    Returns:
    None. The result is written to ``outfile``.

    Example:
        vgg_to_coco("path/to/dataset", "path/to/vgg_annotation.json",
                    "path/to/output_coco.json", "class_keyword")

    Output:
    - "images": one entry per VIA image (file name, id, width, height); every
      image is opened once to read its size.
    - "categories": ``class_keyword`` itself plus every distinct label found, with
      ids assigned from 1 in sorted label order so they are stable across runs.
    - "annotations": one polygon annotation per labelled region, with a unique
      sequential "id" starting at 1.

    Note:
    - Only polygon regions (with "all_points_x"/"all_points_y") are supported.
    """
    with open(vgg_path) as f:
        vgg = json.load(f)

    images_ids_dict = {}
    images_info = []
    for image_id, entry in enumerate(vgg.values()):
        filename = entry["filename"]
        images_ids_dict[filename] = image_id
        image = skimage.io.imread(os.path.join(dataset_dir, filename))
        height, width = image.shape[:2]
        images_info.append({"file_name": filename, "id": image_id, "width": width, "height": height})

    classes = {class_keyword} | {
        region["region_attributes"][class_keyword]
        for entry in vgg.values()
        for region in _iter_regions(entry)
        if class_keyword in region["region_attributes"]
    }
    # Sort before numbering: set iteration order varies between interpreter runs,
    # which would otherwise make the category ids non-reproducible.
    category_ids_dict = {label: cat_id for cat_id, label in enumerate(sorted(classes, key=str), 1)}
    categories = [
        {"supercategory": class_keyword, "id": cat_id, "name": label}
        for label, cat_id in category_ids_dict.items()
    ]

    annotations = []
    for entry in vgg.values():
        for region in _iter_regions(entry):
            attributes = region["region_attributes"]
            if class_keyword not in attributes:
                continue
            shape = region["shape_attributes"]
            x, y = shape["all_points_x"], shape["all_points_y"]
            annotations.append({
                "segmentation": [list(chain.from_iterable(zip(x, y)))],
                "area": _polygon_area(x, y),
                "bbox": [min(x), min(y), max(x) - min(x), max(y) - min(y)],
                "image_id": images_ids_dict[entry["filename"]],
                "category_id": category_ids_dict[attributes[class_keyword]],
                # COCO requires every annotation to carry a unique id.
                "id": len(annotations) + 1,
                "iscrowd": 0,
            })

    coco = {
        "images": images_info,
        "categories": categories,
        "annotations": annotations,
    }
    if outfile is None:
        # Swap only the extension; a plain str.replace would also rewrite ".json"
        # inside directory names and would return the input path unchanged (and so
        # overwrite the VIA file) when the path does not contain ".json".
        root, _ = os.path.splitext(vgg_path)
        outfile = root + "_coco.json"
    with open(outfile, "w") as f:
        json.dump(coco, f)

In [None]:
# Location of the VIA export and of the directory holding the images it refers to.
vgg_path = '.../dataset/images/via_region_data_train.json'
dataset_dir = '.../dataset/images/train/'

# outfile=None makes the converter derive the output path from vgg_path;
# "hail" is the region attribute that carries the class label.
vgg_to_coco(
    dataset_dir=dataset_dir,
    vgg_path=vgg_path,
    outfile=None,
    class_keyword="hail",
)

In [None]:
import json
# Load an existing VIA annotation file to use as a template for new polygon data.
with open('...images/via_region_data_train.json') as f:
    data = json.load(f)
    
# For every training file after the first, add an entry under the keys '2', '3', ...
# NOTE(review): each new key is bound to the very same object as data['1'] (no copy
# is made), so an in-place edit of one entry would be visible through all of them --
# confirm this is intended.
# NOTE(review): all_train_files is not defined in the visible part of this file.
for file_id, filename in enumerate(all_train_files):
    if file_id > 0:
        data[str(file_id + 1)] = data['1']
        
    filename  # bare expression statement; it has no effect here
# Empty lists, presumably meant to collect polygon vertex coordinates -- TODO confirm.
all_points_x = []
all_points_y = []