In [1]:
# import fiftyone as fo
import fiftyone.zoo as foz

# Fetch the COCO-2017 validation split into a local folder (only if it is
# missing) and load it through the FiftyOne dataset zoo.
dataset_dir = "./coco_dataset"
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    dataset_dir=dataset_dir,
)

Downloading split 'validation' to './coco_dataset/validation' if necessary
Found annotations at 'coco_dataset/raw/instances_val2017.json'
Images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [2]:
import json
from typing import Dict, List, Optional

import numpy as np
from typing_extensions import Literal, TypedDict

from kiliautoml.utils.type import CategoryT

# ## DETECTRON FORMAT

class ImageCoco(TypedDict):
    """One entry of the COCO ``images`` list."""

    id: int  # matched against AnnotationsCoco.image_id
    license: int
    file_name: str  # used as the Kili external id of the asset
    height: int  # y coordinates are divided by this value when normalising
    width: int  # x coordinates are divided by this value when normalising
    # Official COCO files store a capture timestamp string here; None is
    # still accepted so that entries built without a date remain valid.
    date_captured: Optional[str]


# One entry of the COCO "categories" list, i.e. a single object class.
CategoryCoco = TypedDict(
    "CategoryCoco",
    {
        "id": int,  # matched against AnnotationsCoco.category_id
        "name": str,
        "supercategory": str,
    },
)


class AnnotationsCoco(TypedDict):
    """One entry of the COCO ``annotations`` list: a single annotated object."""

    id: int
    image_id: int  # -> external_id : the last part of the url
    category_id: int
    # COCO stores box coordinates as floating point numbers, not integers.
    bbox: List[float]
    segmentation: List[List[float]]  # [[x, y, x, y, x ...]], one flat list per polygon
    # The area is a floating point number in COCO files as well.
    area: float
    iscrowd: int


# Top-level structure of a parsed COCO annotation file.
CocoFormat = TypedDict(
    "CocoFormat",
    {
        "info": Dict,  # type: ignore
        "licenses": List[Dict],  # type: ignore
        "categories": List[CategoryCoco],
        "images": List[ImageCoco],
        "annotations": List[AnnotationsCoco],
    },
)


# ## KILI Polygon Semantic Format

# A single polygon vertex; the converter in this file fills x with a pixel
# abscissa divided by the image width and y with an ordinate divided by the
# image height.
NormalizedVertice = TypedDict(
    "NormalizedVertice",
    {
        "x": float,
        "y": float,
    },
)


# Ordered list of vertices describing the outline of one polygon.
NormalizedVertices = TypedDict(
    "NormalizedVertices",
    {"normalizedVertices": List[NormalizedVertice]},
)


# One semantic (polygon) object attached to an asset.
SemanticAnnotation = TypedDict(
    "SemanticAnnotation",
    {
        "boundingPoly": List[NormalizedVertices],  # len(boundingPoly) == 1
        "mid": str,
        "type": Literal["semantic"],
        "categories": List[CategoryT],
    },
)


# Payload of one semantic job: every polygon annotation of a single asset.
SemanticJob = TypedDict(
    "SemanticJob",
    {"annotations": List[SemanticAnnotation]},
)


# Name of the semantic job; it is used as the job key of the json interface
# built by convert_coco_to_kili_json_interface.
job_name = "SEMANTIC_JOB"


def convert_coco_to_kili(coco_format: CocoFormat) -> Dict[str, SemanticJob]:
    """Convert COCO polygon annotations into Kili semantic jobs.

    Coco format:
    <dataset_dir>/
        data/
            <filename0>.<ext>
            <filename1>.<ext>
            ...
        labels.json

    Only the parsed content of the json file is needed here.

    Args:
        coco_format: parsed COCO json; its "categories", "images" and
            "annotations" lists are read.

    Returns:
        A mapping from image ``file_name`` (used as external id) to a
        ``SemanticJob``. Each polygon of each COCO annotation becomes one
        Kili annotation whose vertices are divided by the image width (x)
        and height (y). Annotations without polygon data (for instance RLE
        masks stored as a dict) contribute no polygon, but their image still
        gets an entry.

    Raises:
        AssertionError: if the category or the image referenced by an
            annotation does not match exactly one entry.
    """
    mapping_external_id_to_semanticjob: Dict[str, SemanticJob] = {}

    print("Nb categories", len(coco_format["categories"]))
    print("Nb annotations", len(coco_format["annotations"]))
    print("Nb images", len(coco_format["images"]))

    # Index categories and images by id once, instead of scanning both lists
    # for every annotation. All matches are kept per id so that the
    # "exactly one match" checks below behave as a full scan would.
    category_names_by_id: Dict[int, List[str]] = {}
    for cat in coco_format["categories"]:
        category_names_by_id.setdefault(cat["id"], []).append(cat["name"])

    images_by_id: Dict[int, List[ImageCoco]] = {}
    for image in coco_format["images"]:
        images_by_id.setdefault(image["id"], []).append(image)

    for coco_annotation in coco_format["annotations"]:
        # Extract Coco info
        category_names = category_names_by_id.get(coco_annotation["category_id"], [])
        assert len(category_names) == 1
        category_name = category_names[0]

        matching_images = images_by_id.get(coco_annotation["image_id"], [])
        assert len(matching_images) == 1
        external_id = matching_images[0]["file_name"]
        height, width = matching_images[0]["height"], matching_images[0]["width"]

        # convert to Kili
        # Each connected component becomes a new object in Kili format
        segmentation = coco_annotation["segmentation"]
        # A dict here is an RLE mask, not a list of polygons: nothing to convert.
        polygons = [] if isinstance(segmentation, dict) else segmentation

        connected_components: List[SemanticAnnotation] = []
        for tab_xy in polygons:
            if not isinstance(tab_xy, list):
                continue
            # Flat [x, y, x, y, ...] list -> normalised coordinates as plain floats.
            tab_x = (np.array(tab_xy[::2]) / width).tolist()
            tab_y = (np.array(tab_xy[1::2]) / height).tolist()

            normalizedVertices: NormalizedVertices = {
                "normalizedVertices": [NormalizedVertice(x=x, y=y) for x, y in zip(tab_x, tab_y)]
            }
            annotation_kili = SemanticAnnotation(
                boundingPoly=[normalizedVertices],
                mid=None,  # type:ignore  # Created on the fly
                type="semantic",
                categories=[CategoryT(name=category_name, confidence=100)],
            )
            connected_components.append(annotation_kili)

        # Group the polygons of every annotation of the same image together.
        semantic_job = mapping_external_id_to_semanticjob.setdefault(
            external_id, SemanticJob(annotations=[])
        )
        semantic_job["annotations"].extend(connected_components)

    return mapping_external_id_to_semanticjob


def convert_coco_to_kili_json_interface(coco_format: CocoFormat, seed: Optional[int] = None):
    """Build a Kili json interface from the categories of a COCO file.

    Coco format:
    <dataset_dir>/
        data/
            <filename0>.<ext>
            <filename1>.<ext>
            ...
        labels.json

    Args:
        coco_format: parsed COCO json; only its "categories" list is read.
        seed: optional seed for the random category colors. With the default
            ``None`` the colors are drawn from the global ``random`` state,
            so they differ between runs unless that state was seeded.

    Returns:
        A dict with a "jobs" entry holding two jobs that share the same
        categories: ``job_name`` (tool "semantic") and ``job_name + "_MARKER"``
        (tool "marker", flagged as a model job and not visible).
    """
    import random

    coco_categories = coco_format["categories"]

    # Use a dedicated generator only when a seed is requested.
    rng = random if seed is None else random.Random(seed)

    # One random "#RRGGBB" color per category.
    colors = [
        "#" + "".join(rng.choice("0123456789ABCDEF") for _ in range(6))
        for _ in coco_categories
    ]

    def camelCase(st):
        """Turn "traffic light" into "trafficLight", dropping non-alphanumeric characters."""
        output = "".join(x for x in st.title() if x.isalnum())
        # Slicing instead of indexing: a name without any alphanumeric
        # character gives "" instead of raising IndexError.
        return output[:1].lower() + output[1:]

    categories = {
        camelCase(cat["name"]): {
            "children": [],
            "name": cat["name"],
            "color": color,
            "id": cat["id"],
        }
        for cat, color in zip(coco_categories, colors)
    }

    marker_job_name = job_name + "_MARKER"

    json_interface = {
        "jobs": {
            job_name: {
                "content": {"categories": categories, "input": "radio"},
                "instruction": "Categories",
                "isChild": False,
                "tools": ["semantic"],
                "mlTask": "OBJECT_DETECTION",
                "models": {"interactive-segmentation": {"job": marker_job_name}},
                "isVisible": True,
                "required": 1,
                "isNew": False,
            },
            marker_job_name: {
                "content": {"categories": categories, "input": "radio"},
                "instruction": "Categories",
                "isChild": False,
                "tools": ["marker"],
                "mlTask": "OBJECT_DETECTION",
                "isModel": True,
                "isVisible": False,
                "required": 0,
                "isNew": False,
            },
        }
    }

    return json_interface


In [6]:
from kili.client import Kili
import json

# Parse the raw COCO annotation file of the validation split.
annotations_path = dataset_dir + "/raw/instances_val2017.json"
with open(annotations_path, "r") as f:
    coco_format = json.load(f)

# Kili json interface derived from the COCO categories.
json_interface = convert_coco_to_kili_json_interface(coco_format=coco_format)

# Number of images uploaded to the project.
LIMIT = 50

images_subset = coco_format["images"][:LIMIT]
assets = [
    dict(
        externalId=image["file_name"],
        content=dataset_dir + "/validation/data/" + image["file_name"],
        metadata={},
    )
    for image in images_subset
]

kili = Kili()

# Create project
project = kili.create_project(
    title="Coco to Kili",
    description="",
    input_type="IMAGE",
    project_type=None,
    json_interface=json_interface,
)
project_id = project["id"]  # type:ignore

# Add assets: the upload call takes one parallel list per field.
external_id_array = [entry["externalId"] for entry in assets]
content_array = [entry["content"] for entry in assets]
json_metadata_array = [entry["metadata"] for entry in assets]
kili.append_many_to_dataset(
    project_id=project_id,
    content_array=content_array,  # type:ignore
    external_id_array=external_id_array,  # type:ignore
    json_metadata_array=json_metadata_array,  # type:ignore
)

50it [09:24, 11.30s/it]


{'id': 'cl4wt3xp01vud0lwdb9qs2d2i'}

In [7]:
from tqdm import tqdm


# Convert every COCO annotation once, then attach the result to the assets
# of the project as labels.
mapping_external_id_to_semanticjob = convert_coco_to_kili(coco_format=coco_format)
# At most the first 1000 assets of the project are requested.
asset_ids = kili.assets(project_id=project_id, fields=["id", "externalId"], first=1000)

for asset in tqdm(asset_ids, total=len(asset_ids)):
    external_id = asset["externalId"]
    semantic_job = mapping_external_id_to_semanticjob.get(external_id)

    if semantic_job is None:
        # No COCO annotation references this image.
        print("Warning", external_id)
        continue

    kili.append_to_labels(
        label_asset_id=asset["id"],
        # Use the shared constant so the key always matches the json interface.
        json_response={job_name: semantic_job},
    )


Nb categories 80
Nb annotations 36781
Nb images 5000


100%|██████████| 1000/1000 [00:02<00:00, 409.59it/s]
27it [00:03, 10.21it/s]



31it [00:04, 10.36it/s]



162it [00:21, 10.06it/s]



280it [00:38,  9.49it/s]



405it [00:57,  5.79it/s]



438it [01:03,  7.73it/s]



759it [01:58,  8.24it/s]



1000it [02:42,  6.16it/s]


In [5]:
# Print the address of the labeling queue of the project.
KILI_URL = "https://cloud.kili-technology.com/"
queue_url = f"{KILI_URL}label/projects/{project_id}/menu/queue?currentPage=1&pageSize=20"
print(queue_url)

https://cloud.kili-technology.com/label/projects/cl4wt2o501yvu0mtg23og3vqp/menu/queue?currentPage=1&pageSize=20


In [9]:
# Display the id of the project together with the name of the semantic job.
project_id, job_name

('cl4wt3xp01vud0lwdb9qs2d2i', 'SEMANTIC_JOB')