In [None]:
# import fiftyone as fo
import fiftyone.zoo as foz

# Download and load the validation split of COCO-2017
dataset = foz.load_zoo_dataset("coco-2017", split="validation")

In [11]:
import json
from typing import Dict, List
import numpy as np

from typing_extensions import Literal, TypedDict
from kiliautoml.utils.type import CategoryT

# ## DETECTRON FORMAT

class ImageCoco(TypedDict):
    id: int
    license: int
    file_name: str
    height: int
    width: int
    date_captured: None


class CategoryCoco(TypedDict):
    id: int
    name: str
    supercategory: str


class AnnotationsCoco(TypedDict):
    id: int
    image_id: int  # -> external_id : the last part of the url
    category_id: int
    bbox: List[int]
    segmentation: List[List[float]]  # [[x, y, x, y, x ...]]
    area: int
    iscrowd: int


class CocoFormat(TypedDict):
    info: Dict  # type: ignore
    licenses: List[Dict]  # type: ignore
    categories: List[CategoryCoco]
    images: List[ImageCoco]
    annotations: List[AnnotationsCoco]


# ## KILI Polygon Semantic Format

class NormalizedVertice(TypedDict):
    x: float
    y: float


class NormalizedVertices(TypedDict):
    normalizedVertices: List[NormalizedVertice]


class SemanticAnnotation(TypedDict):
    boundingPoly: List[NormalizedVertices]  # len(self.boundingPoly) == 1
    mid: str
    type: Literal["semantic"]
    categories: List[CategoryT]


class SemanticJob(TypedDict):
    annotations: List[SemanticAnnotation]


job_name = "SEMANTIC_JOB"


def convert_coco_to_kili(coco_format: CocoFormat) -> Dict[str, SemanticJob]:
    """
    Coco format:
    <dataset_dir>/
        data/
            <filename0>.<ext>
            <filename1>.<ext>
            ...
        labels.json

    We convert the json to kili format.
    """
    mapping_external_id_to_semanticjob: Dict[str, SemanticJob] = {}

    print("Nb categories", len(coco_format["categories"]))
    print("Nb annotations", len(coco_format["annotations"]))
    print("Nb images", len(coco_format["images"]))

    for coco_annotation in coco_format["annotations"]:
        # Extract Coco info
        category_names = [
            cat["name"] for cat in coco_format["categories"] if cat["id"] == coco_annotation["category_id"]
        ]
        assert len(category_names) == 1
        category_name = category_names[0]

        image_names = [
            image
            for image in coco_format["images"]
            if image["id"] == coco_annotation["image_id"]
        ]
        assert len(image_names) == 1
        external_id = image_names[0]["file_name"]
        height, width = image_names[0]["height"], image_names[0]["width"]

        # convert to Kili
        # Each connected component becones a new object in Kili format
        connected_components  : List[SemanticAnnotation]= []
        for single_connected_component in coco_annotation["segmentation"]:
            tab_xy = single_connected_component  # We take only the first connected component
            if type(tab_xy) != list:
                print(single_connected_component)
                continue
            tab_x = list(np.array(tab_xy[::2]) / width )
            tab_y = list(np.array(tab_xy[1::2]) / height )

            normalizedVertices: NormalizedVertices = {
                "normalizedVertices": [NormalizedVertice(x=x, y=y) for x, y in zip(tab_x, tab_y)]
            }
            boundingPoly = [normalizedVertices]
            categories = [CategoryT(name=category_name, confidence=100)]

            annotation_kili = SemanticAnnotation(
                boundingPoly=boundingPoly,
                mid=None,# type:ignore  # Created on the fly
                type="semantic",
                categories=categories,
            )
            connected_components.append(annotation_kili)
        if external_id not in mapping_external_id_to_semanticjob:
            mapping_external_id_to_semanticjob[external_id] = SemanticJob(annotations=connected_components)
        else:
            previous_annotatations = mapping_external_id_to_semanticjob[external_id]["annotations"]
            mapping_external_id_to_semanticjob[external_id] = SemanticJob(annotations=previous_annotatations + connected_components)

    return mapping_external_id_to_semanticjob


def convert_coco_to_kili_json_interface(coco_format: CocoFormat):
    """
    Coco format:
    <dataset_dir>/
        data/
            <filename0>.<ext>
            <filename1>.<ext>
            ...
        labels.json

    We convert the json to kili format.
    """
    coco_categories = coco_format["categories"]

    import random

    number_of_colors = len(coco_categories)

    colors = [
        "#" + "".join([random.choice("0123456789ABCDEF") for __ in range(6)])
        for _ in range(number_of_colors)
    ]

    def camelCase(st):
        output = "".join(x for x in st.title() if x.isalnum())
        return output[0].lower() + output[1:]

    categories = {
        camelCase(cat["name"]): {
            "children": [],
            "name": cat["name"],
            "color": color,
            "id": cat["id"],
        }
        for cat, color in zip(coco_categories, colors)
    }

    json_interface = {
        "jobs": {
            job_name: {
                "content": {"categories": categories, "input": "radio"},
                "instruction": "Categories",
                "isChild": False,
                "tools": ["semantic"],
                "mlTask": "OBJECT_DETECTION",
                "models": {"interactive-segmentation": {"job": job_name + "_MARKER"}},
                "isVisible": True,
                "required": 1,
                "isNew": False,
            },
            job_name + "_MARKER": {
                "content": {"categories": categories, "input": "radio"},
                "instruction": "Categories",
                "isChild": False,
                "tools": ["marker"],
                "mlTask": "OBJECT_DETECTION",
                "isModel": True,
                "isVisible": False,
                "required": 0,
                "isNew": False,
            },
        }
    }

    return json_interface


In [12]:
from kili.client import Kili
import json

with open("/Users/raph/fiftyone/coco-2017/raw/instances_val2017.json", "r") as f:
    coco_format = json.load(f)


json_interface = convert_coco_to_kili_json_interface(coco_format=coco_format)

assets = [
    {
        "externalId": asset["file_name"],
        "content": "/Users/raph/fiftyone/coco-2017/validation/data/" + asset["file_name"],
        "metadata": {},
    }
    for asset in coco_format["images"][:50]
]

kili = Kili()

# Create project
project = kili.create_project(
    input_type="IMAGE",
    json_interface=json_interface,
    title="Coco to Kili",
    description="",
    project_type=None,
)
project_id = project["id"] # type:ignore

# Add assets
external_id_array = [a.get("externalId") for a in assets]
content_array = [a.get("content") for a in assets]
json_metadata_array = [a.get("metadata") for a in assets]
kili.append_many_to_dataset(
    project_id=project_id,
    content_array=content_array,# type:ignore
    external_id_array=external_id_array,# type:ignore
    json_metadata_array=json_metadata_array,# type:ignore
)

1it [00:08,  8.23s/it]


['000000397133.jpg',
 '000000037777.jpg',
 '000000252219.jpg',
 '000000087038.jpg',
 '000000174482.jpg',
 '000000403385.jpg',
 '000000006818.jpg',
 '000000480985.jpg',
 '000000458054.jpg',
 '000000331352.jpg',
 '000000296649.jpg',
 '000000386912.jpg',
 '000000502136.jpg',
 '000000491497.jpg',
 '000000184791.jpg',
 '000000348881.jpg',
 '000000289393.jpg',
 '000000522713.jpg',
 '000000181666.jpg',
 '000000017627.jpg',
 '000000143931.jpg',
 '000000303818.jpg',
 '000000463730.jpg',
 '000000460347.jpg',
 '000000322864.jpg',
 '000000226111.jpg',
 '000000153299.jpg',
 '000000308394.jpg',
 '000000456496.jpg',
 '000000058636.jpg',
 '000000041888.jpg',
 '000000184321.jpg',
 '000000565778.jpg',
 '000000297343.jpg',
 '000000336587.jpg',
 '000000122745.jpg',
 '000000219578.jpg',
 '000000555705.jpg',
 '000000443303.jpg',
 '000000500663.jpg',
 '000000418281.jpg',
 '000000025560.jpg',
 '000000403817.jpg',
 '000000085329.jpg',
 '000000329323.jpg',
 '000000239274.jpg',
 '000000286994.jpg',
 '00000051132

In [13]:
from tqdm import tqdm


mapping_external_id_to_semanticjob = convert_coco_to_kili(coco_format=coco_format)
asset_ids = kili.assets(project_id=project_id, fields=["id", "externalId"], first=1000)

for i, asset_id in tqdm(enumerate(asset_ids)):
    external_id = asset_id["externalId"]

    if external_id in mapping_external_id_to_semanticjob:
        semantic_job = mapping_external_id_to_semanticjob[external_id]

        print(f"Nb annotation on image {external_id}", len(semantic_job["annotations"]))
        kili.append_to_labels(
            label_asset_id=asset_id["id"],
            json_response={"SEMANTIC_JOB": SemanticJob(annotations=semantic_job["annotations"])},
        )
    else:
        print("Warning", external_id)


Nb categories 80
Nb annotations 36781
Nb images 5000
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size
counts
size

100%|██████████| 50/50 [00:00<00:00, 72.20it/s] 
1it [00:00,  8.54it/s]

Nb annotation on image 000000397133.jpg 19
Nb annotation on image 000000037777.jpg 14


3it [00:00,  6.16it/s]

Nb annotation on image 000000252219.jpg 9
Nb annotation on image 000000087038.jpg 17


5it [00:00,  6.35it/s]

Nb annotation on image 000000174482.jpg 12
Nb annotation on image 000000403385.jpg 2


7it [00:01,  5.43it/s]

Nb annotation on image 000000006818.jpg 1
Nb annotation on image 000000480985.jpg 14


9it [00:01,  6.77it/s]

Nb annotation on image 000000458054.jpg 11
Nb annotation on image 000000331352.jpg 2


11it [00:01,  6.46it/s]

Nb annotation on image 000000296649.jpg 29
Nb annotation on image 000000386912.jpg 17


13it [00:02,  6.63it/s]

Nb annotation on image 000000502136.jpg 4
Nb annotation on image 000000491497.jpg 12


15it [00:02,  7.40it/s]

Nb annotation on image 000000184791.jpg 7
Nb annotation on image 000000348881.jpg 6


17it [00:02,  8.24it/s]

Nb annotation on image 000000289393.jpg 4
Nb annotation on image 000000522713.jpg 13


19it [00:02,  8.19it/s]

Nb annotation on image 000000181666.jpg 17
Nb annotation on image 000000017627.jpg 16


21it [00:03,  8.03it/s]

Nb annotation on image 000000143931.jpg 3
Nb annotation on image 000000303818.jpg 20


23it [00:03,  7.73it/s]

Nb annotation on image 000000463730.jpg 23
Nb annotation on image 000000460347.jpg 7


25it [00:03,  7.40it/s]

Nb annotation on image 000000322864.jpg 7
Nb annotation on image 000000153299.jpg 2


27it [00:03, 10.38it/s]

Nb annotation on image 000000308394.jpg 4
Nb annotation on image 000000456496.jpg 5


31it [00:04, 11.39it/s]

Nb annotation on image 000000041888.jpg 3
Nb annotation on image 000000184321.jpg 1


33it [00:04,  7.92it/s]

Nb annotation on image 000000565778.jpg 9
Nb annotation on image 000000297343.jpg 1


35it [00:04,  8.21it/s]

Nb annotation on image 000000336587.jpg 2
Nb annotation on image 000000122745.jpg 1


37it [00:04,  8.50it/s]

Nb annotation on image 000000219578.jpg 4
Nb annotation on image 000000555705.jpg 2


39it [00:05,  7.69it/s]

Nb annotation on image 000000443303.jpg 3
Nb annotation on image 000000500663.jpg 3


41it [00:05,  7.84it/s]

Nb annotation on image 000000418281.jpg 2
Nb annotation on image 000000025560.jpg 4


43it [00:05,  7.70it/s]

Nb annotation on image 000000403817.jpg 3
Nb annotation on image 000000085329.jpg 4


45it [00:06,  7.01it/s]

Nb annotation on image 000000329323.jpg 14
Nb annotation on image 000000239274.jpg 14


47it [00:06,  7.86it/s]

Nb annotation on image 000000286994.jpg 9
Nb annotation on image 000000511321.jpg 4


49it [00:06,  8.55it/s]

Nb annotation on image 000000314294.jpg 2
Nb annotation on image 000000233771.jpg 20


50it [00:06,  7.60it/s]


In [14]:
KILI_URL="https://cloud.kili-technology.com/"
print(f"{KILI_URL}label/projects/{project_id}/menu/queue?currentPage=1&pageSize=20")

https://cloud.kili-technology.com/label/projects/cl4wsghwh1xvb0mtgbuu7f4g1/menu/queue?currentPage=1&pageSize=20
