# Converting an AML Labeled Dataset to a COCO Annotation File

For Image Multi-Label and Image Multi-Class labeling projects.


In [None]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-contrib-dataset of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset
import azureml.contrib.dataset

# Coordinates of the Azure ML workspace that holds the labeled dataset.
subscription_id = 'f375b912-331c-4fc5-8e9f-2d7205e3e036'
resource_group = 'labeling-canary-rg'
workspace_name = 'MLAssistCanary'

# Build the workspace handle from the three identifiers above.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Fetch the dataset registered under this name in the workspace.
dataset = Dataset.get_by_name(
    workspace,
    name='ML(voc2012_ml4)_canary_1123_20201124_205854',
)

In [None]:
# Materialize the dataset as a pandas DataFrame; the conversion code below
# reads the 'label' and 'image_url' columns of each row.
df = dataset.to_pandas_dataframe()

In [None]:
# Derive the labeling project type from the dataset description.
# project_type stays None unless the description mentions 'ImageClassification'.
# `or ''` keeps a dataset without a description (None) from raising here.
project_type = None
if 'ImageClassification' in (dataset.description or ''):
    project_type = "ImageMultiClass"

In [None]:
from datetime import datetime
# Category name -> numeric category id. Ids start at 1 and are handed out in
# the order names are first seen by get_category_id().
category_id_map = {}

def get_category_id(category_name: str):
    """Return the numeric id for *category_name*, registering it on first use.

    A name not yet present in the module-level ``category_id_map`` is assigned
    the next free id (current map size + 1); later calls with the same name
    return that same id.
    """
    try:
        return category_id_map[category_name]
    except KeyError:
        # First time this name is seen: allocate the next sequential id.
        new_id = len(category_id_map) + 1
        category_id_map[category_name] = new_id
        return new_id

def convert_df_to_coco(df):
    """Convert a labeled-dataset DataFrame into a COCO-style dictionary.

    Args:
        df: Object whose ``iterrows()`` yields ``(index, row)`` pairs. Each
            row must provide:

            * ``row['label']`` - a list of label names (multi-label), a
              single label name as a string (multi-class), or ``None`` for
              an unlabeled image.
            * ``row['image_url']`` - an object exposing
              ``resource_identifier`` and ``to_pod()``; the pod must contain
              ``'handler'``, ``'arguments'['datastoreName']`` and
              ``'resourceIdentifier'``.

    Returns:
        A dict with three keys:

        * ``'images'`` - one entry per row, with 1-based ``id``,
          ``file_name``, ``coco_url`` and ``date_captured``. ``width`` and
          ``height`` are set to 0.0 (they are not computed here).
        * ``'annotations'`` - one entry per (image, label) pair, with
          1-based ``id``, ``category_id``, ``image_id`` and ``area`` = 0.0.
        * ``'categories'`` - one entry per name in the module-level
          ``category_id_map``. That map is not reset by this function, so
          names registered by earlier calls are included as well.
    """
    # One timestamp (naive UTC, ISO-8601) shared by every image entry.
    now = datetime.utcnow().isoformat()

    images = []
    annotations = []
    for image_id, (_, row) in enumerate(df.iterrows(), start=1):
        labels = row['label']
        if labels is None:
            # Unlabeled image: keep the image entry, emit no annotations.
            labels = []
        elif isinstance(labels, str):
            # A single label arrives as a bare string. Iterating over it
            # directly would yield one bogus category per character.
            labels = [labels]

        for label in labels:
            annotations.append({
                'id': len(annotations) + 1,  # 1-based, unique across all images
                'category_id': get_category_id(label),
                'image_id': image_id,
                'area': 0.0,
            })

        image_url = row['image_url']
        file_name = image_url.resource_identifier
        pod = image_url.to_pod()
        images.append({
            'id': image_id,
            'width': 0.0,
            'height': 0.0,
            'file_name': file_name,
            # <handler>://<datastore name>/<path inside the datastore>
            'coco_url': "{}://{}/{}".format(
                pod['handler'],
                pod['arguments']['datastoreName'],
                pod['resourceIdentifier'],
            ),
            'date_captured': now,
        })

    # Categories come from the shared name -> id map populated above.
    categories = [
        {'id': category_id, 'name': category_name}
        for category_name, category_id in category_id_map.items()
    ]

    return {
        'images': images,
        'annotations': annotations,
        'categories': categories,
    }

In [None]:
# Run the conversion on the DataFrame loaded above; the result is a dict with
# 'images', 'annotations' and 'categories' keys.
coco_obj = convert_df_to_coco(df)

In [None]:
# Bare expression as the last line of the cell: presumably here so the
# notebook renders the resulting dictionary for inspection.
coco_obj