# Import AML Labeler Tags to Custom Vision

In [None]:
import json, os, shutil
import azureml.contrib.dataset

from azureml.core import Workspace, Dataset, Datastore

from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region

from PIL import Image

## Download labeled dataset from Azure ML

In [None]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-contrib-dataset of version 1.0.72 or higher is required

from azureml.core import Workspace, Dataset
import azureml.contrib.dataset

# Coordinates of the Azure ML workspace and the name of the labeled dataset
# registered in it. Replace every placeholder before running the cell.
subscription_id = '<SUBSCRIPTION ID>'
resource_group = '<RESOURCE GROUP>'
workspace_name = '<AML WORKSPACE NAME>'
dataset_name = '<LABELED DATASET NAME>'

# Handle to the workspace; later cells reuse it to look up the datastore.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Look the dataset up by name, then load it as a pandas DataFrame. Later cells
# read its 'image_url' and 'label' columns.
dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)
df = dataset.to_pandas_dataframe()

## Set up Custom Vision project

In [None]:
# Custom Vision training endpoint; replace YOUR_REGION with the region of the
# Custom Vision resource that owns the training key below.
ENDPOINT = 'https://YOUR_REGION.api.cognitive.microsoft.com'
project_name = "Aerial Images"

training_key = "<CUSTOM VISION TRAINING KEY>"
trainer = CustomVisionTrainingClient(training_key, endpoint=ENDPOINT)

# Find the object detection domain. A default of None is supplied so that a
# missing domain produces a descriptive error instead of a bare StopIteration.
obj_detection_domain = next(
    (
        domain
        for domain in trainer.get_domains()
        if domain.type == "ObjectDetection" and domain.name == "General"
    ),
    None,
)
if obj_detection_domain is None:
    raise RuntimeError(
        "No 'General' ObjectDetection domain was returned by the Custom Vision "
        "endpoint; check ENDPOINT and training_key."
    )

# Create a new project
print("Creating project...")
project = trainer.create_project(project_name, domain_id=obj_detection_domain.id)

### Create tags

In [None]:
# sample tags - TODO: enable many tags
# One Custom Vision tag is created per label class, in the order listed; the
# tag objects are kept because later cells need their ids to build regions.
vehicle_tag, building_tag = (
    trainer.create_tag(project.id, tag_name)
    for tag_name in ("vehicle", "building")
)

## Download images and parse labels

In [None]:
# HACK: the datastore name is scraped from the text form of the first row's
# image_url because there is no documentation on how to parse StreamInfo.
# TODO: parse StreamInfo correctly
stream_text = str(df.iloc[0].image_url)

# Keep the text after the first '[' and before the closing ']'. Presumably this
# is a Python-style dict literal -- confirm against an actual StreamInfo value.
bracketed = stream_text.split('[')[1].split(']')[0]

# Swap single quotes for double quotes so the fragment can be read as JSON.
ds = json.loads(bracketed.replace("'", "\""))['datastoreName']

# get datastore
blob_datastore = Datastore.get(ws, ds)

In [None]:
# create temp directory for labeled dataset download
tmp_dir = '../tmp'
# exist_ok avoids the check-then-create race and also fails loudly if the path
# exists but is not a directory.
os.makedirs(tmp_dir, exist_ok=True)


def _datastore_relative_path(stream_info):
    """Return the text between the first '//' and the first '[' of str(stream_info).

    HACK: there is no documentation on how to parse StreamInfo, so the path is
    cut out of its string form. TODO: parse StreamInfo correctly.
    """
    return str(stream_info).split('//')[1].split("[")[0]


# Path of each image inside the datastore; used later both as the download
# prefix and as the file location under tmp_dir.
df['path_to_download'] = df['image_url'].apply(_datastore_relative_path)

## Prep images and format tags for Custom Vision

In [None]:
# Build one ImageFileCreateEntry per labeled image, carrying all of that
# image's bounding boxes as Custom Vision regions.
tagged_ims = []

# Label name -> Custom Vision tag id. A lookup table makes an unknown label an
# explicit case instead of reusing the tag id left over from a previous label.
tag_ids_by_label = {
    'vehicle': vehicle_tag.id,
    'building': building_tag.id,
}

for _, img in df.iterrows():
    prefix = img['path_to_download']
    blob_datastore.download(target_path=tmp_dir, prefix=prefix)
    filename = os.path.join(tmp_dir, prefix)

    # Opening the file fails fast if the download is missing or is not a
    # readable image. The handle is closed immediately: the label coordinates
    # are passed to Region unchanged, so pixel dimensions are not needed.
    # NOTE(review): this assumes the AML labels are already normalized to
    # [0, 1] as Custom Vision expects -- confirm against the dataset.
    with Image.open(filename):
        pass

    regions = []

    # parse labels
    for label in img['label']:
        label_name = label['label']
        tag_id = tag_ids_by_label.get(label_name)
        if tag_id is None:
            # No Custom Vision tag exists for this label; skipping is safer
            # than attaching the box to an unrelated tag.
            print("Skipping label '{}' in {}: no matching Custom Vision tag".format(
                label_name, filename))
            continue

        left = label['topX']
        top = label['topY']

        # create bounding box regions (corner pair -> origin + size)
        regions.append(Region(
            tag_id=tag_id,
            left=left,
            top=top,
            width=label['bottomX'] - left,
            height=label['bottomY'] - top,
        ))

    # Images without any usable region are not uploaded.
    if not regions:
        continue

    # Queue the image exactly once, after all of its regions are collected.
    with open(filename, mode="rb") as im_data:
        tagged_ims.append(ImageFileCreateEntry(name=filename, contents=im_data.read(), regions=regions))

## Upload images and tags to Custom Vision

In [None]:
# The Custom Vision service accepts at most 64 images per
# create_images_from_files call, so the queue is sent in chunks.
MAX_IMAGES_PER_BATCH = 64

print("Uploading images and tags")
for batch_start in range(0, len(tagged_ims), MAX_IMAGES_PER_BATCH):
    batch = tagged_ims[batch_start:batch_start + MAX_IMAGES_PER_BATCH]
    upload_result = trainer.create_images_from_files(project.id, images=batch)

    # Report per-image problems instead of silently ignoring the response.
    if not upload_result.is_batch_successful:
        for image_result in upload_result.images:
            if image_result.status != "OK":
                print("Image not uploaded:", image_result.source_url, image_result.status)

In [None]:
# optional clean up of temp directory
# WARNING: this removes tmp_dir ('../tmp') recursively. The directory is reused
# if it already existed before the notebook ran, so any unrelated files in it
# are deleted as well.
shutil.rmtree(tmp_dir)