# Import Azure ML Labeling Tags to Custom Vision Service

In [None]:
# Install the packages used by this notebook, if they are not already available.
# `sys.executable` is interpolated into each shell command so that pip runs
# under the same Python interpreter as this notebook.
import sys
!{sys.executable} -m pip install azure-cognitiveservices-vision-customvision
!{sys.executable} -m pip install azureml-sdk
!{sys.executable} -m pip install azureml-contrib-dataset

In [None]:
import json, os, shutil
import azureml.contrib.dataset

from azureml.core import Workspace, Dataset, Datastore
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region

# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-contrib-dataset of version 1.0.72 or higher is required

## 1. Set up Custom Vision project

Enter the details for your Custom Vision endpoint and training key below:

In [None]:
# Base URL of the Custom Vision resource (placeholder: replace YOUR_REGION).
ENDPOINT = 'https://YOUR_REGION.api.cognitive.microsoft.com'
# Training key of the Custom Vision resource (placeholder: replace before running).
training_key = "<CUSTOM VISION TRAINING KEY>"

# Training client used by the later cells to manage the project, its tags and its images.
# NOTE(review): the key is passed positionally and the endpoint by keyword; the
# package is installed unpinned, and newer SDK releases may expect a different
# constructor signature - confirm against the installed version.
trainer = CustomVisionTrainingClient(training_key, endpoint=ENDPOINT)

### Option 1: Create new project

The below cell will create a new Custom Vision project.  Enter a name for your project below:

Note: If you already have a Custom Vision project, skip this cell and use Option 2 below instead.

In [None]:
# Name to give the new Custom Vision project (replace the placeholder).
project_name = "<PROJECT NAME>"

# Lazily filter the available domains down to the "General" object detection
# domain; next() takes the first match.
matching_domains = (
    candidate
    for candidate in trainer.get_domains()
    if candidate.type == "ObjectDetection" and candidate.name == "General"
)
obj_detection_domain = next(matching_domains)

# Create the project in that domain and report its name.
print("Creating new project...")
project = trainer.create_project(
    project_name,
    domain_id=obj_detection_domain.id,
)
print(project.name, "project created")

### Option 2: Update existing Custom Vision project

The cell below retrieves an existing project by its ID.  Enter your project ID below (you can find this ID in your project's settings in the [Custom Vision portal](https://customvision.ai)):

In [None]:
# ID of the existing Custom Vision project (replace the placeholder).
project_id = "<PROJECT ID>"

# Look the project up by its ID and report its name.
project = trainer.get_project(project_id=project_id)
print(project.name, "project retrieved")

## 2. Get labeled dataset from Azure ML

After labeling images with Azure Machine Learning, you can export the tags as an *Azure ML Dataset*:

<img align="left" src="../assets/aml_label_export.png">

Retrieve the resulting dataset name from your *Datasets* in Azure Machine Learning Studio, and enter it below.  Similarly, enter the details for your subscription, resource group, and workspace.  Alternatively, you can load your Azure ML workspace from a [workspace config.json file](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-environment#workspace).

In [None]:
# Azure ML workspace coordinates and the name of the exported labeled dataset
# (replace the placeholders).
subscription_id = '<SUBSCRIPTION ID>'
resource_group = '<RESOURCE GROUP>'
workspace_name = '<AML WORKSPACE NAME>'
dataset_name = '<LABELED DATASET NAME>'

# Connect to the workspace.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Fetch the labeled dataset by name and load it into a pandas dataframe.
dataset = Dataset.get_by_name(ws, name=dataset_name)
df = dataset.to_pandas_dataframe()

## 3. Download images and parse labels

We've downloaded the image locations and associated label information into a dataframe `df` and need to download the actual images into a temporary directory by parsing the image details.  The downloaded Azure ML Dataset gives us a `StreamInfo` object that we need to parse to get the (1) datastore name and (2) image paths within the datastore. 

Note: The below code sample converts the `StreamInfo` object into a string and parses it through string operations.  This logic should be changed to access the `StreamInfo` properties directly.

In [None]:
# HACK: the datastore name and the image paths are recovered by parsing the
# string form of the StreamInfo objects in `image_url`.
# TODO: parse StreamInfo correctly (access its properties directly).

# Fail early with a clear message instead of an opaque IndexError on df.iloc[0].
if df.empty:
    raise ValueError("The labeled dataset contains no rows; there is nothing to import.")

# Only the first row is inspected, so every image is assumed to live in the
# same datastore.
stream_info_text = str(df.iloc[0].image_url)
# The text between the first '[' and the next ']' holds the StreamInfo arguments.
arguments_text = stream_info_text.split('[')[1].split(']')[0]
# json.loads requires double-quoted strings, so swap the single quotes first.
ds = json.loads(arguments_text.replace("'", "\""))['datastoreName']

# get datastore
blob_datastore = Datastore.get(ws, ds)

# create temp directory for labeled dataset download
# (exist_ok avoids the check-then-create race of os.path.exists + os.makedirs)
tmp_dir = '../tmp'
os.makedirs(tmp_dir, exist_ok=True)


def _path_within_datastore(stream_info):
    """Return the text between the first '//' and the following '[' of str(stream_info)."""
    return str(stream_info).split('//')[1].split("[")[0]


# path of each image inside the datastore, used later as the download prefix
df['path_to_download'] = df['image_url'].apply(_path_within_datastore)

### Prep images and format tags for Custom Vision

In [None]:
# Build one ImageFileCreateEntry per labeled image: download the image, convert
# each label's corner coordinates into a Custom Vision Region (left/top/width/
# height), and create any tag that does not exist in the project yet.
tagged_ims = []
tags = trainer.get_tags(project.id)
# Index the tags by name so each label is resolved with a dict lookup instead
# of rebuilding the list of names (and catching a bare except) for every label.
tags_by_name = {existing_tag.name: existing_tag for existing_tag in tags}

for _, img in df.iterrows():
    prefix = img['path_to_download']
    blob_datastore.download(target_path=tmp_dir, prefix=prefix)
    filename = os.path.join(tmp_dir, prefix)

    regions = []

    # parse labels
    for label in img['label']:
        label_name = label['label']

        # corner coordinates -> origin plus size
        left = label['topX']
        top = label['topY']
        width = label['bottomX'] - left
        height = label['bottomY'] - top

        # retrieve tag object by label name, creating it if it does not exist yet
        tag = tags_by_name.get(label_name)
        if tag is None:
            print("Creating new tag for:", label_name)
            tag = trainer.create_tag(project.id, label_name)
            # record the new tag locally rather than re-fetching all tags
            tags.append(tag)
            tags_by_name[label_name] = tag

        # create bounding box regions
        regions.append(Region(tag_id=tag.id, left=left, top=top, width=width, height=height))

    # Images without any label are not queued for upload (same as before).
    if not regions:
        continue

    # Read the image once per image, after all of its regions are collected.
    # Doing this inside the label loop queued the same image once per label.
    with open(filename, mode="rb") as im_data:
        tagged_ims.append(ImageFileCreateEntry(name=filename, contents=im_data.read(), regions=regions))

## 4. Upload images and tags to Custom Vision

In [None]:
# The Custom Vision service accepts a limited number of images per upload
# call (64 according to the service documentation), so upload in chunks.
MAX_IMAGES_PER_BATCH = 64

print("Uploading images and tags")
try:
    for start in range(0, len(tagged_ims), MAX_IMAGES_PER_BATCH):
        batch = tagged_ims[start:start + MAX_IMAGES_PER_BATCH]
        upload_result = trainer.create_images_from_files(project.id, images=batch)

        # Report images the service did not accept instead of ignoring the result.
        if not upload_result.is_batch_successful:
            print("Image batch upload reported problems:")
            for image_result in upload_result.images:
                if image_result.status != "OK":
                    print(" ", image_result.source_url, image_result.status)
finally:
    # clean up temp directory, even when the upload raised; the image bytes are
    # already held in memory by tagged_ims, so the files are no longer needed
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)