# Import Figure8 labels to Custom Vision Service

In [None]:
# install packages if needed
# Each command below invokes pip through sys.executable, i.e. the Python
# interpreter running this kernel, rather than whichever `pip` is on PATH.
import sys
# Custom Vision training SDK (imported by the next code cell)
!{sys.executable} -m pip install azure-cognitiveservices-vision-customvision
# NOTE(review): no azureml module is imported in the visible cells of this
# notebook - confirm whether the next two packages are still required.
!{sys.executable} -m pip install azureml-sdk
!{sys.executable} -m pip install azureml-contrib-dataset
# pandas is used to read the label CSV
!{sys.executable} -m pip install pandas
# Pillow provides the `PIL` package used to decode the downloaded images
!{sys.executable} -m pip install Pillow

In [1]:
import json, os, shutil, requests, math
import pandas as pd

from PIL import Image
from io import BytesIO

from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials

# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
# azureml-contrib-dataset of version 1.0.72 or higher is required

## 1. Set up Custom Vision project

Enter the details for your Custom Vision endpoint and training key below:

In [2]:
# Custom Vision connection settings.
#
# The training key is a secret and must never be stored in the notebook: any
# key that was previously committed here should be treated as leaked and
# regenerated in the Azure portal. Both settings are read from environment
# variables instead:
#   CUSTOM_VISION_ENDPOINT      e.g. 'https://YOUR_REGION.api.cognitive.microsoft.com'
#   CUSTOM_VISION_TRAINING_KEY  the training key of your Custom Vision resource
ENDPOINT = os.environ.get('CUSTOM_VISION_ENDPOINT', 'https://westus2.api.cognitive.microsoft.com')
training_key = os.environ.get('CUSTOM_VISION_TRAINING_KEY')

# Fail early with an actionable message instead of a later authentication error.
if not training_key:
    raise RuntimeError(
        "Set the CUSTOM_VISION_TRAINING_KEY environment variable to your "
        "Custom Vision training key before running this cell."
    )

# The key is sent in the "Training-key" request header on every call.
credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
trainer = CustomVisionTrainingClient(endpoint=ENDPOINT, credentials=credentials)

### Option 1: Create new project

The cell below creates a new Custom Vision project. Enter a name for your project:

Note: If you have an existing project, skip to the next cell.

In [3]:
# project_name = "<PROJECT NAME>"
project_name = "test"

# Find the "General" object detection domain among the domains offered by the
# service. A default of None is passed to next() so that a missing domain
# produces a readable error rather than a bare StopIteration.
obj_detection_domain = next(
    (
        domain
        for domain in trainer.get_domains()
        if domain.type == "ObjectDetection" and domain.name == "General"
    ),
    None,
)
if obj_detection_domain is None:
    raise RuntimeError(
        "No 'General' ObjectDetection domain was returned by the Custom Vision service."
    )

# Create a new project in that domain
print("Creating new project...")
project = trainer.create_project(project_name, domain_id=obj_detection_domain.id)
print(project.name, "project created")

Creating new project...
test project created


### Option 2: Update existing Custom Vision project

The cell below retrieves an existing project by ID. Enter your project ID below (you can retrieve this ID from your project in the [Custom Vision portal](https://www.customvision.ai)):

In [None]:
project_id = "<PROJECT ID>"

# Only look the project up once a real ID has been entered. With the
# placeholder still in place (e.g. during "Run All" after creating a new
# project in Option 1) the lookup is skipped, so the placeholder is never sent
# to the service and any `project` defined by an earlier cell is left untouched.
if project_id == "<PROJECT ID>":
    print("No project ID entered - skipping retrieval of an existing project")
else:
    # Get existing project
    project = trainer.get_project(project_id=project_id)
    print(project.name, "project retrieved")

## 2. Read labeled dataset from csv

In [6]:
# filename = "<PATH TO CSV FILE WITH LABELS>"
filename = "../data/mantech_final.csv"

# Load the label export and keep only the two columns selected here:
# the annotation column and the image URL column.
df = pd.read_csv(filename).loc[:, ['annotation', 'image_url']]
df.head()

Unnamed: 0,annotation,image_url
0,"[{""id"": ""1322953c-2209-4113-8b51-b2a2d2311e69""...",https://cf-315f62h.s3.us-east-1.amazonaws.com/...
1,"[{""id"": ""1e8fa8b9-e530-4b45-bd56-eb30f5dfc9ef""...",https://cf-315f62h.s3.us-east-1.amazonaws.com/...
2,"[{""id"": ""bf99ed99-f008-46a0-b0ef-3588e329dd91""...",https://cf-315f62h.s3.us-east-1.amazonaws.com/...
3,"[{""id"": ""34b9ff40-bfd7-47a0-9fd3-e6f9c72928f6""...",https://cf-315f62h.s3.us-east-1.amazonaws.com/...
4,"[{""id"": ""521e21d4-99a2-44ad-abb1-3c4aa4d31bfb""...",https://cf-315f62h.s3.us-east-1.amazonaws.com/...


## 3. Prep images and format tags for Custom Vision

In [9]:
def get_image_from_url(url, timeout=30):
    """Download an image and return it together with its pixel dimensions.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout, so that an
            unresponsive server cannot stall the notebook indefinitely.

    Returns:
        A tuple ``(img, width, height)`` where ``img`` is the opened PIL image
        and ``width`` / ``height`` are taken from ``img.size``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors (403, 404, ...) here; otherwise the error page would
    # be handed to PIL and fail with an unrelated "cannot identify image" error.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    width, height = img.size
    return img, width, height

def get_bbox(label, width, height):
    """Scale a box given in absolute units to fractions of the image size.

    ``label`` is indexed with the keys 'x', 'y', 'w' and 'h'. The 'x' and 'w'
    values are divided by ``width``; the 'y' and 'h' values by ``height``.

    Returns:
        The tuple ``(x / width, y / height, w / width, h / height)``.
    """
    # Pair every key with the image extent it is measured against.
    key_and_extent = (('x', width), ('y', height), ('w', width), ('h', height))
    return tuple(label[key] / extent for key, extent in key_and_extent)

In [16]:
# Build one ImageFileCreateEntry per distinct image URL, carrying every
# bounding box annotated for that image.
#
# Earlier revisions tracked the "previous" URL while walking the rows. That
# attached the first box of each new image to the image before it, never reset
# the shared region list, and never emitted the final image. Grouping the
# annotations by URL first avoids all of that bookkeeping.

# create temp directory for labeled dataset download
tmp_dir = '../tmp'
os.makedirs(tmp_dir, exist_ok=True)

# Every box in this dataset receives the same tag, so resolve it (creating it
# on first use) once up front instead of once per row.
label_name = 'vehicle'
tags = trainer.get_tags(project.id)
tag = next((existing for existing in tags if existing.name == label_name), None)
if tag is None:
    print("Creating new tag for:", label_name)
    tag = trainer.create_tag(project.id, label_name)
    tags = trainer.get_tags(project.id)

# Collect the annotations of each image keyed by URL. The 'annotation' column
# is parsed as a JSON list; every element of that list is kept (not just the
# first), and rows that share a URL are merged. Dict insertion order keeps the
# images in the order they first appear in the CSV.
annotations_by_url = {}
for _, row in df.iterrows():
    annotations_by_url.setdefault(row['image_url'], []).extend(json.loads(row['annotation']))

tagged_ims = []
for img_url, annotations in annotations_by_url.items():
    # Nothing to tag: skip the download instead of failing on an empty list.
    if not annotations:
        print(f"Skipping image without annotations: {img_url}")
        continue

    # Download the image once and store a local copy for the upload step.
    img, width, height = get_image_from_url(img_url)
    image_path = os.path.join(tmp_dir, img_url.split('/')[-1])
    img.save(image_path)
    print(f"Processing image: {os.path.basename(image_path)}")

    # Scale each box with the size of the image it belongs to.
    regions = []
    for annotation in annotations:
        left, top, box_width, box_height = get_bbox(annotation['coordinates'], width, height)
        regions.append(Region(tag_id=tag.id, left=left, top=top, width=box_width, height=box_height))

    with open(image_path, mode="rb") as im_data:
        tagged_ims.append(ImageFileCreateEntry(name=image_path, contents=im_data.read(), regions=regions))

## 4. Upload images and tags to Custom Vision

In [None]:
# Upload the prepared images to the project in fixed-size batches.
BATCH_SIZE = 64     # images sent per create_images_from_files call
MAX_BATCHES = None  # set to an int to upload only the first N batches (e.g. a trial run)

# The previous hard-coded limit of two batches silently dropped every image
# after the first 128 while the progress message still reported the full
# batch count. The limit is now opt-in and is reflected in the reported total.
total_batches = math.ceil(len(tagged_ims) / BATCH_SIZE)
if MAX_BATCHES is not None:
    total_batches = min(total_batches, MAX_BATCHES)

counter = 0
for i in range(0, len(tagged_ims), BATCH_SIZE):
    if counter >= total_batches:
        print('Stopping after', counter, 'batches; remaining images were not uploaded')
        break

    batch = tagged_ims[i:i + BATCH_SIZE]
    # NOTE(review): newer SDK releases expect an ImageFileCreateBatch here
    # rather than the `images=` keyword - confirm against the installed version.
    upload_result = trainer.create_images_from_files(project.id, images=batch)
    counter += 1
    print('Uploaded batch ', counter, ' of ', total_batches)

    # Report images the service did not accept instead of ignoring the result.
    if not upload_result.is_batch_successful:
        for image in upload_result.images:
            if image.status != "OK":
                print('  Image status:', image.status)

# clean up temp directory
# shutil.rmtree(tmp_dir)