# Create a Semantic Segmentation Network

Copyright (c) Microsoft Corporation.
Licensed under the MIT license.

This tutorial will walk you through a typical workflow for creating a new model for the Azure Percept DK.
See the [GitHub](https://github.com/microsoft/azure-percept-advanced-development) for the rest of the
steps to port this model to the device.

In [None]:
# This cell imports everything we need
from azureml.core import ComputeTarget
from azureml.core import Workspace
from azureml.core import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig
from azureml.core import Environment
from azureml.core import Experiment
from azureml.tensorboard import Tensorboard
from PIL import Image
from torchvision import transforms as T
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import torch
import train  # Must be in the directory that contains the train.py script!

%matplotlib inline
plt.rcParams["figure.figsize"] = (20, 20)

%pip install onnxruntime
import onnx
import onnxruntime

In [None]:
# We'll put everything here.
# os.getcwd() returns the notebook's working directory without shelling out to `pwd`.
topdir = os.getcwd()
topdir

## Dataset

First, we'll need a dataset.

We'll use the Pascal VOC dataset (http://host.robots.ox.ac.uk/pascal/VOC/).
The VOC dataset does not have a whole lot of images, so let's combine the train and val split (which are initially
split at about 50/50) and resplit at 90/10.

Also, the Pascal VOC dataset has 20 classes - let's combine them all into just 5:

* Background
* Person
* Animal
* Vehicle
* Indoor

In [None]:
# Heads up: the very first run of this cell takes around an hour or two, because the whole
# dataset has to be downloaded to your workspace and then extracted.
# Later runs simply reuse the cached dataset (unless you delete it).
imgsize = 128  # We will resize to this value
dataset_dir = os.path.join(topdir, "dataset")

# Only ask for a download when the dataset directory is not there yet
download = not os.path.isdir(dataset_dir)

x_transforms, y_transforms = train.get_transforms(size=imgsize)
dataset_train = train.TransformedVocDataset(
    dataset_dir,
    image_set="train",
    download=download,
    x_transforms=x_transforms,
    y_transforms=y_transforms,
)

In [None]:
# The val split lives in its own directory; fetching it also takes about an hour or two. Frown.
dataset_dir_val = os.path.join(topdir, "dataset-val")

# As with the train split, skip the download if the directory already exists
download = not os.path.isdir(dataset_dir_val)

dataset_val = train.TransformedVocDataset(
    dataset_dir_val,
    image_set="val",
    download=download,
    x_transforms=x_transforms,
    y_transforms=y_transforms,
)

In [None]:
# VOC is a fairly small dataset and ships with a roughly 50/50 train/val split.
# Glue both splits together here so there is more data to work with; the actual
# re-split (at something like 90/10) happens later on - this cell only concatenates.
dataset = torch.utils.data.ConcatDataset((dataset_train, dataset_val))

In [None]:
# Sanity-check the dataset: draw a handful of (image, mask) pairs side by side
# so we can judge whether we like the augmentations.
nimgs = 5
to_pil = T.ToPILImage()
for row in range(nimgs):
    # Every second sample is shown; the same offset also addresses the subplot grid
    offset = 2 * row
    x, y = dataset[offset]

    # Input tensor -> PIL image
    x = to_pil(x)

    # Mask tensor -> PIL image
    y = dataset_train.mask_tensor_to_pil_image(y)

    # Left column: the image
    plt.subplot(nimgs, 2, offset + 1)
    plt.imshow(x)

    # Right column: its mask
    plt.subplot(nimgs, 2, offset + 2)
    plt.imshow(y)

plt.show()

In [None]:
# How many images?
# (dataset is the concatenation of the train and val splits, so this is their combined size)
print(len(dataset))

In [None]:
# Count how many pixels belong to each class so we can weight the loss function appropriately
num_classes = len(train.VOC_CLASSES_COMBINED)
histogram = [0] * num_classes

for _, y in tqdm(dataset):
    for classidx in range(num_classes):
        histogram[classidx] += torch.sum(y == classidx).item()

total_pixels = sum(histogram)
if total_pixels == 0:
    raise ValueError("No pixels of any known class were found in the dataset; cannot compute class weights.")

# Weight every class relative to the rarest class that actually occurs.
# A class with zero pixels gets weight 0.0 instead of raising ZeroDivisionError.
rarest_count = min(count for count in histogram if count > 0)
weights = [rarest_count / count if count > 0 else 0.0 for count in histogram]

for i, count in enumerate(histogram):
    print(f"{train.VOC_CLASSES_COMBINED[i]}: {count}, or about {(100.0 * count / total_pixels):.2f}% of the dataset. So weighting with {weights[i]}")

# Now adjust the background weight: I find that we need to penalize false positives a bit more heavily
weights[0] *= 10
print(f"Background weight updated to {weights[0]}")

## Train the Network

Here we create an AML experiment and submit it to the created GPU node. You can track the progress using AML logging.

Of course, you don't have to use Azure Machine Learning services. You can use whatever workflow you are used to. This notebook
just walks you through an example that uses AML, but under the hood, it is just running PyTorch.

As long as you end up with an ONNX model at the end (or a TensorFlow model), you should be good.

In [None]:
# Paths inside the datastore where the train and val copies of the dataset will live
datastore_data_path = "datasets/voc-segmentation-train-tutorial"
datastore_data_path_val = "datasets/voc-segmentation-val-tutorial"

# Load the workspace and use the default datastore associated with it
ws = Workspace.from_config()
datastore = ws.get_default_datastore()

In [None]:
# Push both local dataset directories to the datastore.
# (This cell takes like 10 minutes but only ever needs to be run once.)
datastore.upload(
    src_dir=dataset_dir,
    target_path=datastore_data_path,
)
datastore.upload(
    src_dir=dataset_dir_val,
    target_path=datastore_data_path_val,
)

In [None]:
# Wrap each uploaded folder in an AML file Dataset and register it with the workspace
amldataset = Dataset.File.from_files(path=(datastore, datastore_data_path)).register(
    workspace=ws,
    name="voc-segmentation-train-tutorial",
    description="VOC for Segmentation",
)
amldataset_val = Dataset.File.from_files(path=(datastore, datastore_data_path_val)).register(
    workspace=ws,
    name="voc-segmentation-val-tutorial",
    description="VOC (val) for Segmentation",
)

In [None]:
# Reuse the compute cluster with this name if it exists; otherwise provision it
cluster_name = "gpu1"
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Looking up the cluster failed, so create a new one
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",  # Make sure to choose one that your subscription has
        idle_seconds_before_scaledown=2400,
        min_nodes=0,
        max_nodes=1,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print("Found an existing cluster. We will use this one.")
cluster.wait_for_completion(show_output=True)

In [None]:
# Turn the class weights into a single space-separated string for ingestion by the training script.
# str.join avoids the trailing-space cleanup that manual concatenation needs.
class_weights_string = " ".join(str(w) for w in weights)
class_weights_string

In [None]:
# Clean up previous runs before submitting
shutil.rmtree("logs", ignore_errors=True)
shutil.rmtree("outputs", ignore_errors=True)
# Recreate the logs directory with the standard library rather than shelling out to mkdir
os.makedirs("logs", exist_ok=True)

# Create the experiment and submit it. This is the cell that actually kicks off training.
experiment = Experiment(workspace=ws, name="AzurePerceptDKTutorial")
config = ScriptRunConfig(
    source_directory=".",
    script="train.py",
    compute_target=cluster_name,
    arguments=[
        # Both datasets are mounted on the compute node and handed to train.py as arguments
        "--dataset", amldataset.as_named_input("input_train").as_mount(),
        "--dataset-val", amldataset_val.as_named_input("input_val").as_mount(),
        "--resize", imgsize,
        "--batchsize", 128,
        "--learning-rate", 0.001,
        "--nepochs", 400,
        "--split", 0.9,
        "--weights", class_weights_string
    ]
)

# Set up the training environment (see https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments
# for a list of curated environments if you don't want to create one from a requirements.txt or a Conda YAML)
env = Environment.from_pip_requirements(name="PyTorch-AzurePerceptDK-Env", file_path="requirements.txt")
env.docker.enabled = True
env.docker.base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn7-ubuntu18.04"
config.run_config.environment = env

# Kick off the experiment!
run = experiment.submit(config)

# Print out a link to the experiment for tracking using AML
print("Submitted to compute cluster. Click the link below.")
print(run.get_portal_url())

In [None]:
# Tensorboard
# Attach a TensorBoard instance to the run we just submitted and start it.
tb = Tensorboard([run])
tb.start()
print("Click the link above to view the output on TensorBoard")

# Block until run completes.
# NOTE(review): show_output=True presumably streams the run's output into this cell while waiting - confirm.
run.wait_for_completion(show_output=True)

In [None]:
# Shut down the TensorBoard instance started in the previous cell
tb.stop()

In [None]:
# Fetch the outputs of the training run
experiment = Experiment(workspace=ws, name="AzurePerceptDKTutorial")
runs = experiment.get_runs()

# Prefer the run submitted from this session. If the user stopped and came back later,
# `run` no longer exists, so fall back to the first run under this experiment whose status
# is "Completed" (get_runs() is in reverse chronological order, so that is the latest one).
try:
    completed_run = run
except NameError:
    completed_run = next(
        (candidate for candidate in runs if candidate.get_status() == "Completed"),
        None,
    )

if completed_run is not None:
    print("Downloading outputs...")
    completed_run.download_files("outputs")
else:
    print("No runs completed yet.")

## Try out our Model

In [None]:
# Load up the model
model = train.UNet(len(train.VOC_CLASSES_COMBINED))
# map_location="cpu" keeps loading from failing on a machine without CUDA
# in case the checkpoint was saved with GPU tensors.
model.load_state_dict(torch.load("outputs/model.pth", map_location="cpu"))
model.eval()  # Don't forget to set the model to eval mode!

In [None]:
# Run some examples through it
# NOTE! These images may have been in the training split, so don't get too excited :P
# We don't know exactly which images went into the training split
# (we could figure it out, but the point here is just to make sure that we can
# load the model and then later convert it to ONNX and still get the same results,
# NOT to show that the model does a good job).
nimgs = 5
# This is inference only, so turn off autograd: no graph is built and the
# predictions do not carry gradient state.
with torch.no_grad():
    for i in range(0, nimgs * 3, 3):
        x, y = dataset[i]

        # Convert X to an image
        ximg = T.ToPILImage()(x)

        # Convert Y to an image
        yimg = dataset_train.mask_tensor_to_pil_image(y)

        # Run X through the model (with a batch dimension added) and convert output to an image
        pred = model(x.unsqueeze(0))
        predimg = train.TransformedVocDataset.one_hot_tensor_to_pil_image(pred[0])

        # One row per sample: input, ground truth, prediction
        plt.subplot(nimgs, 3, i + 1)
        plt.imshow(ximg)

        plt.subplot(nimgs, 3, i + 2)
        plt.imshow(yimg)

        plt.subplot(nimgs, 3, i + 3)
        plt.imshow(predimg)

plt.show()

In [None]:
# Now let's run it on a video from the OpenVINO model zoo test website. We can directly
# compare this model in PyTorch to the same model in OpenVINO later using this video.
# -O saves the download as outputs/movie.mp4, which is the path the next cell opens.
!wget https://github.com/intel-iot-devkit/sample-videos/raw/master/person-bicycle-car-detection.mp4 -O outputs/movie.mp4

In [None]:
video_path = os.path.join("outputs", "movie.mp4")
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Stop here with a clear message instead of failing later on an empty frame list
    raise RuntimeError(f"Could not open video: {video_path}")

# Spool out all the frames from the video
frames = []
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Change from OpenCV's BGR to RGB
        frames.append(frame[:, :, ::-1])
finally:
    # Release the capture even if reading a frame raised
    cap.release()

# Choose some interesting frames to display
frame_indices = [40, 50, 200, 325, 560]
if max(frame_indices) >= len(frames):
    raise IndexError(f"Video only has {len(frames)} frames, but frame {max(frame_indices)} was requested.")
imgs = [Image.fromarray(frames[idx]) for idx in frame_indices]

# Start from an empty directory for the preprocessed input images
input_img_dir = os.path.join("outputs", "input-imgs")
shutil.rmtree(input_img_dir, ignore_errors=True)
os.makedirs(input_img_dir)

nimgs = len(imgs)
resize = T.Resize((imgsize, imgsize))
to_tensor = T.ToTensor()  # Converts from [0, 255] -> [0.0, 1.0]

# Inference only, so autograd is switched off
with torch.no_grad():
    for j, ximg in enumerate(imgs, start=1):
        # Preprocess X
        ximg = resize(ximg)
        x = to_tensor(ximg)

        # Save ximg for later (files are named img1.png, img2.png, ...)
        ximg.save(os.path.join(input_img_dir, f"img{j}.png"))

        # Run X through the model and convert output to an image
        pred = model(x.unsqueeze(0))
        predimg = train.TransformedVocDataset.one_hot_tensor_to_pil_image(pred[0])

        # One row per frame: input on the left, prediction on the right
        plt.subplot(nimgs, 2, 2 * j - 1)
        plt.imshow(ximg)

        plt.subplot(nimgs, 2, 2 * j)
        plt.imshow(predimg)

plt.show()

## Convert to ONNX

We need to convert the model to the ONNX format, since ultimately we need it in OpenVINO IR or .blob format, and OpenVINO does not understand PyTorch's
native model format.

In [None]:
# Export the PyTorch model to ONNX, using one dataset sample as the example input
onnx_path = "outputs/model.onnx"
sample_x, _ = dataset[0]
dummy_input = sample_x.unsqueeze(0)  # Add a batch dimension
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    export_params=True,
    input_names=["input"],
    output_names=["output"],
    verbose=False,
)

# Read the exported file back in and run the ONNX checker on it
onnx_model = onnx.load(onnx_path)
onnx.checker.check_model(onnx_model)

In [None]:
# Run the exported model through ONNX Runtime to make sure it still works
ort_session = onnxruntime.InferenceSession("outputs/model.onnx")
# The session's first (and only) input; its name is the key of the feed dict
input_name = ort_session.get_inputs()[0].name

nimgs = 5
to_pil = T.ToPILImage()
for row in range(nimgs):
    # Every third sample is shown; the same offset also addresses the subplot grid
    offset = 3 * row
    x, y = dataset[offset]

    # Input tensor -> PIL image
    ximg = to_pil(x)

    # Ground-truth mask -> PIL image
    yimg = dataset_train.mask_tensor_to_pil_image(y)

    # Feed X (as a batched numpy array) to ONNX Runtime and turn the output into an image
    batch = x.unsqueeze(0).detach().cpu().numpy()
    outputs = ort_session.run(None, {input_name: batch})
    pred = outputs[0].squeeze()  # ORT returns a list of outputs, but we only have one output.
    predimg = train.TransformedVocDataset.one_hot_tensor_to_pil_image(torch.tensor(pred))

    # One row per sample: input, ground truth, prediction
    for col, picture in enumerate((ximg, yimg, predimg), start=1):
        plt.subplot(nimgs, 3, offset + col)
        plt.imshow(picture)

plt.show()

## Convert the ONNX Model to OpenVINO IR and then to Blob Format

The Azure Percept DK requires the model to be in OpenVINO IR or OpenVINO Myriad X blob format.
So let's convert it.

In [None]:
# Use openvino/ubuntu18_dev:2021.1 Docker image to do this
# See the documentation: https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model.html
# Note that we need to scale input values by 255, since our network was trained with normalized
# inputs, but our inputs are going to be uint8 values from the camera.
# This means that the OpenVINO network we create here will expect inputs in [0, 255] NOT [0.0, 1.0].
# The local outputs directory is mounted at /blah and used as the working directory, so
# ./model.onnx below is outputs/model.onnx and the output directory (-o ".") is outputs as well.
!docker run --rm -v `realpath outputs`:/blah -w /blah openvino/ubuntu18_dev:2021.1 \
    python3 "/opt/intel/openvino_2021/deployment_tools/model_optimizer/mo.py" \
    --input_model "./model.onnx" -o "." --input "input" --output "output" --scale 255

In [None]:
# Use the same Docker image for model conversion from IR to blob
# Reads ./model.xml and writes ./model.blob, both inside the mounted outputs directory.
# NOTE(review): -ip U8 / -op FP32 presumably select uint8 input and float32 output precision - confirm
# against the myriad_compile help text.
!docker run --rm -v `realpath outputs`:/blah -w /blah openvino/ubuntu18_dev:2021.1 \
    /bin/bash -c "source /opt/intel/openvino/bin/setupvars.sh && /opt/intel/openvino_2021/deployment_tools/inference_engine/lib/intel64/myriad_compile \
    -m ./model.xml \
    -o ./model.blob \
    -VPU_NUMBER_OF_SHAVES 8 \
    -VPU_NUMBER_OF_CMX_SLICES 8 \
    -ip U8 \
    -op FP32"

## Verify that the OpenVINO IR Model Works as Expected

Let's sanity check our parameters that we used when converting to OpenVINO by
using the OpenVINO Inference Engine. Its outputs should be very close to the PyTorch outputs
from earlier.

In [None]:
%%writefile "outputs/validate_using_openvino.py"
from openvino.inference_engine import IECore
from PIL import Image
import numpy as np
import os
import shutil
ie = IECore()

imgsize = int(os.environ['IMGSIZE'])

net = ie.read_network("model.xml")
batch, channels, height, width = net.input_info["input"].input_data.shape
exec_net = ie.load_network(network=net, device_name="CPU")

# Read in the images we are going to be using
# They've already been preprocessed in a previous cell
imgs = [Image.open(os.path.join("input-imgs", fname)) for fname in os.listdir("input-imgs") if fname.endswith(".png")]

shutil.rmtree("output-imgs", ignore_errors=True)
os.makedirs("output-imgs")

# Run them through the network
for i, img in enumerate(imgs):
    # Convert to numpy, add batch dimension, and permute
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, 0)
    res = exec_net.infer(inputs={"input": img})
    res = res["output"]
    # Output shape is (1, 5, imgsize, imgsize)
    # Remove batch dimension
    res = res.reshape((5, imgsize, imgsize))
    # Save results as files (we'll look at them outside the Docker container)
    np.save(os.path.join("output-imgs", f"output-img{i}"), res)

In [None]:
# Launch Docker
# The notebook's imgsize is passed into the container as the IMGSIZE environment variable,
# which validate_using_openvino.py reads via os.environ. The outputs directory is mounted
# at /blah and used as the working directory.
!docker run --rm -e IMGSIZE=$imgsize -v `realpath outputs`:/blah -w /blah openvino/ubuntu18_runtime:2021.1 \
    /bin/bash -c "python3 -m pip install openvino && \
                  python3 -m pip install Pillow && \
                  python3 validate_using_openvino.py"

In [None]:
# Display using the images we created with OpenVINO
output_dir = os.path.join("outputs", "output-imgs")
input_dir = os.path.join("outputs", "input-imgs")

# os.listdir() returns names in arbitrary order, and inputs are paired with outputs by position,
# so both listings are sorted by name. Only the expected file types are picked up.
# NOTE(review): this assumes output-img{i} was produced from the i-th input in name order -
# verify against the validation script.
imgfpaths = [os.path.join(output_dir, fname) for fname in sorted(os.listdir(output_dir)) if fname.endswith(".npy")]
imgs = [np.load(fpath) for fpath in imgfpaths]
imgs = [train.TransformedVocDataset.one_hot_tensor_to_pil_image(torch.tensor(arr)) for arr in imgs]

inputfpaths = [os.path.join(input_dir, fname) for fname in sorted(os.listdir(input_dir)) if fname.endswith(".png")]
inputs = [Image.open(fpath) for fpath in inputfpaths]

nimgs = len(imgs)
assert nimgs == len(inputs), f"Length of input images ({len(inputs)}) != length of output images ({len(imgs)})"

# One row per image: input on the left, OpenVINO prediction on the right
for row, (ximg, predimg) in enumerate(zip(inputs, imgs)):
    plt.subplot(nimgs, 2, 2 * row + 1)
    plt.imshow(ximg)

    plt.subplot(nimgs, 2, 2 * row + 2)
    plt.imshow(predimg)

plt.show()

## Package the Model Up for Deployment

In this section, we upload the converted model to an Azure blob storage and then manipulate
your device's module twin's "ModelZipUrl" property to point to it.

Note that this won't work until you complete the rest of the tutorial in the GitHub! You haven't
implemented a G-API graph for this model yet, so it won't run on the device.

So go do that, and then come back when it tells you.

First, let's create a zip file that contains the following items:

1. Our labels.txt file
1. Our model.blob file (we could do the IR files instead, but it is better to use the .blob file if you have one)
1. A config.json file that will tell the azureeyemodule application where to find the files and what parser to use.

In [None]:
# Re-import stuff if we have sensibly turned off this node while doing the rest of the tutorial
from azureml.core import Workspace
from azure.iot.hub import IoTHubRegistryManager
from azure.iot.hub.models import Twin, TwinProperties
import sys
%pip install azure-storage-blob==2.1.0 msrest
%pip install azure-iot-hub

In [None]:
%%writefile 'outputs/config.json'
{
    "DomainType": "unet-seg",
    "ModelFileName": "model.blob",
    "LabelFileName": "labels.txt"
}

In [None]:
%%writefile 'outputs/labels.txt'
background
person
animal
vehicle
indoor

In [None]:
# Get the pwd again
# `!pwd` captures the shell command's output as a list of lines; the first entry is the path.
pwd = !pwd
pwd = pwd[0]
pwd

In [None]:
# Bundle the model blob, config.json and labels.txt into outputs/model.zip
!cd outputs && zip model.zip model.blob config.json labels.txt && cd $pwd

Next, let's upload this zip archive to storage.

In [None]:
# Upload the zip archive through the workspace's default datastore
ws = Workspace.from_config()
ds = ws.get_default_datastore()

# Print the datastore's name, type, storage account and container
datastore_info = (ds.name, ds.datastore_type, ds.account_name, ds.container_name)
print(*datastore_info)

ds.upload_files(["outputs/model.zip"], target_path="tutorial-models", overwrite=True)

In [None]:
# Build a download URL for model.zip that carries a read-only shared access signature (SAS)
from datetime import datetime, timedelta
from azure.storage.blob import (
    BlockBlobService,
    ContainerPermissions,
    BlobPermissions,
    PublicAccess,
)

# Storage details come from the datastore used for the upload
AZURE_ACC_NAME = ds.account_name
AZURE_PRIMARY_KEY = ds.account_key
AZURE_CONTAINER = ds.container_name
AZURE_BLOB = ds.name
AZURE_File = "tutorial-models/model.zip"

block_blob_service = BlockBlobService(account_name=AZURE_ACC_NAME, account_key=AZURE_PRIMARY_KEY)

# We'll expire this SAS in 30 days.
sas_expiry = datetime.utcnow() + timedelta(hours=30 * 24)
# Despite its name, sas_url is only the part that goes after the "?" in the final URL
sas_url = block_blob_service.generate_blob_shared_access_signature(
    AZURE_CONTAINER,
    AZURE_File,
    permission=BlobPermissions.READ,
    expiry=sas_expiry,
)
downloadurl = f"https://{AZURE_ACC_NAME}.blob.core.windows.net/{AZURE_CONTAINER}/{AZURE_File}?{sas_url}"
print(downloadurl)

In [None]:
# Pasting the link printed above ^^^^^^^^^ into your ModelZipUrl right now should already be enough to run this model.
# But let's go ahead and do it from here, just for the heck of it.

# Fill in the connection string, device_id and module_id values from your IoT Hub:
#   1. Go to https://portal.azure.com
#   2. Select your IoT Hub
#   3. Click on Shared access policies
#   4. Click 'service' policy on the right (or another policy having 'service connect' permission)
#   5. Copy Connection string--primary key
CONNECTION_STRING = "<YOUR-CONNECTION-STRING-PRIMARY-KEY>"
DEVICE_ID = "<YOUR-DEVICE-NAME>"
# If you have changed the name of the azureeyemodule for some reason,
# you will need to change it here too.
MODULE_ID = "azureeyemodule"

# Fetch the module twin as it currently is, so we can show it before patching
iothub_registry_manager = IoTHubRegistryManager(CONNECTION_STRING)
module_twin = iothub_registry_manager.get_module_twin(DEVICE_ID, MODULE_ID)

print("")
print("Module twin properties before update    :")
print(f"{module_twin.properties}")

In [None]:
# Patch the module twin so that its desired "ModelZipUrl" property points at our download URL
desired_properties = {"ModelZipUrl": downloadurl}
twin_patch = Twin()
twin_patch.properties = TwinProperties(desired=desired_properties)
# The etag of the twin fetched earlier is passed along with the patch
updated_module_twin = iothub_registry_manager.update_module_twin(
    DEVICE_ID,
    MODULE_ID,
    twin_patch,
    module_twin.etag,
)

print("")
print("Module twin properties after update     :")
print(f"{updated_module_twin.properties}")