Copyright (c) Microsoft Corporation. **Licensed under the GNU General Public License v3.0**

# Train your own Model and Deploy to Device

## Semantic segmentation with UNet

This notebook demonstrates how to train a model using Azure Machine Learning and deploy it to the Azure Percept DK. The model implementation is based on https://github.com/milesial/Pytorch-UNet with some modifications,
and is therefore licensed under GPLv3.0.  

This notebook is intended to run on an [Azure ML remote compute instance](https://docs.microsoft.com/en-us/azure/machine-learning/concept-compute-instance). To get started, make a directory in your AML workspace called "unet-notebook" and then upload this notebook to that directory,
then run through each cell. This notebook will download the GitHub repository and train a U-Net to do semantic segmentation of bananas.

### 1. Initialization

In [None]:
# Get the repository and cd into the right directory
# Later cells use relative paths such as './data', 'train.py' and './train-env.yml';
# presumably these resolve against the train-from-scratch directory entered here.
!git clone https://github.com/microsoft/azure-percept-advanced-development.git
%cd azure-percept-advanced-development/machine-learning-notebooks/train-from-scratch

In [None]:
# These packages are pre-installed on an Azure ML remote compute instance
import torch
import torch.nn as nn
import os
import cv2
import matplotlib.pylab as plt
%matplotlib inline
plt.rcParams["figure.figsize"] = 20,20
import numpy as np
from PIL import Image

In [None]:
# Use the default datastore associated with the current workspace
from azureml.core import Workspace
from azureml.core import Dataset

# Load the workspace from its configuration, then look up its default datastore
ws = Workspace.from_config()
datastore = ws.get_default_datastore()

In [None]:
# Upload our data to the datastore
# root_data_path is the local source directory; data_path is the target path
# inside the datastore.
root_data_path = './data'
data_path = 'datasets/bananas_dataset'
datastore.upload(src_dir=root_data_path, target_path=data_path)

In [None]:
# Register the uploaded data as an Azure ML dataset, so it can be accessed from the compute cluster doing the training
# The registered name 'bananas_dataset' is the one the training cell looks up with Dataset.get_by_name.
dataset = Dataset.File.from_files(path=(datastore, data_path))
dataset = dataset.register(workspace=ws, name='bananas_dataset', description='bananas unet training data')

In [None]:
# Define input/output locations
root_outputs_path = './outputs'
train_path = os.path.join(root_data_path, 'images')
mask_path = os.path.join(root_data_path, 'masks')

# Output directories: trained PyTorch weights, the ONNX export, and the
# OpenVINO IR files respectively.
model_path = os.path.join(root_outputs_path, "model")
onnx_path = os.path.join(root_outputs_path, "onnx")
ir_output_path = os.path.join(root_outputs_path, "intel")

# exist_ok=True creates each directory only when it is missing, so the cell can
# be re-run safely without a separate (race-prone) os.path.exists check.
for output_dir in (model_path, onnx_path, ir_output_path):
    os.makedirs(output_dir, exist_ok=True)

# Files written into the directories above by later cells
model_file = os.path.join(model_path, "bananas.pth")
onnx_output = os.path.join(onnx_path, "bananas.onnx")

### 2. Draw a sample image and mask

In [None]:
# Base name of the sample to display; the image is <name>.jpg under train_path
# and its mask is <name>.png under mask_path.
file_name = "2021-01-11T224237.988294Z"
img_file = os.path.join(train_path, f"{file_name}.jpg")
mask_file = os.path.join(mask_path, f"{file_name}.png")

# Sanity check: evaluates to a pair of booleans, one per file
os.path.exists(img_file), os.path.exists(mask_file)

In [None]:
# Load the sample image with OpenCV (draw_image_mask converts it from BGR before display)
sample = cv2.imread(img_file)

# OpenCV doesn't read .gif files. Workaround
# NOTE(review): mask_file ends in .png here, so the .gif remark looks like a leftover
# from a different dataset. PIL and OpenCV may decode the mask differently, so confirm
# before replacing this with cv2.imread.
mask_pil = Image.open(mask_file)
mask = np.array(mask_pil)

In [None]:
def draw_image_mask(sample, mask):
    """Show an image and a mask side by side in a 1x2 figure.

    `sample` is converted with cv2.COLOR_BGR2RGB before being displayed;
    `mask` is displayed as given. Both panels have their axes hidden.
    """
    _, (image_ax, mask_ax) = plt.subplots(1, 2, figsize=(20, 20))

    image_ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))
    mask_ax.imshow(mask)

    for panel, caption in ((image_ax, 'Sample Image'), (mask_ax, 'Sample Mask')):
        panel.axis('off')
        panel.title.set_text(caption)
    
draw_image_mask(sample, mask)

### 3. Provision a GPU compute cluster

In [None]:
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config()

# Name of the compute cluster to reuse or create
cluster_name = "gpu1"

# Look the cluster up first; only provision a new one when the lookup fails
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # This is a smallish GPU node. Make sure to use a subscription that has this.
    vm_size = "STANDARD_NC6"
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        idle_seconds_before_scaledown=2400,
        min_nodes=0,
        max_nodes=1,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Runs for both the existing and the freshly created cluster
cluster.wait_for_completion(show_output=True)

### 4. Train the model on the GPU cluster

In [None]:
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

ws = Workspace.from_config()
dataset = Dataset.get_by_name(workspace=ws, name='bananas_dataset')
experiment = Experiment(workspace=ws, name='bananas-experiment')

# Training environment, built from the conda specification file
env = Environment.from_conda_specification(name='train-env', file_path='./train-env.yml')
# Use a customized Docker image: clear the base image and point at the local Dockerfile
env.docker.base_image = None
env.docker.base_dockerfile = "./Dockerfile"

# Command-line arguments handed to train.py; the registered dataset is mounted
# on the compute target and passed as --data-path.
train_args = [
    '--data-path', dataset.as_named_input('input').as_mount(),
    '--output-path', './outputs',
    '--epochs', 3,
    '--batch-size', 2,
    '--learning-rate', 0.001,
    '--scale', 0.5,
    '--to-bgr',
]

config = ScriptRunConfig(
    source_directory='.',
    script='train.py',
    compute_target='gpu1',
    arguments=train_args,
)
config.run_config.environment = env

# Submit the run and print the portal link for monitoring
run = experiment.submit(config)
aml_url = run.get_portal_url()
print("Submitted to compute cluster. You can monitor the run progress using the link below:")
print(aml_url)

In [None]:
# Monitor the progress with Tensorboard
from azureml.tensorboard import Tensorboard

# Local directory passed to Tensorboard as local_root
local_logdir = "./outputs/logs"
tb = Tensorboard([run], local_root=local_logdir, port=6006)
tb.stop()  # Make sure to stop any previous TensorBoard instances

# Remove the local log directory before starting a fresh instance
!rm -rf $local_logdir
tblink = tb.start()

In [None]:
# Wait until the experiment run is completed.
run.wait_for_completion(show_output=True)

# Kill Tensorboard
tb.stop()

In [None]:
# Get the model from the latest completed run outputs
import glob
from azureml.core import Experiment

experiment = Experiment(workspace=ws, name='bananas-experiment')
runs = experiment.get_runs()

# Take the first run whose status is 'Completed'; None when there is no such run.
# NOTE(review): "latest" relies on get_runs() yielding the newest runs first; confirm
# against the Azure ML SDK documentation.
completed_run = next((r for r in runs if r.get_status() == 'Completed'), None)

if completed_run is None:
    print("No completed run available")
else:
    # Copy the checkpoint written by the training run to the local model_file path
    completed_run.download_file('outputs/checkpoints/model.pth', model_file)
    print(f'Downloaded model file: {model_file}')

### 5. Infer using the original model

This simply validates that the model works.

#### Prep the network

In [None]:
# unet is a local module (presumably provided by the cloned repository directory; confirm)
import unet

# Build the network on the CPU and load the weights previously downloaded to model_file
device = torch.device('cpu')
net = unet.UNet(n_channels=3, n_classes=1, bilinear=False)
net.to(device=device)
# map_location=device maps the saved tensors onto `device` while loading
net.load_state_dict(torch.load(model_file, map_location=device))

#### Prep the image

In [None]:
# We scale the image by this factor during training
scale_factor = 0.5

def prep_img_for_inference(sample, scale_factor=0.5):
    """Convert an OpenCV image into a normalised NCHW float tensor.

    Args:
        sample: HWC image array as read by OpenCV (byte-valued pixels).
        scale_factor: factor applied to both width and height before the
            conversion; it should match the scale used during training.

    Returns:
        A float tensor of shape (1, C, H', W') with values scaled to [0..1],
        located on the module-level `device`.
    """
    resized = cv2.resize(sample, None, fx=scale_factor, fy=scale_factor)
    img = torch.from_numpy(resized).to(device)
    # convert HWC -> CHW, then add the batch dimension -> NCHW
    img = img.permute(2, 0, 1).unsqueeze(0)
    # Convert from the OpenCV byte representation. .float() keeps the tensor on
    # `device`, whereas .type(torch.FloatTensor) always yields a CPU tensor and
    # would silently undo the .to(device) above on a non-CPU device.
    img = img.float()
    # scale to [0..1]
    img /= 255
    return img

img = prep_img_for_inference(sample, scale_factor=scale_factor)

#### Pipe it through

In [None]:
threshold = 0.5

def post_process(output, threshold=0.5, has_probs=False):
    """Turn a network output tensor into a 0/255 integer mask array.

    Args:
        output: tensor of logits, or of probabilities when `has_probs` is True.
        threshold: probabilities strictly above this value become 255.
        has_probs: skip the sigmoid when the values are already probabilities.

    Returns:
        A NumPy integer array holding 0 or 255, with a leading dimension of
        size 1 removed from the input shape.
    """
    probs = output if has_probs else torch.sigmoid(output)
    above_threshold = probs.squeeze(0) > threshold
    return above_threshold.cpu().numpy().astype("int") * 255

# Switch the network to evaluation mode and run the forward pass without gradients
net.eval()
with torch.no_grad():
    output = net(img)
    out_mask = post_process(output, threshold=threshold)

# Drop the remaining leading dimension so the mask can be displayed as a 2-D image
out_mask = np.squeeze(out_mask, 0)
# draw_image_mask performs the BGR->RGB conversion itself, so pass the OpenCV image
# unchanged. Reversing the channels and then applying cvtColor(BGR2RGB) first only
# swapped them twice, yielding the same pixels as `sample`.
draw_image_mask(sample, out_mask)

#### How accurate? 
Compute Dice Coefficient

In [None]:
# dice is imported from the local dice_loss module
from dice_loss import dice
# Resize the ground-truth mask to the predicted mask's size before comparing the two.
# NOTE(review): cv2.resize is called with its default interpolation, which may introduce
# intermediate values along mask edges; confirm whether nearest-neighbour is wanted here.
dice(cv2.resize(mask, (out_mask.shape[1], out_mask.shape[0])), out_mask)  # cv2.resize expects (width, height)

### 6. Convert the model

#### The original code did not have output activation. Bake it in.

In [None]:
model = nn.Sequential(net, nn.Sigmoid())

#### Prep the input dimensions

In [None]:
# Example input used for the export below; its shape is what gets printed.
# NOTE(review): this relies on prep_img_for_inference's default scale_factor (0.5) rather
# than the scale_factor variable; keep the two in sync if the scale is ever changed.
dummy_input = prep_img_for_inference(sample)
print(f"These will be the fixed dimensions of any incoming images: {dummy_input.shape}")

#### Do the conversion with the right opset_version

In [None]:
# opset_version = 11 to support up-convolutional layers
# Export the sigmoid-wrapped model to onnx_output, naming the graph's
# input and output tensors "input" and "output".
torch.onnx.export(
    model,
    dummy_input,
    onnx_output,
    export_params=True,
    opset_version=11,
    input_names=["input"],
    output_names=["output"],
    verbose=False,
)

### 7. Check with ONNX Runtime

In [None]:
# We are going to use a CPU-based version of ONNX runtime in order
# to avoid CUDA compat problems
%pip install onnxruntime

#### Check the model

In [None]:
import onnx
# Load the exported file and run the ONNX checker on it
onnx_model = onnx.load(onnx_output)
onnx.checker.check_model(onnx_model)

#### Perform inference with the model

In [None]:
import onnxruntime
ort_session = onnxruntime.InferenceSession(onnx_output)

def to_numpy(tensor):
    """Return the tensor's data as a NumPy array on the CPU.

    Tensors that require grad are detached first, since they cannot be
    converted to NumPy directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

# compute ONNX Runtime output prediction
# The input is prepared exactly like the dummy input used for the export
# (prep_img_for_inference with its default scale factor).
sample_input = prep_img_for_inference(sample)
# Feed the array under the name of the session's first declared input
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(sample_input)}
ort_outs = ort_session.run(None, ort_inputs)

In [None]:
# The exported model ends in nn.Sigmoid, so its output is passed as probabilities
# (has_probs=True) and post_process skips its own sigmoid.
out_mask = post_process(torch.from_numpy(ort_outs[0]), threshold=threshold, has_probs=True)
out_mask = np.squeeze(out_mask, 0)
draw_image_mask(sample, out_mask)

### 8. Convert the ONNX model to IR format

In [None]:
# Volume to bind to the OpenVINO container
# os.getcwd() returns the notebook's working directory directly, so there is no
# need to spawn a shell (`!pwd`) and index into its captured output.
src_vol = os.path.join(os.getcwd(), 'outputs')
src_vol

In [None]:
# Convert in the OpenVINO container
# src_vol (the notebook's outputs directory) is mounted as /working, so the relative
# ./onnx and ./intel paths below refer to outputs/onnx and outputs/intel.
# NOTE(review): --scale 255 presumably bakes the division by 255 done in
# prep_img_for_inference into the converted model; confirm against the Model Optimizer docs.
!docker run --rm -v $src_vol:/working -w /working openvino/ubuntu18_dev:2021.1 \
        python3 "/opt/intel/openvino_2021/deployment_tools/model_optimizer/mo.py" \
        --input_model "./onnx/bananas.onnx" -o "./intel" --input "input" --output "output" --scale 255

### 9. Use OpenVINO to compile IR format to blob

In [None]:
%%writefile outputs/compile.sh
#!/bin/bash

# OpenVINO compilation script

source /opt/intel/openvino_2021/bin/setupvars.sh

/opt/intel/openvino_2021/deployment_tools/inference_engine/lib/intel64/myriad_compile \
    -m intel/bananas.xml -o intel/bananas.blob -VPU_NUMBER_OF_SHAVES 8 -VPU_NUMBER_OF_CMX_SLICES 8 -ip U8 -op FP32

In [None]:
# Run compilation in the container (this takes a few minutes)
!docker run --rm -v $src_vol:/working -w /working openvino/ubuntu18_dev:2021.1 /bin/bash compile.sh

### 10. Package up blob for delivery to devkit

In [None]:
%%writefile 'outputs/intel/config.json'
{
    "DomainType": "unet",
    "ModelFileName": "bananas.blob"
}

In [None]:
!cd outputs/intel && zip model.zip bananas.blob config.json

### 11. Upload the blob

In [None]:
# Use the default datastore for upload
ds = ws.get_default_datastore()
# Show which storage account and container the upload will go to
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)

In [None]:
# Do upload
ds.upload_files(['outputs/intel/model.zip'], target_path='models', overwrite=True)

In [None]:
# Install Azure Storage tools 
%pip install azure-storage-blob==2.1.0 msrest

In [None]:
# Generate download SAS URL for model.zip
from datetime import datetime, timedelta, timezone
from azure.storage.blob import (
    BlockBlobService,
    ContainerPermissions,
    BlobPermissions,
    PublicAccess,
)

# Storage account details come from the default datastore
AZURE_ACC_NAME = ds.account_name
AZURE_PRIMARY_KEY = ds.account_key
AZURE_CONTAINER = ds.container_name
AZURE_BLOB = ds.name
# Blob path of the uploaded package inside the container
AZURE_File = 'models/model.zip'

block_blob_service = BlockBlobService(account_name=AZURE_ACC_NAME, account_key=AZURE_PRIMARY_KEY)

# Read-only shared access signature valid for 30 days. The expiry is a
# timezone-aware UTC datetime; datetime.utcnow() is deprecated and returns a
# naive value that is easy to misinterpret as local time.
sas_url = block_blob_service.generate_blob_shared_access_signature(
    AZURE_CONTAINER,
    AZURE_File,
    permission=BlobPermissions.READ,
    expiry=datetime.now(timezone.utc) + timedelta(days=30),
)

# Full download URL: blob endpoint + container + blob path + SAS query string
downloadurl = f'https://{AZURE_ACC_NAME}.blob.core.windows.net/{AZURE_CONTAINER}/{AZURE_File}?{sas_url}'
# NOTE: anyone holding this URL can read the blob until the signature expires,
# so treat the printed value as a secret.
print(downloadurl)

### 12. Perform Module Twin update

In [None]:
# Install Azure Iot Hub tools
%pip install azure-iot-hub

In [None]:
import sys
from azure.iot.hub import IoTHubRegistryManager
from azure.iot.hub.models import Twin, TwinProperties

In [None]:
# Fill in the connection string, device_id and module_id values from your IoT Hub:
#   1. Go to https://portal.azure.com
#   2. Select your IoT Hub
#   3. Click on Shared access policies
#   4. Click 'service' policy on the right (or another policy having 'service connect' permission)
#   5. Copy Connection string--primary key
CONNECTION_STRING = "<YOUR-CONNECTION-STRING-PRIMARY-KEY>"
DEVICE_ID = "<YOUR-DEVICE-NAME>"
# If you have changed the name of the azureeyemodule for some reason,
# you will need to change it here too.
MODULE_ID = "azureeyemodule"

# Fetch the module twin as it currently stands; its etag is reused for the update
iothub_registry_manager = IoTHubRegistryManager(CONNECTION_STRING)
module_twin = iothub_registry_manager.get_module_twin(DEVICE_ID, MODULE_ID)

print("")
print("Module twin properties before update    :")
print(f"{module_twin.properties}")

In [None]:
# Update twin
# Patch only the desired property "ModelZipUrl" with the SAS download URL,
# passing the etag of the twin fetched earlier.
twin_patch = Twin()
twin_patch.properties = TwinProperties(desired={"ModelZipUrl": downloadurl})
updated_module_twin = iothub_registry_manager.update_module_twin(
    DEVICE_ID,
    MODULE_ID,
    twin_patch,
    module_twin.etag,
)

print("")
print("Module twin properties after update     :")
print(f"{updated_module_twin.properties}")

The trained model will get pushed to the IoT Edge device via the module twin update method.  

Check video stream using [VLC media player](https://www.videolan.org/vlc/) on a local PC:
1. Select Media -> Open Network Stream…  
2. Input the network stream: `rtsp://<ip of the devkit>:8554/result`  
3. Click “Play” button.  