# Creating Azure Resources



In [None]:
from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.authentication import InteractiveLoginAuthentication
import json
from datetime import datetime

Start by specifying your subscription, resource group, and location. Authentication information is obtained in the cells that follow.

In [None]:
# Subscription, resource group, and region -- fill these in before running.

# Name of an existing subscription
SUBSCRIPTION_NAME = ""

# Name of the resource group that will be created
RESOURCE_GROUP = ""

# Location where the resources will be created (Ex: "eastus")
LOCATION = ""

In [None]:
# Install jq (This is used to parse output from the Azure CLI commands)
!sudo apt-get install -y jq

The following *az* commands require that you are logged in to Azure, as described in the *Prerequisites* section of this repo's *Readme* document.

In [None]:
# Get susbcription info
!az account set -s {SUBSCRIPTION_NAME}
temp = '"az account show -s \\"{}\\""'.format(SUBSCRIPTION_NAME)
cmd_result = !eval {temp} | jq -r '.id, .tenantId'
print(cmd_result)
subscription_id, tenant_id = cmd_result

In [None]:
# Create resource group
!az group create -l {LOCATION} -n {RESOURCE_GROUP}

In [None]:
# Authenticate
# Pin the interactive login to the tenant that owns the selected subscription
# (tenant_id was read from `az account show` above), so accounts with access
# to several tenants sign in against the right one.
auth = InteractiveLoginAuthentication(tenant_id=tenant_id)

The following are parameters needed to create and access the main Azure resources. 
You can use the default values below as is, or feel free to modify as needed.

In [None]:
# Timestamp suffix (yymmddHHMMSS) appended to the generated resource names
now_str = datetime.now().strftime("%y%m%d%H%M%S")

# --- AML workspace and compute target ---
AML_WORKSPACE = "ws" + now_str
AML_COMPUTE_NAME = "cmp" + now_str  # limit to 16 chars
AML_VM_SIZE = "Standard_D2"
AML_MIN_NODES = 2
AML_MAX_NODES = 2

# --- AML scheduling ---
SCHED_FREQUENCY = "Hour"
SCHED_INTERVAL = 1

# --- Scoring script ---
PIP_PACKAGES = [
    "numpy",
    "scipy",
    "scikit-learn",
    "pandas",
]
PYTHON_VERSION = "3.6.7"
PYTHON_SCRIPT_NAME = "predict.py"
PYTHON_SCRIPT_DIRECTORY = "scripts"

# --- Blob storage ---
BLOB_ACCOUNT = "ba" + now_str  # limit to 24 chars
MODELS_CONTAINER = "models"
PREDS_CONTAINER = "preds"
DATA_CONTAINER = "data"
# Name of the data file to be copied to blob storage
DATA_BLOB = "sensor_data.csv"


Next, we create the Blob storage account and containers.

In [None]:
# Create Blob storage account
!az storage account create -n {BLOB_ACCOUNT} -g {RESOURCE_GROUP} -l {LOCATION}

In [None]:
# Retrieve Blob storage key
blob_key = !az storage account keys list -g {RESOURCE_GROUP} -n {BLOB_ACCOUNT} | jq -r .[0].value
blob_key = blob_key[0]

In [None]:
# Create models, predictions and data containers
!az storage container create -n {MODELS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {PREDS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {DATA_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}

The following commands copy the pre-trained models and sample data from this repo to blob storage so that AML can access them during job submission.

In [None]:
# Copy models from local dir to blob container
!az storage blob upload-batch -d {MODELS_CONTAINER} -s models --account-name {BLOB_ACCOUNT} --account-key {blob_key}
# Copy dataset to blob
!az storage blob upload -c {DATA_CONTAINER} -f data/'{DATA_BLOB}' -n '{DATA_BLOB}' --account-name {BLOB_ACCOUNT} --account-key {blob_key}

Below, we create the AML workspace and compute target.

In [None]:
# Create the AML workspace in the existing resource group.
# create_resource_group=False because the group was already created via the CLI.
workspace_settings = dict(
    name=AML_WORKSPACE,
    auth=auth,
    subscription_id=subscription_id,
    resource_group=RESOURCE_GROUP,
    create_resource_group=False,
    location=LOCATION,
)
aml_ws = Workspace.create(**workspace_settings)

In [None]:
# Describe the AML compute cluster: VM size and node-count bounds
provisioning_config = AmlCompute.provisioning_configuration(
    vm_size=AML_VM_SIZE,
    min_nodes=AML_MIN_NODES,
    max_nodes=AML_MAX_NODES,
)

# Create the cluster in the workspace and block until provisioning finishes
compute_target = ComputeTarget.create(
    workspace=aml_ws,
    name=AML_COMPUTE_NAME,
    provisioning_configuration=provisioning_config,
)
compute_target.wait_for_completion(show_output=True)

We also generate a JSON config file that the next notebook uses to create and schedule the AML pipeline. The config file includes the AML, Blob storage, and Python parameters.


In [None]:
# Collect the AML, Blob storage, and Python parameters defined above into one
# dictionary. Key order is kept so the written file is unchanged.
pipeline_config = dict(
    resource_group_name=RESOURCE_GROUP,
    subscription_id=subscription_id,
    aml_work_space=AML_WORKSPACE,
    experiment_name="mm_score",
    cluster_name=AML_COMPUTE_NAME,
    location=LOCATION,
    blob_account=BLOB_ACCOUNT,
    blob_key=blob_key,
    models_blob_container=MODELS_CONTAINER,
    data_blob_container=DATA_CONTAINER,
    data_blob=DATA_BLOB,
    preds_blob_container=PREDS_CONTAINER,
    pip_packages=PIP_PACKAGES,
    python_version=PYTHON_VERSION,
    python_script_name=PYTHON_SCRIPT_NAME,
    python_script_directory=PYTHON_SCRIPT_DIRECTORY,
    sched_frequency=SCHED_FREQUENCY,
    sched_interval=SCHED_INTERVAL,
    device_ids=[1, 2, 3],
    sensors=[1, 2, 3, 4, 5],
)

# Serialize to pipeline_config.json in the working directory.
# Note: the file contains the storage account key in plain text.
config_text = json.dumps(pipeline_config, indent=4)
with open('pipeline_config.json', 'w') as f:
    f.write(config_text)

Next, we will create and schedule the scoring pipeline using AML. Follow the instructions in this [notebook](02_create_pipeline.ipynb).