# Creating Azure Resources



In [None]:
import json
import os

from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget

Start by specifying your subscription and resource group information, then log in.

In [None]:
# Azure environment settings -- edit these to match your own account.
# LOCATION is the region passed to the resource-creation commands.
LOCATION = "eastus"
# RESOURCE_GROUP is also used as the prefix of the derived resource names.
RESOURCE_GROUP = "bleikamlmm1"
# SUBSCRIPTION_NAME is the subscription selected and queried through the az CLI.
SUBSCRIPTION_NAME = "Boston Team Danielle"

In [None]:
# Log in to the Azure CLI using the device-code flow (--use-device-code).
# This cell is interactive: follow the instructions the command prints.
!az login --use-device-code

In [None]:
# Select subscription
# Makes SUBSCRIPTION_NAME the active subscription for the az CLI.
# The value is double-quoted because the subscription name may contain spaces.
!az account set -s "{SUBSCRIPTION_NAME}"

In [None]:
# Get susbcription info
temp = '"az account show -s \\"{}\\""'.format(SUBSCRIPTION_NAME)
subscription_id, tenant_id  = !eval {temp} | jq -r '.id, .tenantId'

In [None]:
# Create resource group
# Creates the resource group named RESOURCE_GROUP in the region LOCATION.
!az group create -l {LOCATION} -n {RESOURCE_GROUP}

The following parameters are needed to create and access the main Azure resources. 
You can use the default values below as is, or modify them as needed.

In [None]:
# --- Azure Machine Learning ---
# Workspace and compute names are the resource group name plus a fixed suffix.
AML_WORKSPACE = RESOURCE_GROUP + "ws"
AML_COMPUTE_NAME = RESOURCE_GROUP + "cmpt"
# VM size and node-count bounds passed to the compute provisioning configuration.
AML_VM_SIZE = "Standard_D2"
AML_MIN_NODES = 2
AML_MAX_NODES = 2

# --- Scoring script ---
# These values are only recorded in the generated pipeline config file.
PIP_PACKAGES = ["numpy", "scipy", "scikit-learn", "pandas"]
PYTHON_VERSION = "3.6.7"
PYTHON_SCRIPT_NAME = "predict.py"
PYTHON_SCRIPT_DIRECTORY = "scripts"

# --- Blob storage ---
# Storage account name is also derived from the resource group name.
# NOTE(review): Azure restricts storage account names (length, allowed
# characters) -- confirm the derived name is valid if RESOURCE_GROUP changes.
BLOB_ACCOUNT = RESOURCE_GROUP + "storage"
MODELS_CONTAINER = "models"
PREDS_CONTAINER = "preds"
DATA_CONTAINER = "data"
DATA_BLOB = "sensor_data.csv"  # file under data/ that is uploaded to blob storage


In [None]:
# Create Blob storage account
# Creates the storage account BLOB_ACCOUNT inside RESOURCE_GROUP, in region LOCATION.
!az storage account create -n {BLOB_ACCOUNT} -g {RESOURCE_GROUP} -l {LOCATION}

In [None]:
# Retrieve Blob storage key
blob_key = !az storage account keys list -g {RESOURCE_GROUP} -n {BLOB_ACCOUNT} | jq -r .[0].value
blob_key = blob_key[0]

In [None]:
# Create models, predictions and data containers
!az storage container create -n {MODELS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {PREDS_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}
!az storage container create -n {DATA_CONTAINER} --account-key {blob_key} --account-name {BLOB_ACCOUNT}

The following commands copy the pre-trained models and sample data from this repo to blob storage so that AML can access them during job submission.

In [None]:
# Copy models from local dir to blob container
# Batch-uploads the contents of the local `models` directory to MODELS_CONTAINER.
!az storage blob upload-batch -d {MODELS_CONTAINER} -s models --account-name {BLOB_ACCOUNT} --account-key {blob_key}
# Copy dataset to blob
# Uploads the local file data/<DATA_BLOB> to DATA_CONTAINER under the same blob name.
# The file name is single-quoted for the shell in both the path and the blob name.
!az storage blob upload -c {DATA_CONTAINER} -f data/'{DATA_BLOB}' -n '{DATA_BLOB}' --account-name {BLOB_ACCOUNT} --account-key {blob_key}

In [None]:
# Create AML Workspace
# The workspace is placed in the existing resource group (create_resource_group=False),
# in the subscription resolved earlier from SUBSCRIPTION_NAME.
# exist_ok=True makes this cell safe to re-run: if the workspace already exists,
# it is returned instead of the call failing.
aml_ws = Workspace.create(
    name=AML_WORKSPACE,
    subscription_id=subscription_id,
    resource_group=RESOURCE_GROUP,
    create_resource_group=False,
    location=LOCATION,
    exist_ok=True,
)

In [None]:
# Create compute target
# Build the provisioning configuration from the AML_* parameters: VM size plus
# the minimum and maximum node counts.
provisioning_config = AmlCompute.provisioning_configuration(
    vm_size=AML_VM_SIZE,
    min_nodes=AML_MIN_NODES,
    max_nodes=AML_MAX_NODES,
)

# Request the compute target in the workspace under AML_COMPUTE_NAME, then wait
# for the operation to complete while showing its output.
compute_target = ComputeTarget.create(
    aml_ws,
    AML_COMPUTE_NAME,
    provisioning_config,
)
compute_target.wait_for_completion(show_output=True)

You will also need service principal credentials for authentication. The following command creates and retrieves the credentials. For more information on service principals, check the documentation [here](https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli?view=azure-cli-latest).

In [None]:
# Create and get service principal credentials
# jq prints the appId and the password on separate lines of the captured output.
# NOTE(review): presumably every run of this cell creates a new service
# principal -- confirm before re-running.
temp = !az ad sp create-for-rbac | jq -r '.appId, .password'
# Only the last two captured lines are unpacked.
# NOTE(review): presumably because the CLI may print extra lines first -- confirm.
sp_client, sp_secret = temp[-2:]

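We also generate a JSON config file for the next notebook, which creates and schedules the AML pipeline. The config file includes the AML, Blob storage, Python, and service principal parameters. Because it contains secrets (the storage account key and the service principal secret), do not commit it to source control.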


In [99]:
# Collect every parameter the pipeline notebook needs into one dictionary:
# service principal credentials, AML names, Blob storage settings and the
# scoring-script settings defined earlier in this notebook.
pipeline_config = {
  "sp_tenant": tenant_id,
  "sp_client": sp_client,
  "sp_secret": sp_secret,
  "resource_group_name": RESOURCE_GROUP,
  "subscription_id": subscription_id,
  "aml_work_space": AML_WORKSPACE,
  "experiment_name": "mm_score",
  "cluster_name": AML_COMPUTE_NAME,
  "location": LOCATION,
  "blob_account": BLOB_ACCOUNT,
  "blob_key": blob_key,
  "models_blob_container": MODELS_CONTAINER,
  "data_blob_container": DATA_CONTAINER,
  "data_blob" : DATA_BLOB,
  "preds_blob_container": PREDS_CONTAINER,
  "pip_packages": PIP_PACKAGES,
  "python_version": PYTHON_VERSION,
  "python_script_name": PYTHON_SCRIPT_NAME,
  "python_script_directory": PYTHON_SCRIPT_DIRECTORY,
  "device_ids": [ 1, 2, 3 ],
  "tags": [ 1, 2, 3, 4, 5 ]
}

# The config holds secrets (sp_secret, blob_key), so the file is created
# readable and writable by the current user only (mode 0o600) rather than with
# the default permissions.
config_path = 'pipeline_config.json'
config_fd = os.open(config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(config_fd, 'w') as f:
    json.dump(pipeline_config, f, indent=4)
# The mode given to os.open only applies when the file is newly created, so
# also tighten the permissions of a file left over from an earlier run.
os.chmod(config_path, 0o600)

Finally, we will create and schedule the scoring pipeline using AML. Follow the instructions [here](02_create_pipeline.ipynb).