# Creating Azure Resources



In [None]:
import os
from azure.core.exceptions import ResourceExistsError
from azure.storage.blob import BlobServiceClient
from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication, AzureCliAuthentication, \
    InteractiveLoginAuthentication

import json
from datetime import datetime
from azureml.exceptions import AuthenticationException
from dotenv import find_dotenv, get_key

Start by specifying your subscription ID, resource group, and workspace region. Authentication details are not set here; they are read from a `.env` file in the next step.

In [None]:
# Subscription & resource group
# These three values are copied verbatim into pipeline_config.json at the end of this notebook.
subscription_id = ""  # ID of an existing Azure subscription
resource_group = ""  # Name of the resource group that would be created
workspace_region = ""  # location where the resources would be created (Ex: "eastus")

Below, we authenticate and retrieve the AML workspace. (The compute target is not created in this notebook; only its name is configured further down.)

In [None]:
def get_auth(env_path):
    """Pick an Azure ML authentication object based on the contents of a .env file.

    Service-principal authentication is used only when the ``password`` key in
    the .env file holds a real value. If the key is missing, empty, or still
    set to the ``YOUR_SERVICE_PRINCIPAL_PASSWORD`` placeholder, the function
    falls back to Azure CLI credentials and, if those cannot produce an
    authentication header, to an interactive login.

    Args:
        env_path: Path to the .env file read with ``get_key``.

    Returns:
        A ``ServicePrincipalAuthentication``, ``AzureCliAuthentication`` or
        ``InteractiveLoginAuthentication`` instance.
    """
    aml_sp_password = get_key(env_path, 'password')

    # get_key returns None for a missing key; without the truthiness check a
    # missing/empty password would be treated as a configured service principal.
    if aml_sp_password and aml_sp_password != "YOUR_SERVICE_PRINCIPAL_PASSWORD":
        aml_sp_tenant_id = get_key(env_path, 'tenant_id')
        aml_sp_username = get_key(env_path, 'username')
        return ServicePrincipalAuthentication(
            tenant_id=aml_sp_tenant_id,
            service_principal_id=aml_sp_username,
            service_principal_password=aml_sp_password
        )

    try:
        auth = AzureCliAuthentication()
        # Force a token request now so that unusable CLI credentials are
        # detected here rather than on first use.
        auth.get_authentication_header()
    except AuthenticationException:
        auth = InteractiveLoginAuthentication()

    return auth

# Get AML workspace
# The .env file must exist (find_dotenv is told to raise otherwise); it decides
# which authentication method get_auth selects.
env_path = find_dotenv(raise_error_if_not_found=True)
workspace_auth = get_auth(env_path)
ws = Workspace.from_config(auth=workspace_auth)

# Show which workspace was loaded, one attribute per line.
for workspace_field in (ws.name, ws.resource_group, ws.location):
    print(workspace_field)

The following are parameters needed to create and access the main Azure resources. 
You can use the default values below as is, or feel free to modify as needed.

In [None]:
# Timestamp suffix (yymmddHHMMSS, 12 digits) appended to the generated resource names
now_str = datetime.now().strftime("%y%m%d%H%M%S")

# AML workspace and compute target
AML_WORKSPACE = "ws" + now_str
AML_COMPUTE_NAME = "cmp" + now_str  # limit to 16 chars

# AML scheduling
SCHED_FREQUENCY = "Hour"
SCHED_INTERVAL = 1

# Scoring script
PIP_PACKAGES = [
    "numpy",
    "scipy",
    "scikit-learn",
    "pandas",
]
PYTHON_VERSION = "3.6.7"
PYTHON_SCRIPT_NAME = "predict.py"
PYTHON_SCRIPT_DIRECTORY = "scripts"

# Blob storage
BLOB_ACCOUNT = "ba" + now_str  # limit to 24 chars
MODELS_CONTAINER = "models"
PREDS_CONTAINER = "preds"
DATA_CONTAINER = "data"
DATA_BLOB = "sensor_data.csv"  # name of data file to be copied to blob storage


Next, we retrieve the Blob storage account and create containers.

In [None]:
# Take the storage account name and key from the workspace's default datastore.
blob_datastore = ws.get_default_datastore()
blob_account, blob_key = blob_datastore.account_name, blob_datastore.account_key


In [None]:
# Create models, predictions and data containers
account_url = "https://" + blob_account + ".blob.core.windows.net/"
service = BlobServiceClient(account_url=account_url, credential=blob_key)

for container in (MODELS_CONTAINER, PREDS_CONTAINER, DATA_CONTAINER):
    try:
        service.create_container(container)
    except ResourceExistsError:
        # The container is already there; report it and move on to the next one.
        print(str(container) + ' - Exists')


The following commands copy the pre-trained models and sample data from this repo to blob storage so that AML can access them during job submission.

In [None]:
from azure.storage.blob import BlobClient

# Upload the sample data file. The file and blob name come from DATA_BLOB so that
# the uploaded blob matches the "data_blob" entry written to pipeline_config.json.
# overwrite=True keeps this cell re-runnable: without it upload_blob raises
# ResourceExistsError as soon as the blob already exists.
data_container = service.get_container_client(DATA_CONTAINER)
with open(os.path.join("data", DATA_BLOB), "rb") as data:
    data_container.upload_blob(DATA_BLOB, data, overwrite=True)

# Upload every file in ./models under its own file name.
models_container = service.get_container_client(MODELS_CONTAINER)
for model in sorted(os.listdir("models")):
    model_path = os.path.join("models", model)
    if not os.path.isfile(model_path):
        continue  # open() would fail on a sub-directory
    with open(model_path, "rb") as data:
        models_container.upload_blob(model, data, overwrite=True)

We also generate a JSON config file that we use in the next notebook to create and schedule the AML pipeline. The config file includes AML, Blob storage, and Python parameters. Note that it also contains the storage account key, so keep it out of source control.


In [None]:
# Settings written to pipeline_config.json.
pipeline_config = {
  "resource_group_name": resource_group,
  "subscription_id": subscription_id,
  # NOTE(review): AML_WORKSPACE is a generated name, while `ws` was loaded via
  # Workspace.from_config -- confirm which name the consumer of this file expects.
  "aml_work_space": AML_WORKSPACE,
  "experiment_name": "mm_score",
  "cluster_name": AML_COMPUTE_NAME,
  "workspace_region": workspace_region,
  # The account name must be the one `blob_key` belongs to (the workspace's
  # default datastore, where the containers above were created); the generated
  # BLOB_ACCOUNT name would not match the key.
  "blob_account": blob_account,
  "blob_key": blob_key,
  "models_blob_container": MODELS_CONTAINER,
  "data_blob_container": DATA_CONTAINER,
  "data_blob" : DATA_BLOB,
  "preds_blob_container": PREDS_CONTAINER,
  "pip_packages": PIP_PACKAGES,
  "python_version": PYTHON_VERSION,
  "python_script_name": PYTHON_SCRIPT_NAME,
  "python_script_directory": PYTHON_SCRIPT_DIRECTORY,
  "sched_frequency": SCHED_FREQUENCY,
  "sched_interval": SCHED_INTERVAL,
  "device_ids": [ 1, 2, 3 ],
  "sensors": [ 1, 2, 3, 4, 5 ]
}
# The file stores the storage account key in plain text -- do not commit it.
with open('pipeline_config.json', 'w') as f:
    json.dump(pipeline_config, f, indent=4)

Next, we will create and schedule the scoring pipeline using AML. Follow the instructions in this [notebook](02_create_pipeline.ipynb).