In [1]:
# Connect to Azure Machine Learning Workspace 
import sys
import os
import json
import archai.common.azureml_helper as aml_helper

# Put the resolved ./scripts folder on the module search path before the
# local imports that follow.
sys.path.append(os.path.realpath("scripts"))

from cnn_search_space import CNNSearchSpace

# Make sure the scripts directory exists; it is passed as the `code` folder
# of the job components defined later in this notebook.
scripts_dir = "./scripts"
os.makedirs(scripts_dir, exist_ok=True)

# Load the workspace/storage settings. The `with` block closes the file
# handle as soon as parsing finishes instead of leaving it open.
config_file = "../.azureml/config.json"
with open(config_file, 'r') as config_stream:
    config = json.load(config_stream)

# Report every missing setting (not only the first one) and then stop, so
# the failure is a clear message here rather than a bare KeyError on the
# lookups below or an obscure error in a later cell.
required_keys = ['subscription_id', 'resource_group', 'workspace_name', 'storage_account_key', 'storage_account_name']
missing_keys = [key for key in required_keys if key not in config]
for required_key in missing_keys:
    print(f"### Error: please add a {required_key} to {config_file}")
if missing_keys:
    raise KeyError(f"{config_file} is missing required keys: {', '.join(missing_keys)}")

# Storage credentials reused by the datastore and ArchaiStore cells.
storage_account_key = config['storage_account_key']
storage_account_name = config['storage_account_name']

In [2]:
# Build the workspace client from the same config file that was loaded above;
# the cells below use it for every workspace operation.
ml_client = aml_helper.get_aml_client_from_file(
    config_path=config_file,
)

Found the config file in: ..\.azureml\config.json


In [3]:
# Names of the two clusters used for the search; cpu_compute_name is also the
# compute target of the data-prep pipeline defined further down.
cpu_compute_name = "nas-cpu-cluster-D14-v2"
gpu_compute_name = "nas-gpu-cluster-NC6"

# CPU cluster for running the search.
aml_helper.create_compute_cluster(
    ml_client,
    cpu_compute_name,
    size="Standard_D14_v2",
    location="westus",
)

# GPU cluster for running the search.
aml_helper.create_compute_cluster(
    ml_client,
    gpu_compute_name,
    size="Standard_NC6",
    location="westus",
)

You already have a cluster named nas-cpu-cluster-D14-v2, we'll reuse it as is.
You already have a cluster named nas-gpu-cluster-NC6, we'll reuse it as is.


In [4]:
# Create the AML environment from conda.yaml; its name and version are
# referenced by the command components defined later.
archai_job_env = aml_helper.create_environment_from_file(
    ml_client,
    conda_file="conda.yaml",
    version="0.1.3",
)

Environment with name aml-archai is registered to workspace, the environment version is 0.1.3


In [5]:
from azure.ai.ml.entities import AzureBlobDatastore
from azure.ai.ml.entities._credentials import AccountKeyConfiguration

# Describe an AML datastore named "models" that points at the "models" blob
# container of our storage account; the NAS generated models are kept there.
models_credentials = AccountKeyConfiguration(account_key=storage_account_key)
blob_store = AzureBlobDatastore(
    name="models",
    container_name="models",
    account_name=storage_account_name,
    credentials=models_credentials,
    description="Datastore pointing to our models blob container.",
)

# Create or update the datastore in the workspace. This is the cell's last
# expression, so the returned datastore is displayed.
ml_client.create_or_update(blob_store)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'models', 'description': 'Datastore pointing to our models blob container.', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/c8b7f913-60fb-4759-a310-fc5630e56f99/resourceGroups/snpe-aml-rg/providers/Microsoft.MachineLearningServices/workspaces/snpe-aml-workspace/datastores/models', 'Resource__source_path': None, 'base_path': 'd:\\git\\microsoft\\archai\\docs\\advanced_guide\\cloud\\notebooks\\distributed_search', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x00000273F1710E20>, 'credentials': {'type': 'account_key'}, 'container_name': 'models', 'account_name': 'archaimnistmodels', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [25]:
from store import ArchaiStore

# Open the "datasets" blob container of our storage account through
# ArchaiStore and upload the config file with "MNIST" as the first argument.
# NOTE(review): presumably this upload is what makes sure the container
# exists - confirm against ArchaiStore.upload_blob.
# NOTE(review): config_file also holds storage_account_key, so this copies the
# key into the container - confirm that is intended.
data_container_name = "datasets"
store = ArchaiStore(
    storage_account_key,
    storage_account_name,
    blob_container_name=data_container_name,
)
store.upload_blob("MNIST", config_file)

In [26]:
# Make sure the dataset container exists in blob storage before registering
# it with AML.
store.get_or_create_dataset_container()

# Describe an AML datastore that points at the dataset container, using the
# same account-key credentials as the rest of the notebook.
data_store_name = "datasets"
dataset_credentials = AccountKeyConfiguration(account_key=storage_account_key)
data_store = AzureBlobDatastore(
    name=data_store_name,
    container_name=data_container_name,
    account_name=storage_account_name,
    credentials=dataset_credentials,
    description="Datastore pointing to our dataset container.",
)

# Create or update the datastore in the workspace. This is the cell's last
# expression, so the returned datastore is displayed.
ml_client.create_or_update(data_store)

AzureBlobDatastore({'type': <DatastoreType.AZURE_BLOB: 'AzureBlob'>, 'name': 'datasets', 'description': 'Datastore pointing to our dataset container.', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/c8b7f913-60fb-4759-a310-fc5630e56f99/resourceGroups/snpe-aml-rg/providers/Microsoft.MachineLearningServices/workspaces/snpe-aml-workspace/datastores/datasets', 'Resource__source_path': None, 'base_path': 'd:\\git\\microsoft\\archai\\docs\\advanced_guide\\cloud\\notebooks\\distributed_search', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000027389C81C60>, 'credentials': {'type': 'account_key'}, 'container_name': 'datasets', 'account_name': 'archaimnistmodels', 'endpoint': 'core.windows.net', 'protocol': 'https'})

In [27]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output

# Folder inside the registered dataset datastore that the prep step writes to.
datastore_path = f"azureml://datastores/{data_store_name}/paths/MNIST/"

# Command component that runs prep_data_store.py from scripts_dir and hands it
# the mounted output folder via --path.
# NOTE(review): the "name" input is declared but never referenced in the
# command line - confirm whether prep_data_store.py is expected to receive it.
data_prep_component = command(
    name="data_prep2",
    display_name="Data preparation for training",
    description="Downloads the remote dataset to our blob store.",
    environment=f"{archai_job_env.name}:{archai_job_env.version}",
    # The source folder of the component
    code=scripts_dir,
    inputs=dict(name=Input(type="string")),
    outputs=dict(
        data=Output(type="uri_folder", path=datastore_path, mode="rw_mount"),
    ),
    # The backslash-newline pairs are line continuations inside the literal,
    # so the command is a single line; the literal is kept exactly as written.
    command="""python3 prep_data_store.py \
            --path ${{outputs.data}} \
            """,
)

In [28]:
# Command component that recursively lists the folder it receives as its
# "data" input, so the job log shows what the prep step produced.
check_data_component = command(
    name="check_data2",
    display_name="Ensure the data prep step worked",
    description="Checks the data exists in the mounted blob store folder.",
    environment=f"{archai_job_env.name}:{archai_job_env.version}",
    # The source folder of the component
    code=scripts_dir,
    inputs=dict(data=Input(type="uri_folder")),
    command="ls -R ${{inputs.data}}",
)

In [29]:
from azure.ai.ml import dsl, Input, Output

# Two-step pipeline that runs on the CPU cluster: the data prep component
# first, then the check component on the folder the prep step produced.
@dsl.pipeline(
    compute=cpu_compute_name,
    description="Data prep pipeline2",
)
def mnist_pipeline():
    # call data_prep_component like a python function with its own inputs
    # NOTE(review): the Azure ML DSL presumably names pipeline steps after
    # these local variables - confirm before renaming or removing them.
    data_prep_job = data_prep_component(
        name="MNIST"
    )

    # feed the prep step's output folder into the check step; check_job is
    # not referenced again in this function
    check_job = check_data_component(
        data=data_prep_job.outputs.data
    )
    
    # expose the prep step's output folder as the pipeline output "data"
    return { "data": data_prep_job.outputs.data }

In [30]:
# Instantiate the pipeline and submit it to the workspace; the experiment
# name groups the run under "mnist_test_run".
pipeline = mnist_pipeline()
pipeline_job = ml_client.jobs.create_or_update(pipeline, experiment_name="mnist_test_run")


Uploading scripts (0.03 MBs): 100%|##########| 28545/28545 [00:00<00:00, 346508.78it/s]




In [31]:
import webbrowser
# Look up the Studio endpoint of the submitted job and open it in the
# default web browser.
studio_endpoint = pipeline_job.services["Studio"].endpoint
webbrowser.open(studio_endpoint)

True