In [56]:
# Connect to Azure Machine Learning Workspace 
import sys
import os
import json
import archai.common.azureml_helper as aml_helper
from azure.ai.ml.entities import UserIdentityConfiguration

sys.path.append(os.path.realpath('scripts'))

from cnn_search_space import CNNSearchSpace

# make sure we have a scripts dir for the code to run our jobs.
import os
scripts_dir = "./scripts"
os.makedirs(scripts_dir, exist_ok=True)

config_file = "../.azureml/config.json"
config = json.load(open(config_file, 'r'))

for required_key in ['subscription_id', 'resource_group', 'workspace_name', 'storage_account_key', 'storage_account_name']:
    if not required_key in config:
        print(f"### Error: please add a {required_key} to {config_file}")

storage_account_key = config['storage_account_key']    
storage_account_name = config['storage_account_name']

print(f'Using storage account: {storage_account_name}')

Using storage account: archaimnistmodels


In [57]:
# Get a handle to the workspace
ml_client = aml_helper.get_aml_client_from_file(config_path=config_file)
print(f'Using workspace "{ml_client.workspace_name}" in resource group "{ml_client.resource_group_name}"')
print(f'Using subscription "{ml_client.subscription_id}"')

Found the config file in: ..\.azureml\config.json


Using workspace "snpe-aml-workspace" in resource group "snpe-aml-rg"
Using subscription "c8b7f913-60fb-4759-a310-fc5630e56f99"


In [113]:
# Create cpu cluster for running the search
cpu_compute_name = "nas-cpu-cluster-D14-v2"
aml_helper.create_compute_cluster(ml_client, cpu_compute_name, size="Standard_D14_v2", location="westus2")

# Create gpu cluster for running the search
gpu_compute_name = "nas-gpu-cluster-NC6"
aml_helper.create_compute_cluster(ml_client, gpu_compute_name, size="Standard_NC6", location="westus2", max_instances=8)

You already have a cluster named nas-cpu-cluster-D14-v2, we'll reuse it as is.
You already have a cluster named nas-gpu-cluster-NC6, we'll reuse it as is.


In [114]:
archai_job_env = aml_helper.create_environment_from_file(ml_client, conda_file="conda.yaml", version='0.1.5')

Environment with name aml-archai is registered to workspace, the environment version is 0.1.5


In [115]:
from store import ArchaiStore

# Register the datastore with AML
data_store_name = "datasets"
data_container_name = "datasets"
model_store_name = "models"
model_container_name = "models"

# make sure the datasets container exists
store = ArchaiStore(storage_account_name, storage_account_key, blob_container_name=data_container_name)
store.upload_blob("MNIST", config_file)

# make sure the models container exists
store = ArchaiStore(storage_account_name, storage_account_key, blob_container_name=model_container_name)
store.upload_blob("config", config_file)

datastore_path = f'azureml://datastores/{data_store_name}/paths/MNIST'
results_path = f'azureml://datastores/{model_store_name}/paths/MNIST'


In [116]:
from azure.ai.ml.entities import AzureBlobDatastore
from azure.ai.ml.entities._credentials import AccountKeyConfiguration

try:
    model_store = ml_client.datastores.get(model_store_name)
except:
    # Register the blob store container for storing our NAS generated models in.
    model_store = AzureBlobDatastore(
        name=model_store_name,
        description="Datastore pointing to our models blob container.",
        account_name=storage_account_name,
        container_name=model_container_name,
        credentials=AccountKeyConfiguration(
            account_key=storage_account_key
        ),
    )

    ml_client.create_or_update(model_store)

In [117]:
try:
    data_store = ml_client.datastores.get(data_store_name)
except:
    data_store = AzureBlobDatastore(
        name=data_store_name,
        description="Datastore pointing to our dataset container.",
        account_name=storage_account_name,
        container_name=data_container_name,
        credentials=AccountKeyConfiguration(
            account_key=storage_account_key
        ),
    )

    ml_client.create_or_update(data_store)

In [118]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output

data_prep_component = command(
    name="data_prep2",
    display_name="Data preparation for training",
    description="Downloads the remote dataset to our blob store.",
    inputs= {
        "name": Input(type='string')
    },
    outputs= {
        "data": Output(type="uri_folder", path=datastore_path, mode="rw_mount")
    },

    # The source folder of the component
    code=scripts_dir,
    command="""python3 prep_data_store.py \
            --path ${{outputs.data}} \
            """,
    environment=f"{archai_job_env.name}:{archai_job_env.version}",
)

In [119]:
environment_name = f'{archai_job_env.name}:{archai_job_env.version}'
hex_config = bytes(json.dumps(config), encoding='utf-8').hex()

search_component = command(
    name="search",
    display_name="The Archai NAS search",
    description="Runs the NAS search algorithm.",    
    inputs= {
        "data": Input(type="uri_folder")
    },
    outputs= {
        "results": Output(type="uri_folder", path=results_path, mode="rw_mount")
    },
    code=scripts_dir,
    identity= UserIdentityConfiguration(),
    command='python3 search.py ' + \
            f'--data_dir {datastore_path} ' + \
            f'--output_dir {results_path} ' + \
            f'--environment "{environment_name}" ' + \
            f'--compute "{gpu_compute_name}" ' + \
            f'--config "{hex_config}"',
    environment=f"{archai_job_env.name}:{archai_job_env.version}",
)
print(f"{archai_job_env.name}:{archai_job_env.version}")
print(search_component.command)

aml-archai:0.1.5
python3 search.py --data_dir azureml://datastores/datasets/paths/MNIST --output_dir azureml://datastores/models/paths/MNIST --environment "aml-archai:0.1.5" --compute "nas-gpu-cluster-NC6" --config "7b22737562736372697074696f6e5f6964223a202263386237663931332d363066622d343735392d613331302d666335363330653536663939222c20227265736f757263655f67726f7570223a2022736e70652d616d6c2d7267222c2022776f726b73706163655f6e616d65223a2022736e70652d616d6c2d776f726b7370616365222c2022696f745f7265736f757263655f67726f7570223a2022736e70652d6465766963652d6875622d7267222c2022696f745f6875625f6e616d65223a20224d7372536e7065446576696365487562222c20226c6f636174696f6e223a202277657374757332222c202273746f726167655f6163636f756e745f6b6579223a202279485a4459454c3045774a65754c54517747395047713867564f6d78777031593836686e54367239735732666659535967686f4c496a694973712f4353454e45766471785a78546b713872482b4153747548445944773d3d222c202273746f726167655f6163636f756e745f6e616d65223a20226172636861696d6e6973746d6f64656c

In [120]:
from azure.ai.ml import dsl, Input, Output

@dsl.pipeline(
    compute=cpu_compute_name,
    description="Data prep pipeline2",
)
def mnist_pipeline():
    # using data_prep_function like a python call with its own inputs
    data_prep_job = data_prep_component(
        name="MNIST"
    )

    # check the dataset
    check_job = search_component(
        data=data_prep_job.outputs.data
    )
    
    return { "data": data_prep_job.outputs.data }

In [121]:
pipeline = mnist_pipeline()

# submit the pipeline job
pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    # Project's name
    experiment_name="mnist_test_run",
)


[32mUploading scripts (0.05 MBs): 100%|##########| 45014/45014 [00:01<00:00, 42621.20it/s]
[39m



In [122]:
import webbrowser
# open the pipeline in web browser
webbrowser.open(pipeline_job.services["Studio"].endpoint)

True