## This is a standalone notebook, used for development purposes

This is a sample notebook that executes a command job in the AML workspace. Data scientists can use it during development to try out their experiments.

In [None]:
## setup variables

In [None]:
## setup_secrets

# use `load_env` and define a sample .env template to demonstrate.

# Create a compute cluster, or get it if it already exists, to run the command

The following block of code creates a compute cluster (`AmlCompute`) within the AML workspace. If the workspace already has a compute target with the given name, it returns the existing one instead.

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import AmlCompute

def create_or_get_compute(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    cluster_name: str,
    cluster_size: str,
    cluster_region: str,
    min_instances: int,
    max_instances: int,
    idle_time_before_scale_down: int,
):
    """Return the compute target named ``cluster_name``, creating it if lookup fails.

    An ``MLClient`` is built for the given workspace using
    ``DefaultAzureCredential``. The compute target is looked up by name; if the
    lookup raises, an ``AmlCompute`` cluster is created from the remaining
    arguments and the call blocks until the create/update operation finishes.

    Args:
        subscription_id: Subscription passed to ``MLClient``.
        resource_group_name: Resource group passed to ``MLClient``.
        workspace_name: Workspace passed to ``MLClient``.
        cluster_name: Name of the compute target to fetch or create.
        cluster_size: Passed as ``size`` to ``AmlCompute``.
        cluster_region: Passed as ``location`` to ``AmlCompute``.
        min_instances: Passed as ``min_instances`` to ``AmlCompute``.
        max_instances: Passed as ``max_instances`` to ``AmlCompute``.
        idle_time_before_scale_down: Passed unchanged to ``AmlCompute``.

    Returns:
        The object returned by ``client.compute.get`` or, for a new cluster,
        the result of ``client.compute.begin_create_or_update``.

    Raises:
        Exception: Any error from client construction or cluster creation is
            re-raised after a message is printed.
    """
    compute_object = None
    try:
        client = MLClient(
            DefaultAzureCredential(),
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        try:
            compute_object = client.compute.get(cluster_name)
            print(f"Found existing compute target {cluster_name}, so using it.")
        except Exception:
            # Only ordinary errors are treated as "not found". A bare `except:`
            # would also trap KeyboardInterrupt/SystemExit and go on to
            # provision a cluster the user was trying to cancel.
            # If the lookup failed for another reason (auth, network), the
            # creation below fails too and the outer handler re-raises it.
            print(f"{cluster_name} is not found! Trying to create a new one.")
            compute_object = AmlCompute(
                name=cluster_name,
                type="amlcompute",
                size=cluster_size,
                location=cluster_region,
                min_instances=min_instances,
                max_instances=max_instances,
                idle_time_before_scale_down=idle_time_before_scale_down,
            )
            # begin_create_or_update returns a poller; .result() blocks until done.
            compute_object = client.compute.begin_create_or_update(
                compute_object
            ).result()
            print(f"A new cluster {cluster_name} has been created.")
    except Exception:
        print("Oops!  invalid credentials.. Try again...")
        raise
    return compute_object

# Create an environment, or get an environment

The following block of code creates an environment within the AML workspace. If an environment with the same name already exists, it is updated via `create_or_update`, and the resulting environment is returned.

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Environment

def create_or_get_environment(
    subscription_id: str,
    resource_group_name: str,
    workspace_name: str,
    env_base_image_name: str,
    conda_path: str,
    environment_name: str,
    description: str,
):
    """Create or update an environment in the workspace and return it.

    Builds an ``MLClient`` with ``DefaultAzureCredential``, describes the
    environment from a base image plus a conda file, and submits it through
    ``client.environments.create_or_update``.

    Args:
        subscription_id: Subscription passed to ``MLClient``.
        resource_group_name: Resource group passed to ``MLClient``.
        workspace_name: Workspace passed to ``MLClient``.
        env_base_image_name: Passed as ``image`` to ``Environment``.
        conda_path: Passed as ``conda_file`` to ``Environment``.
        environment_name: Passed as ``name`` to ``Environment``.
        description: Passed as ``description`` to ``Environment``.

    Returns:
        Whatever ``client.environments.create_or_update`` returns.

    Raises:
        Exception: Any failure is re-raised after a message is printed.
    """
    try:
        print(f"Checking {environment_name} environment.")
        credential = DefaultAzureCredential()
        ml_client = MLClient(
            credential,
            subscription_id=subscription_id,
            resource_group_name=resource_group_name,
            workspace_name=workspace_name,
        )
        environment_spec = Environment(
            name=environment_name,
            description=description,
            image=env_base_image_name,
            conda_file=conda_path,
        )
        registered_environment = ml_client.environments.create_or_update(
            environment_spec
        )
        print(f"Environment {environment_name} has been created or updated.")
        return registered_environment
    except Exception:
        failure_message = (
            "Oops! invalid credentials or error while creating ML environment.. Try again..."
        )
        print(failure_message)
        raise

In [2]:
%%writefile data_prep.py
import argparse

def main(raw_data_path, prep_data_path):
    """Placeholder data-prep entry point: report the two paths it was given.

    Args:
        raw_data_path: Location of the raw input data.
        prep_data_path: Location for the prepared data.
    """
    status_message = (
        f"function to process raw data from: {raw_data_path} "
        f"and prep data from: {prep_data_path}"
    )
    print(status_message)
    # perform the data prep activity


if __name__ == "__main__":
    # Read the raw/prepared data locations from the command line, then run main().
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        "--raw_data_path",
        type=str,
        default="../data/raw_data",
        help="Path to raw data",
    )
    cli_parser.add_argument(
        "--prep_data_path",
        type=str,
        default="../data/prep_data",
        help="Path to prep data",
    )
    cli_args = cli_parser.parse_args()
    main(cli_args.raw_data_path, cli_args.prep_data_path)


Writing data_prep.py


In [None]:
# create a command job

from azure.ai.ml import command

def create_command(experiment_name, display_name, 
                   environment, cluster_name, tenant_id,client_id,
                   client_secret, subscription_id, resource_group_name,
                   workspace_name, deploy_environment):    
    """Build the command job that runs the data-prep script.

    The job uses the current directory (``./``) as its code folder and runs
    ``data_prep.py``, the script written by the ``%%writefile`` cell in this
    notebook, with its ``--raw_data_path`` and ``--prep_data_path`` options.

    Args:
        experiment_name: Passed as ``experiment_name`` to ``command``.
        display_name: Passed as ``display_name`` to ``command``.
        environment: Passed as ``environment`` to ``command``.
        cluster_name: Passed as ``compute`` to ``command``.
        tenant_id: Exposed to the job as ``AZURE_TENANT_ID``.
        client_id: Exposed to the job as ``AZURE_CLIENT_ID``.
        client_secret: Exposed to the job as ``AZURE_CLIENT_SECRET``.
        subscription_id: Exposed to the job as ``SUBSCRIPTION_ID``.
        resource_group_name: Exposed to the job as ``RESOURCE_GROUP_NAME``.
        workspace_name: Exposed to the job as ``WORKSPACE_NAME``.
        deploy_environment: Exposed to the job as ``DEPLOY_ENVIRONMENT``.

    Returns:
        The object returned by ``command(...)``; it is not submitted here.
    """
    # NOTE(review): client_secret is handed to the job as a plain environment
    # variable; confirm this is acceptable for the target workspace.
    command_job = command(
            experiment_name=experiment_name,
            display_name=display_name,
            code="./",
            # The script name and flags must match data_prep.py exactly;
            # the previous "prep_data.py" did not exist in the code folder.
            command=(
                "python data_prep.py "
                "--raw_data_path ../data/raw_data "
                "--prep_data_path ../data/prep_data"
            ),
            environment=environment,
            compute=cluster_name,
            environment_variables={
                "AZURE_TENANT_ID": tenant_id,
                "AZURE_CLIENT_ID": client_id,
                "AZURE_CLIENT_SECRET": client_secret,
                "SUBSCRIPTION_ID": subscription_id,
                "RESOURCE_GROUP_NAME": resource_group_name,
                "WORKSPACE_NAME": workspace_name,
                "DEPLOY_ENVIRONMENT": deploy_environment,
            }
        )
    return command_job

In [None]:
# wait for the command job to finish

In [None]:
# orchestrate !!

# get_compute
# get_environment
# command job
# wait for the command job to get over. (optional)