# Import libraries

In [1]:
# Import libraries
from azure.ai.ml import MLClient                          # Handle to the workspace
from azure.identity import DefaultAzureCredential         # Authentication package
from azure.identity import InteractiveBrowserCredential   # Authentication package (browser login alternative)
from azure.ai.ml.entities import AmlCompute               # Compute
from azure.ai.ml.entities import Environment              # Environment
from azure.ai.ml.entities import Model                    # Model
from azure.ai.ml import command                           # Job/command
from azure.ai.ml import Input                             # Data input
from azure.ai.ml.entities import ManagedOnlineEndpoint    # Managed online endpoint
from azure.ai.ml.entities import ManagedOnlineDeployment  # Managed online deployment
import uuid                                               # Create UUID
import os                                                 # System

# Connect to AzureML workspace

In [2]:
# Authenticate
credential = DefaultAzureCredential()                     # default credential
# credential = InteractiveBrowserCredential()             # browser input credential

# Workspace coordinates. Each value can be overridden through an environment
# variable so the notebook can target another workspace without editing code;
# when the variable is unset or empty, the original value is used.
subscription_id = (
    os.environ.get("AZURE_SUBSCRIPTION_ID") or "9e456110-c6ac-44a7-81cf-5b26a6935c02"
)
resource_group_name = os.environ.get("AZURE_RESOURCE_GROUP") or "data-science-ml"
workspace_name = os.environ.get("AZURE_ML_WORKSPACE") or "big-data-science-workspace"

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)

# Create compute cluster

In [3]:
# Name assigned to the compute cluster
cpu_compute_target = "starbucks-cluster"

try:
    # Reuse the cluster if one with this name already exists in the workspace
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    # NOTE(review): any failure of the lookup (not only "cluster not found")
    # lands here and triggers a creation attempt -- consider narrowing the
    # exception type once the SDK's not-found error is imported.
    print("Creating a new cpu compute target...")

    # Describe the Azure ML compute cluster with the intended parameters
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        type="amlcompute",                  # Azure ML Compute is the on-demand VM service
        size="STANDARD_DS11_V2",            # VM family
        min_instances=0,                    # Minimum number of nodes kept when no job is running
        max_instances=1,                    # Maximum number of nodes in the cluster
        idle_time_before_scale_down=90,     # Seconds a node keeps running after a job terminates
        tier="Dedicated",                   # Dedicated or LowPriority. The latter is cheaper but jobs may be terminated
    )

    print(f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}")

    # begin_create_or_update only starts the provisioning operation; calling
    # .result() waits for it to finish so cpu_cluster is bound to the created
    # compute and not to the pending operation handle.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

Creating a new cpu compute target...
AMLCompute with name starbucks-cluster will be created, with compute size STANDARD_DS11_V2


# Create environment file

In [4]:
# Folder that will hold the conda specification of the training environment.
# Kept as a plain string because later cells interpolate it into paths.
dependencies_dir = "./dependencies"

# Create the folder; an already existing directory is not an error
os.makedirs(name=dependencies_dir, exist_ok=True)

In [5]:
%%writefile {dependencies_dir}/conda.yml
# Conda specification for the training environment.
# Packages under "dependencies" are resolved by conda; the nested "pip" list
# is installed with pip afterwards.
name: model-env
channels:
  - conda-forge
dependencies:
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - xlrd==2.0.1
    - mlflow==1.26.1
    - azureml-mlflow==1.42.0
    - psutil>=5.8,<5.9
    - tqdm>=4.59,<4.60
    - ipykernel~=6.0
    # TODO: matplotlib is the only unpinned package; pin it for reproducible builds
    - matplotlib

Overwriting ./dependencies/conda.yml


# Create custom environment

In [6]:
# Name under which the custom environment is registered in the workspace
custom_env_name = "aml-starbucks-ml"

# Local description of the environment: a base Docker image combined with the
# conda specification stored in the dependencies folder.
env_definition = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
    conda_file=os.path.join(dependencies_dir, "conda.yml"),
    name=custom_env_name,
    description="Custom environment for Starbucks ML",
    tags={"scikit-learn": "0.24.2"},
)

# Register (or update) the environment and keep the object returned by the
# workspace, which carries the assigned version.
pipeline_job_env = ml_client.environments.create_or_update(env_definition)

print(f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}")

Environment with name aml-starbucks-ml is registered to workspace, the environment version is 2


# Configure job

In [15]:
# Configure job
# Name passed to the training script through --registered_model_name
registered_model_name = "starbucks_bogo_model"

job = command(
    inputs={
        # Training data, read from a single file at this URL
        "data": Input(
            type="uri_file",
            path="https://learningac.blob.core.windows.net/learningcontainer/df_trans_cust_bogo_succ_rate.csv",
        ),
        # NOTE(review): test_train_ratio and learning_rate are declared as job
        # inputs but are not referenced by the command line below -- confirm
        # whether starbucks_ml_script.py expects them as arguments.
        "test_train_ratio": 0.2,
        "learning_rate": 0.25,
        "registered_model_name": registered_model_name,
    },
    code="./src",  # location of source code
    command="python starbucks_ml_script.py --data ${{inputs.data}} --registered_model_name ${{inputs.registered_model_name}}",
    # Reuse the names defined in the environment and compute cells instead of
    # repeating the literals, so renaming either one cannot leave the job
    # pointing at a stale resource.
    environment=f"{custom_env_name}@latest",
    compute=cpu_compute_target,
    experiment_name="starbucks_bogo_success_prediction",
    display_name="starbucks_bogo_success_prediction",
)

# Submit job

In [16]:
# Submit the configured command job to the workspace. The returned job is left
# as the cell's last expression so the notebook renders its summary
# (experiment, name, type, status and studio link).
returned_job = ml_client.create_or_update(job)
returned_job

[32mUploading src (0.0 MBs): 100%|██████████| 2606/2606 [00:00<00:00, 37215.04it/s]
[39m



Experiment,Name,Type,Status,Details Page
starbucks_bogo_success_prediction,frank_tooth_hm5bbm1kf9,command,Starting,Link to Azure Machine Learning studio
