##### This notebook is heavily based off: https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing/automl-classification-task-bankmarketing.ipynb 
##### Use the two in conjunction

# Install required packages

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install azure-ai-ml

In [None]:
import sys
!{sys.executable} -m pip install azureml-core

In [None]:
import sys
!{sys.executable} -m pip install azure-identity

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install azureml-mlflow

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install mlflow

In [2]:
# Import required libraries
import os
from azure.identity import DefaultAzureCredential
from azure.identity import AzureCliCredential
from azure.ai.ml import automl, Input, MLClient, command

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Data
from azure.ai.ml.automl import (
    classification,
    ClassificationPrimaryMetrics,
    ClassificationModels,
)

#### Authenticate with azure CLI (separately) then connect to the workspace 

In [6]:
credential = DefaultAzureCredential()
ml_client = None
try:
    ml_client = MLClient.from_config(credential)
except Exception as ex:
    print(ex)
    # Enter details of your AzureML workspace
    subscription_id = "d9412b06-e31c-4c66-b2c5-5e77beb91bc1"
    resource_group = "demo-rg"
    workspace = "demo-ws"
    ml_client = MLClient(credential, subscription_id, resource_group, workspace)

Found the config file in: .\config.json


In [20]:
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)

subscription_id = ml_client.connections._subscription_id
resource_group = workspace.resource_group
workspace_name = ml_client.workspace_name

output = {}
output["Workspace"] = workspace_name
output["Subscription ID"] = subscription_id
output["Resource Group"] = resource_group
output["Location"] = workspace.location
output

{'Workspace': 'demo-ws',
 'Subscription ID': 'd9412b06-e31c-4c66-b2c5-5e77beb91bc1',
 'Resource Group': 'demo-rg',
 'Location': 'uksouth'}

#### Create MLTable

In [21]:
# from azure.ai.ml.constants import AssetTypes
# from azure.ai.ml import automl, Input

# # A. Create MLTable for training data from your local directory
# my_training_data_input = Input(
#     type=AssetTypes.MLTABLE, path="./data/training-mltable-folder"
# )

from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# my_path must point to folder containing MLTable artifact (MLTable file + data
# Supported paths include:
# local: './<path>'
# blob:  'https://<account_name>.blob.core.windows.net/<container_name>/<path>'
# ADLS gen2: 'abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/'
# Datastore: 'azureml://datastores/<data_store_name>/paths/<path>'

my_path = "./data/training-mltable-folder"

my_data = Data(
    path=my_path,
    type=AssetTypes.MLTABLE,
    description="ML Table",
    name="MLTable",
    version='0.1'
)

ml_client.data.create_or_update(my_data)

[32mUploading training-mltable-folder (3.62 MBs): 100%|#####################| 3623630/3623630 [00:00<00:00, 4075384.86it/s][0m
[39m



Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': ['./bank_marketing_train_data.csv'], 'type': 'mltable', 'is_anonymous': False, 'auto_increment_version': False, 'name': 'MLTable', 'description': 'ML Table', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws/data/MLTable/versions/0.1', 'Resource__source_path': None, 'base_path': 'C:\\Users\\ninad\\Documents\\Data-code\\AzureML', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x00000208ACD14D90>, 'serialize': <msrest.serialization.Serializer object at 0x00000208ACFB4B80>, 'version': '0.1', 'latest_version': None, 'path': 'azureml://subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws/datastores/workspaceblobstore/paths/LocalUpload/f69b2d0f27f7db75b3735558eafd404c/training-mltable-folder/', 'datas

#### Find an existing compute cluster or create a new one

In [22]:
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError

compute_name = "cpu-cluster"

try:
    _ = ml_client.compute.get(compute_name)
    print("Found existing compute target.")
except ResourceNotFoundError:
    print("Creating a new compute target...")
    compute_config = AmlCompute(
        name=compute_name,
        type="amlcompute",
        size="STANDARD_DS12_V2",
        idle_time_before_scale_down=120,
        min_instances=0,
        max_instances=6,
    )
    ml_client.begin_create_or_update(compute_config).result()

Creating a new compute target...


#### Configure the Experiment and AutoML classification job

In [23]:
# General job parameters
max_trials = 5
exp_name = "dpv2-classifier-experiment"

In [24]:
# A. Create MLTable for training data from your local directory
my_training_data_input = Input(
    type=AssetTypes.MLTABLE, path="./data/training-mltable-folder"
)

# Create the AutoML classification job with the related factory-function.

classification_job = automl.classification(
    compute=compute_name,
    experiment_name=exp_name,
    training_data=my_training_data_input,
    target_column_name="y",
    primary_metric="accuracy",
    n_cross_validations=5,
    enable_model_explainability=True,
    tags={"my_custom_tag": "My custom value"},
)

# Limits are all optional
classification_job.set_limits(
    timeout_minutes=600,
    trial_timeout_minutes=20,
    max_trials=max_trials,
    # max_concurrent_trials = 4,
    # max_cores_per_trial: -1,
    enable_early_termination=True,
)

# Training properties are optional
classification_job.set_training(
    blocked_training_algorithms=[ClassificationModels.LOGISTIC_REGRESSION],
    enable_onnx_compatible_models=True,
)

In [25]:
# Submit the AutoML job
returned_job = ml_client.jobs.create_or_update(
    classification_job
)  # submit the job to the backend

print(f"Created job: {returned_job}")

Created job: compute: azureml:cpu-cluster
creation_context:
  created_at: '2023-02-26T15:34:04.097030+00:00'
  created_by: Ninad Kothmire
  created_by_type: User
display_name: happy_car_n55s8l01yk
experiment_name: dpv2-classifier-experiment
id: azureml:/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws/jobs/happy_car_n55s8l01yk
limits:
  enable_early_termination: true
  max_concurrent_trials: 1
  max_cores_per_trial: -1
  max_trials: 5
  timeout_minutes: 600
  trial_timeout_minutes: 20
log_verbosity: info
n_cross_validations: 5
name: happy_car_n55s8l01yk
outputs: {}
primary_metric: accuracy
properties: {}
resources:
  instance_count: 1
  shm_size: 2g
services:
  Studio:
    endpoint: https://ml.azure.com/runs/happy_car_n55s8l01yk?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7
    job_service_type: Studio
  Tr

In [26]:

ml_client.jobs.stream(returned_job.name)

RunId: happy_car_n55s8l01yk
Web View: https://ml.azure.com/runs/happy_car_n55s8l01yk?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws

Execution Summary
RunId: happy_car_n55s8l01yk
Web View: https://ml.azure.com/runs/happy_car_n55s8l01yk?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws



In [27]:
# Get a URL for the status of the job
returned_job.services["Studio"].endpoint

'https://ml.azure.com/runs/happy_car_n55s8l01yk?wsid=/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/workspaces/demo-ws&tid=dd1f2c2d-fea2-4bb0-a462-dfeb75d6a2e7'

#### Retrieve the Best Trial (Best Model's trial/run)

#### Obtain the tracking URI for MLFlow

In [33]:
import mlflow

# Obtain the tracking URL from MLClient
MLFLOW_TRACKING_URI = ml_client.workspaces.get(
    name=ml_client.workspace_name
).mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

azureml://uksouth.api.azureml.ms/mlflow/v1.0/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws


In [34]:
# Set the MLFLOW TRACKING URI

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

print("\nCurrent tracking uri: {}".format(mlflow.get_tracking_uri()))


Current tracking uri: azureml://uksouth.api.azureml.ms/mlflow/v1.0/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws


In [35]:
from mlflow.tracking.client import MlflowClient

# Initialize MLFlow client
mlflow_client = MlflowClient()

#### Get the AutoML parent Job

In [36]:
job_name = returned_job.name

# Example if providing an specific Job name/ID
# job_name = "b4e95546-0aa1-448e-9ad6-002e3207b4fc"

# Get the parent run
mlflow_parent_run = mlflow_client.get_run(job_name)

print("Parent Run: ")
print(mlflow_parent_run)

Parent Run: 
<Run: data=<RunData: metrics={'AUC_macro': 0.944486024483466,
 'AUC_micro': 0.979510890865782,
 'AUC_weighted': 0.944486024483466,
 'accuracy': 0.9155637050619548,
 'average_precision_score_macro': 0.8240956664121315,
 'average_precision_score_micro': 0.9800525432671655,
 'average_precision_score_weighted': 0.9544761020467633,
 'balanced_accuracy': 0.7659372230508341,
 'f1_score_macro': 0.7784852371538283,
 'f1_score_micro': 0.9155637050619548,
 'f1_score_weighted': 0.9135111817630044,
 'log_loss': 0.23871814524853102,
 'matthews_correlation': 0.5583665930414543,
 'norm_macro_recall': 0.5318744461016681,
 'precision_score_macro': 0.7931849040153912,
 'precision_score_micro': 0.9155637050619548,
 'precision_score_weighted': 0.91201856475381,
 'recall_score_macro': 0.7659372230508341,
 'recall_score_micro': 0.9155637050619548,
 'recall_score_weighted': 0.9155637050619548,
 'weighted_accuracy': 0.9529272206535231}, params={}, tags={'_azureml.ComputeTargetType': 'STANDARD_DS12

In [37]:
# Print parent run tags. 'automl_best_child_run_id' tag should be there.
print(mlflow_parent_run.data.tags)

{'my_custom_tag': 'My custom value', 'model_explain_run': 'best_run', '_azureml.ComputeTargetType': 'STANDARD_DS12_V2', 'pipeline_id_000': '5dfac790c5c209f98a1da2dc1c7fb76f0397324f;c7af0367625be6ac5c2fecbfc72ed444cb7a2111;799d2168db11fc19b9e1c6c1df62f8981ad39fe9;__AutoML_Ensemble__;__AutoML_Stack_Ensemble__', 'score': '', 'predicted_cost': '', 'fit_time': '', 'training_percent': '', 'iteration': '', 'run_preprocessor': '', 'run_algorithm': '', 'automl_best_child_run_id': 'happy_car_n55s8l01yk_3', 'model_explain_best_run_child_id': 'happy_car_n55s8l01yk_3', 'mlflow.rootRunId': 'happy_car_n55s8l01yk', 'mlflow.runName': 'happy_car_n55s8l01yk', 'mlflow.user': 'Ninad Kothmire'}


In [38]:
# Get the best model's child run

best_child_run_id = mlflow_parent_run.data.tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)

best_run = mlflow_client.get_run(best_child_run_id)

print("Best child run: ")
print(best_run)

Found best child run id:  happy_car_n55s8l01yk_3
Best child run: 
<Run: data=<RunData: metrics={'AUC_macro': 0.944486024483466,
 'AUC_micro': 0.979510890865782,
 'AUC_weighted': 0.944486024483466,
 'accuracy': 0.9155637050619548,
 'average_precision_score_macro': 0.8240956664121315,
 'average_precision_score_micro': 0.9800525432671655,
 'average_precision_score_weighted': 0.9544761020467633,
 'balanced_accuracy': 0.7659372230508341,
 'f1_score_macro': 0.7784852371538283,
 'f1_score_micro': 0.9155637050619548,
 'f1_score_weighted': 0.9135111817630044,
 'log_loss': 0.23871814524853102,
 'matthews_correlation': 0.5583665930414543,
 'norm_macro_recall': 0.5318744461016681,
 'precision_score_macro': 0.7931849040153912,
 'precision_score_micro': 0.9155637050619548,
 'precision_score_weighted': 0.91201856475381,
 'recall_score_macro': 0.7659372230508341,
 'recall_score_micro': 0.9155637050619548,
 'recall_score_weighted': 0.9155637050619548,
 'weighted_accuracy': 0.9529272206535231}, params={

#### Get best model run's metrics

In [39]:
best_run.data.metrics

{'recall_score_micro': 0.9155637050619548,
 'f1_score_weighted': 0.9135111817630044,
 'weighted_accuracy': 0.9529272206535231,
 'AUC_macro': 0.944486024483466,
 'recall_score_weighted': 0.9155637050619548,
 'balanced_accuracy': 0.7659372230508341,
 'norm_macro_recall': 0.5318744461016681,
 'AUC_micro': 0.979510890865782,
 'f1_score_macro': 0.7784852371538283,
 'f1_score_micro': 0.9155637050619548,
 'precision_score_weighted': 0.91201856475381,
 'average_precision_score_micro': 0.9800525432671655,
 'matthews_correlation': 0.5583665930414543,
 'average_precision_score_weighted': 0.9544761020467633,
 'accuracy': 0.9155637050619548,
 'average_precision_score_macro': 0.8240956664121315,
 'recall_score_macro': 0.7659372230508341,
 'AUC_weighted': 0.944486024483466,
 'precision_score_macro': 0.7931849040153912,
 'precision_score_micro': 0.9155637050619548,
 'log_loss': 0.23871814524853102}

#### Download the best model locally

In [40]:
import os

# Create local folder
local_dir = "./artifact_downloads"
if not os.path.exists(local_dir):
    os.mkdir(local_dir)

In [41]:
# Download run's artifacts/outputs
local_path = mlflow_client.download_artifacts(
    best_run.info.run_id, "outputs", local_dir
)
print("Artifacts downloaded in: {}".format(local_path))
print("Artifacts: {}".format(os.listdir(local_path)))


  local_path = mlflow_client.download_artifacts(


Artifacts downloaded in: C:\Users\ninad\Documents\Data-code\AzureML\artifact_downloads\outputs
Artifacts: ['conda_env_v_1_0_0.yml', 'engineered_feature_names.json', 'env_dependencies.json', 'featurization_summary.json', 'generated_code', 'internal_cross_validated_models.pkl', 'mlflow-model', 'model.onnx', 'model.pkl', 'model_onnx.json', 'pipeline_graph.json', 'run_id.txt', 'scoring_file_pbi_v_1_0_0.py', 'scoring_file_v_1_0_0.py', 'scoring_file_v_2_0_0.py']


In [42]:
# Show the contents of the MLFlow model folder
os.listdir("./artifact_downloads/outputs/mlflow-model")

['conda.yaml', 'MLmodel', 'model.pkl', 'python_env.yaml', 'requirements.txt']

#### Register Best Model and Deploy

In [43]:
# import required libraries
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings,
)
from azure.ai.ml.constants import ModelType

In [44]:
# Creating a unique endpoint name with current datetime to avoid conflicts
import datetime

online_endpoint_name = "bankmarketing-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample online endpoint for mlflow model",
    auth_mode="key",
    tags={"foo": "bar"},
)

In [45]:
ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://bankmarketing-02261748651787.uksouth.inference.ml.azure.com/score', 'openapi_uri': 'https://bankmarketing-02261748651787.uksouth.inference.ml.azure.com/swagger.json', 'name': 'bankmarketing-02261748651787', 'description': 'this is a sample online endpoint for mlflow model', 'tags': {'foo': 'bar'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourcegroups/demo-rg/providers/microsoft.machinelearningservices/workspaces/demo-ws/onlineendpoints/bankmarketing-02261748651787', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/providers/Microsoft.MachineLearningServices/locations/uksouth/mfeOperationsStatus/oe:8343b37d-a835-4e2d-8040-a061b9b90fba:6d927d34-c6a5-4034-be6e-7637c4669fb4?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/d9412b06-

#### Register model

In [46]:
model_name = "bankmarketing-model"
model = Model(
    path=f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/",
    name=model_name,
    description="my sample classification model",
    type=AssetTypes.MLFLOW_MODEL,
)

# for downloaded file
# model = Model(path="artifact_downloads/outputs/model.pkl", name=model_name)

registered_model = ml_client.models.create_or_update(model)

In [47]:
registered_model.id

'/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws/models/bankmarketing-model/versions/1'

In [48]:
#Deploy
deployment = ManagedOnlineDeployment(
    name="bankmarketing-deploy",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS2_V2",
    instance_count=1,
    liveness_probe=ProbeSettings(
        failure_threshold=30,
        success_threshold=1,
        timeout=2,
        period=10,
        initial_delay=2000,
    ),
    readiness_probe=ProbeSettings(
        failure_threshold=10,
        success_threshold=1,
        timeout=10,
        period=10,
        initial_delay=2000,
    ),
)

In [49]:
ml_client.online_deployments.begin_create_or_update(deployment).result()

Instance type Standard_DS2_V2 may be too small for compute resources. Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. Learn more about SKUs here: https://learn.microsoft.com/en-us/azure/machine-learning/referencemanaged-online-endpoints-vm-sku-list
Check: endpoint bankmarketing-02261748651787 exists
data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored


......................................................................................................................................................................................................................................................................................................................................................................................................................................................

ManagedOnlineDeployment({'private_network_connection': False, 'data_collector': None, 'provisioning_state': 'Succeeded', 'endpoint_name': 'bankmarketing-02261748651787', 'type': 'Managed', 'name': 'bankmarketing-deploy', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/providers/Microsoft.MachineLearningServices/locations/uksouth/mfeOperationsStatus/od:8343b37d-a835-4e2d-8040-a061b9b90fba:8c30bff0-aafa-48b1-a5bd-e5ad53a965fa?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/d9412b06-e31c-4c66-b2c5-5e77beb91bc1/resourceGroups/demo-rg/providers/Microsoft.MachineLearningServices/workspaces/demo-ws/onlineEndpoints/bankmarketing-02261748651787/deployments/bankmarketing-deploy', 'Resource__source_path': None, 'base_path': 'C:\\Users\\ninad\\Documents\\Data-code\\AzureML', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000

In [50]:
# bankmarketing deployment to take 100% traffic
endpoint.traffic = {"bankmarketing-deploy": 100}
ml_client.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x208ad1de790>

In [51]:
# test the blue deployment with some sample data
import pandas as pd

test_data = pd.read_csv("./data/test-mltable-folder/bank_marketing_test_data.csv")

test_data = test_data.drop("y", axis=1)

test_data_json = test_data.to_json(orient="records", indent=4)
data = (
    '{ \
          "input_data": {"data": '
    + test_data_json
    + "}}"
)

request_file_name = "sample-request-bankmarketing.json"

with open(request_file_name, "w") as request_file:
    request_file.write(data)

ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="bankmarketing-deploy",
    request_file=request_file_name,
)

'["no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "yes", "no", "yes", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "n

In [52]:
# wait and delete endpoint
import time

time.sleep(60)

In [53]:
# Get the details for online endpoint
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# existing traffic details
print(endpoint.traffic)

# Get the scoring URI
print(endpoint.scoring_uri)

{'bankmarketing-deploy': 100}
https://bankmarketing-02261748651787.uksouth.inference.ml.azure.com/score


#### Delete the deployment and endpoint

In [54]:
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x208dc2a72b0>

..................................................................................................