# Automated ML

In [1]:
# Imports

import os
import joblib
import numpy as np
import pandas as pd
import requests
import json

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.core.model import Model, InferenceConfig
from azureml.core.webservice import AciWebservice

## Dataset

### Overview


In [2]:
# Workspace and experiment

# Load the workspace via Workspace.from_config() and name the experiment.
ws = Workspace.from_config()
experiment_name = 'automl-experiment'
project_folder = './automl-project'

experiment = Experiment(ws, experiment_name)

# Summarise the workspace, one field per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

Workspace name: quick-starts-ws-154849
Azure region: southcentralus
Subscription id: 5a4ab2ba-6c51-4805-8155-58759ad589d8
Resource group: aml-quickstarts-154849


In [3]:
# Create compute cluster

compute_cluster_name = "alpha"

# Reuse the cluster when the lookup succeeds; provision a new one when the
# lookup raises ComputeTargetException.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_cluster = ComputeTarget.create(ws, compute_cluster_name, compute_config)
else:
    print("Found existing cluster, please use it.")

# Block until the cluster reports its provisioning state.
compute_cluster.wait_for_completion(show_output=True)

InProgress.....
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [7]:
# Dataset

key = "Heart Failure Dataset"
description_text = "Heart Failure Dataset for Udacity Project 3"

# Reuse the dataset registered under `key` when the workspace already has it;
# otherwise build a tabular dataset from the CSV and register it.
found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    example_data = 'https://raw.githubusercontent.com/peppegili/3_Capstone_Project_ML_Engineer/master/data/heart_failure_clinical_records_dataset.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialise as a pandas frame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


## AutoML Configuration

In [9]:
# AutoML config

# Tunable experiment settings, forwarded to AutoMLConfig as keyword arguments.
# NOTE(review): max_concurrent_iterations (5) is larger than the max_nodes=4
# used when provisioning the cluster in this notebook; the extra iteration
# will presumably queue until a node frees up -- confirm this is intended.
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=5,
    n_cross_validations=4,
    primary_metric='accuracy',
)

# Classification on the registered dataset with DEATH_EVENT as the label.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_cluster,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings,
)

In [10]:
# Submit the AutoML configuration as a run of the experiment
automl = experiment.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_082e177e-f879-4eb9-ba30-bdc8979214da,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

In [11]:
# Show the run widget, then block until the submitted run finishes.
run_widget = RunDetails(automl)
run_widget.show()
automl.wait_for_completion(show_output=True)

# Stop here unless the run reports the "Completed" status.
final_status = automl.get_status()
assert final_status == "Completed"

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_082e177e-f879-4eb9-ba30-bdc8979214da,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS



## Best Model

In [13]:
# get_output() yields a (run, model) pair; keep both and fetch the run's metrics.
best_run_automl, best_model_automl = automl.get_output()
best_run_metrics_automl = best_run_automl.get_metrics()

best_run_id = best_run_automl.id
best_accuracy = best_run_metrics_automl['accuracy']

# Report the run id, its accuracy and the full metrics dictionary.
print("\n")
print('Best run ID: ', best_run_id)
print('Best run Accuracy: ', best_accuracy)
print('Metrics: ', best_run_metrics_automl)
print("\n")

Package:azureml-automl-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-core, training version:1.33.0, current version:1.32.0
Package:azureml-dataprep, training version:2.20.1, current version:2.18.0
Package:azureml-dataprep-native, training version:38.0.0, current version:36.0.0
Package:azureml-dataprep-rslex, training version:1.18.0, current version:1.16.1
Package:azureml-dataset-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-defaults, training version:1.33.0, current version:1.32.0
Package:azureml-interpret, training version:1.33.0, current version:1.32.0
Package:azureml-mlflow, training version:1.33.0, current version:1.32.0
Package:azureml-pipeline-core, training version:1.33.0, current version:1.32.0
Package:azureml-responsibleai, training version:1.33.0, current version:1.32.0
Package:azureml-telemetry, training version:1.33.0, current version:1.32.0
Package:azureml-train-automl-client, training version:1.33.0, current version:1.



Best run ID:  AutoML_082e177e-f879-4eb9-ba30-bdc8979214da_38
Best run Accuracy:  0.866126126126126
Metrics:  {'matthews_correlation': 0.6871372145654745, 'norm_macro_recall': 0.6531370759654658, 'average_precision_score_macro': 0.8827546675215476, 'recall_score_micro': 0.866126126126126, 'f1_score_micro': 0.866126126126126, 'AUC_weighted': 0.9111851379683301, 'precision_score_macro': 0.8620010449320794, 'recall_score_weighted': 0.866126126126126, 'AUC_micro': 0.9141805372940508, 'precision_score_weighted': 0.8718187513532341, 'accuracy': 0.866126126126126, 'f1_score_macro': 0.8354493907625058, 'precision_score_micro': 0.866126126126126, 'weighted_accuracy': 0.8929963269988462, 'recall_score_macro': 0.8265685379827329, 'f1_score_weighted': 0.8610660544772686, 'AUC_macro': 0.91118513796833, 'log_loss': 0.4219576391808034, 'balanced_accuracy': 0.8265685379827329, 'average_precision_score_micro': 0.9156178252623035, 'average_precision_score_weighted': 0.9123732365805568, 'accuracy_table'

In [17]:
# Print the best AutoML model to inspect its pipeline steps and parameters.
print(best_model_automl)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
), random_state=0, reg_alpha=2.3958333333333335, reg_lambda=1.5625, subsample=0.5, tree_method='auto'))], verbose=False))], flatten_transform=None, weights=[0.09090909090909091, 0.18181818181818182, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091, 0.09090909090909091]))],
         verbose=False)


In [18]:
# Save the best model
# joblib.dump does not create missing parent directories, so make sure
# 'outputs/' exists first; otherwise a fresh checkout fails with
# FileNotFoundError.
os.makedirs('outputs', exist_ok=True)
joblib.dump(best_model_automl, filename='outputs/best_model_automl.pkl')

['outputs/best_model_automl.pkl']

## Model Deployment

Remember that you only have to deploy one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

Register the model, create an inference config and deploy the model as a web service.

In [22]:
# Register the best model
# `model` must be defined before Model.deploy below; the previous version had
# the registration commented out, so the cell only worked with leftover kernel
# state. Register the pickle written to 'outputs/best_model_automl.pkl' by the
# save cell.
# NOTE(review): score.py is not visible here -- confirm it loads the model
# under this name / file name.
model = Model.register(workspace=ws,
                       model_name='best_model_automl',
                       model_path='outputs/best_model_automl.pkl')

# Deploy the model
service_name = 'automl-deploy'
# Reuse the environment of the best run so the scoring container matches training.
inference_config = InferenceConfig(entry_script='score.py', environment=best_run_automl.get_environment())
# ACI deployment with key authentication and Application Insights enabled.
aci_deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, 
                                                           memory_gb=1,
                                                           auth_enabled=True,
                                                           enable_app_insights=True,
                                                           description='AutoML model deploy')

# overwrite=True replaces an existing service with the same name.
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_deployment_config,
                       overwrite=True
                      )

service.wait_for_deployment(show_output=True)


print('Deployment state: ', service.state)
print('Scoring URI: ', service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-08-18 11:47:16+00:00 Creating Container Registry if not exists..
2021-08-18 11:57:16+00:00 Registering the environment.
2021-08-18 11:57:17+00:00 Use the existing image.
2021-08-18 11:57:18+00:00 Generating deployment configuration..
2021-08-18 11:57:19+00:00 Submitting deployment to compute.
2021-08-18 11:57:22+00:00 Checking the status of deployment automl-deploy..
2021-08-18 12:01:47+00:00 Checking the status of inference endpoint automl-deploy.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Deployment state:  Healthy
Scoring URI:  http://c55aee43-1cff-46b2-b394-95a2e22cedbe.southcentralus.azurecontainer.io/score


Send a request to the web service you deployed to test it.

In [None]:
# Send a POST request to the web service

# URL for the web service
scoring_uri = str(service.scoring_uri)
# The service was deployed with auth_enabled=True, so fetch its keys
primary, secondary = service.get_keys()
key = str(primary)

# One record to score, so we get one result back
data = {"data":
        [
          {
            'age': 50.0, 
            'anaemia': 1, 
            'creatinine_phosphokinase': 230,
            'diabetes': 0,
            'ejection_fraction': 38,
            'high_blood_pressure': 1,
            'platelets': 390000.0,
            'serum_creatinine': 1.8,
            'serum_sodium': 135,
            'sex': 1,
            'smoking': 0,
            'time': 14
          }
    ]
}

# Convert to JSON string and keep a copy of the payload on disk
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# Authentication is enabled, so set the authorization header
headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response.
# A timeout keeps the cell from hanging forever if the endpoint is unreachable.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
print(resp.text)
# Surface HTTP errors (e.g. 401/502) explicitly instead of failing later
# while trying to decode a non-JSON error body.
resp.raise_for_status()
print(resp.json())

Print the logs of the web service and delete the service.

In [None]:
# Logs of the web service
service_logs = service.get_logs()
print(service_logs)

# Delete the service
service.delete()