# Automated ML

## Import libraries for Azure Machine Learning SDK

In [1]:
import os
import csv
import joblib
import logging
import sklearn
import pkg_resources

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

from sklearn import datasets

import azureml.core
from azureml.core import Workspace, Experiment, Model

from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.webservice import LocalWebservice
from azureml.core.conda_dependencies import CondaDependencies

from azureml.train.automl import AutoMLConfig

from azureml.pipeline.steps import AutoMLStep
from azureml.contrib.pipeline.steps import ParallelRunStep
from azureml.contrib.pipeline.steps import ParallelRunConfig

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.49.0


## Initialize Workspace
Initialize a workspace object from persisted configuration. Make sure the config file is present at .\config.json

In [2]:
ws = Workspace.from_config()

print('Workspace name:\t\t'  + ws.name,
      'Resource group:\t\t'  + ws.resource_group,
      'Azure region:\t\t'    + ws.location,
      'Subscription id:\t' + ws.subscription_id, sep='\n')

Workspace name:		quick-starts-ws-237941
Resource group:		aml-quickstarts-237941
Azure region:		southcentralus
Subscription id:	a24a24d5-8d87-4c8a-99b6-91ed2d2df51f


## Create an Azure ML experiment

In [3]:
# Name for experiment
experiment_name = 'automl-heart-failure-experiment'

experiment=Experiment(ws, experiment_name)
run = experiment.start_logging()

experiment

Name,Workspace,Report Page,Docs Page
automl-heart-failure-experiment,quick-starts-ws-237941,Link to Azure Machine Learning studio,Link to Documentation


### Create or Attach an AmlCompute Target
We will need to create a compute target for our AutoML run. We will use ***vm_size = Standard_DS3_v2*** in our provisioning configuration and select ***max_nodes*** to be no greater than 4.

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name for the CPU cluster
amlcompute_cluster_name = "automl-cpu-compute-cluster"

# Verify that cluster does not exist already
try:
    amlcompute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    amlcompute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS3_v2', max_nodes=4)
    amlcompute_target = ComputeTarget.create(ws, amlcompute_cluster_name, amlcompute_config)

amlcompute_target.wait_for_completion(show_output=True)

InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [5]:
compute_targets = ws.compute_targets

for i, key in enumerate(compute_targets):
    print(f"{i+1}. Compute target\n\tname: {compute_targets[key].name}\n\tType: {compute_targets[key].type}")

1. Compute target
	name: notebook237941
	Type: ComputeInstance
2. Compute target
	name: automl-cpu-compute-cluster
	Type: AmlCompute


In [6]:
# For a more detailed view of current AmlCompute status, use get_status().
print(amlcompute_target.get_status().serialize())

{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2023-06-30T12:25:20.607000+00:00', 'errors': None, 'creationTime': '2023-06-30T12:25:15.804562+00:00', 'modifiedTime': '2023-06-30T12:25:22.561224+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT1800S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS3_V2'}


## Dataset

### Overview
The dataset used for this project is the ***Heart Failure Clinical Records*** dataset, which can be found [here](https://archive.ics.uci.edu/ml/datasets/Heart+failure+clinical+records) in the UCI Machine Learning Repository. 

This dataset contains the medical records of 299 patients who had heart failure, collected during their follow-up period, where each patient profile has 13 clinical features.

The task we are concerned with is to predict whether the patient died during the follow-up period. We will target the DEATH_EVENT column and since it is a boolean variable, the task is binary classification.

In [7]:
from azureml.core.dataset import Dataset

# Try to load the dataset from the Workspace. Otherwise, create it from the file
description_text = "Health Failure dataset from UCI ML-Repository for mortality prediction for the Capstone Project."
key = "HealthFailure Dataset"      # the key to match the dataset name

dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv"
#dataset_url = "https://github.com/nguyenqu/nd00333-capstone/tree/master/starter_file/heart_failure_clinical_records_dataset.csv"

if key in ws.datasets.keys():
    dataset = ws.datasets[key]
    print("The Dataset was found!")
else:
    dataset = Dataset.Tabular.from_delimited_files(dataset_url) # Create AML Dataset and register it into Workspace
    dataset = dataset.register(workspace=ws, name=key, description=description_text) # Register Dataset in Workspace

df = dataset.to_pandas_dataframe()

The Dataset was found!


In [8]:
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [9]:
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


### Prepare the datasets for the Automation

In [10]:
from sklearn.model_selection import train_test_split
from azureml.data.dataset_factory import TabularDatasetFactory

# Split the dataset into training and testing datasets
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=True)

if "data" not in os.listdir():
    os.mkdir("./data")

# Save training data to csv file
train_df.to_csv("./data/train_data.csv", index=False)

# Read saved training data and create a dataset in Azure ML
data_store = ws.get_default_datastore()
data_store.upload(src_dir="./data", target_path="automl_training_data")
train_ds = TabularDatasetFactory.from_delimited_files(path=[(data_store, 'automl_training_data/train_data.csv')])

"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 1 files
Target already exists. Skipping upload for automl_training_data/train_data.csv
Uploaded 0 files


### Review the Training Dataset Result

In [11]:
train_ds.take(5).to_pandas_dataframe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,50.0,0,250,0,25,0,262000.0,1.0,136,1,1,120,0
1,52.0,0,190,1,38,0,382000.0,1.0,140,1,1,258,0
2,58.0,0,132,1,38,1,253000.0,1.0,139,1,0,230,0
3,41.0,0,148,0,40,0,374000.0,0.8,140,1,1,68,0
4,75.0,0,675,1,60,0,265000.0,1.4,125,0,0,205,0


## AutoML Configuration

- ***experiment_timeout_minutes = 20***: Specifies how long (in minutes) our experiment should run. In previous projects we could not set more than 30 minutes. We could use more in this project, but it's not needed for such a small training set. To reduce the time taken to train, experiment_timeout_minutes of 20 was chosen.

- ***max_concurrent_iterations = 4***: The maximum number of iterations that could be run in parallel. It is recommended to create a dedicated cluster per experiment and adjust the number of max_concurrent_iterations of your experiment to the number of nodes in the cluster. In this way you use all nodes of the cluster at the same time with the desired number of concurrent child runs/iterations. So I set the value to 4.

- ***primary_metric = 'accuracy'***: The metric that is optimized by automated machine learning for model selection. We have set the "accuracy"/"AUC_weighted".



- ***compute_target = amlcompute_target*** : The compute target with specific vm_size and max_nodes used to run the experiment. The local compute was chosen as this may be slower but generally provides better results.

- ***task = 'classification'*** : We have a classification task to do, we want to predict whether the person will have heart failure or not. In other words, we're trying to predict the DEATH_EVENT.

- ***training_data = train_ds*** : The data (80% of the total dataset) on which used in the experiment to train the algorithm.

- ***label_column_name = "DEATH_EVENT"*** : The target variable to predict.

- ***path = project_folder*** : The full path to the Azure ML folder of the project './capstone-project'.

- ***enable_early_stopping = True*** : Early stopping is enabled so if a run is not performing well, it can stop early, again to save time and if not performing well continuing seems uncessary.

- ***featurization = 'auto'*** : indicator of whether the featurization step should be performed automatically or not, or whether a custom featurization should be used. I used "Auto" so the featurization step should be automatic.

- ***debug_log = "automl_errors.log"*** : The debug information are written to the automl_errors.log.


In [12]:
project_folder = './capstone-project'

# Define automl settings
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 4,
    "primary_metric" : 'AUC_weighted'
}

# Define automl configuration settings
automl_config = AutoMLConfig(compute_target = amlcompute_target,
                             task = "classification",
                             training_data = train_ds,
                             label_column_name = "DEATH_EVENT",   
                             path = project_folder,
                             enable_early_stopping = True,
                             featurization = 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [13]:
# Submit the experiment to the compute target 
automl_run = experiment.submit(automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on automl-cpu-compute-cluster with default configuration
Running on remote compute: automl-cpu-compute-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
automl-heart-failure-experiment,AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  In order to accurately evaluate the model(s) trained by AutoML, we leverage a dataset that the model is not trained on. Hence, if the user doesn't provide an explicit validation dataset, a part of the training dataset is used to achieve this. For smaller datasets (fewer than 20,000 samples), cross-validation is leveraged, else a single hold-out set is split from the training data to serve as the validation dataset. Hence, for your input data we leverage cross-validation with 10 folds, if the number of training samples are fewer than 1000, and 3 folds in all other cases.
              Learn mo

In [14]:
automl_run.wait_for_completion(show_output=True)

Experiment,Id,Type,Status,Details Page,Docs Page
automl-heart-failure-experiment,AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  In order to accurately evaluate the model(s) trained by AutoML, we leverage a dataset that the model is not trained on. Hence, if the user doesn't provide an explicit validation dataset, a part of the training dataset is used to achieve this. For smaller datasets (fewer than 20,000 samples), cross-validation is leveraged, else a single hold-out set is split from the training data to serve as the validation dataset. Hence, for your input data we leverage cross-validation with 10 folds, if the number of training samples are fewer than 1000, and 3 folds in all other cases.
              Learn more about cross validation: https://aka.ms/AutomatedMLCrossValidation
DETAILS:      
+------------------------------+
|Number of folds               |
|10                            |
+------------------------------+

******

{'runId': 'AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69',
 'target': 'automl-cpu-compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-06-30T12:25:48.964885Z',
 'endTimeUtc': '2023-06-30T12:41:28.41337Z',
 'services': {},
   'message': 'No scores improved over last 10 iterations, so experiment stopped early. This early stopping behavior can be disabled by setting enable_early_stopping = False in AutoMLConfig for notebook/python SDK runs.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'automl-cpu-compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"cba7fb33-a125-4ab7-a2dd-2cfeec0401b0\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependenci

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

Use the `RunDetails` widget to show the different experiments.

In [15]:
from azureml.widgets import RunDetails

RunDetails(automl_run).show()
for children_run in automl_run.get_children():
    print('-----------------------------------')
    print(children_run)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_49,
Type: azureml.scriptrun,
Status: Completed)
-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48,
Type: azureml.scriptrun,
Status: Completed)
-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_47,
Type: azureml.scriptrun,
Status: Canceled)
-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_46,
Type: azureml.scriptrun,
Status: Canceled)
-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_45,
Type: azureml.scriptrun,
Status: Canceled)
-----------------------------------
Run(Experiment: automl-heart-failure-experiment,
Id: AutoML_cb

## Best Model

Get the best model from the automl experiments and display all the properties of the model.

In [16]:
# get the best model
best_run, best_model = automl_run.get_output()

Package:azureml-automl-runtime, training version:1.51.0.post2, current version:1.49.0
Package:azureml-core, training version:1.51.0, current version:1.49.0
Package:azureml-dataprep, training version:4.10.8, current version:4.9.1
Package:azureml-dataprep-rslex, training version:2.17.12, current version:2.16.1
Package:azureml-dataset-runtime, training version:1.51.0, current version:1.49.0
Package:azureml-defaults, training version:1.51.0, current version:1.49.0
Package:azureml-interpret, training version:1.51.0, current version:1.49.0
Package:azureml-mlflow, training version:1.51.0, current version:1.49.0
Package:azureml-pipeline-core, training version:1.51.0, current version:1.49.0
Package:azureml-responsibleai, training version:1.51.0, current version:1.49.0
Package:azureml-telemetry, training version:1.51.0, current version:1.49.0
Package:azureml-train-automl-client, training version:1.51.0.post1, current version:1.49.0
Package:azureml-train-automl-runtime, training version:1.51.0.po

In [17]:
best_model

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
                 PreFittedSoftVotingClassifier(classification_labels=array([0, 1]), estimators=[('31', Pipeline(memory=None, steps=[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))), ('gradientboostingclassifier', GradientBoostingClassifier(ccp_alpha=0.0, criterion='mse', init=None, learning_rate=0.021544346900318822, loss='deviance', max_depth=3, max_features=0.1, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=0.035789473684210524, min_samples_split=0.29105263157894734, min_weight_fraction_leaf=0.0, n_estimators=600, n_iter_no_change=None, pres

In [18]:
from pprint import pprint

# parameter details of the best model
def print_model(model, prefix=""):
    for step in model.steps:
        print(prefix + step[0])
        if hasattr(step[1], 'estimators') and hasattr(step[1], 'weights'):
            pprint({'estimators': list(
                e[0] for e in step[1].estimators), 'weights': step[1].weights})
            print()
            for estimator in step[1].estimators:
                print_model(estimator[1], estimator[0] + ' - ')
        else:
            pprint(step[1].get_params())
            print()

print_model(best_model)

datatransformer
{'enable_dnn': False,
 'enable_feature_sweeping': True,
 'feature_sweeping_config': {},
 'feature_sweeping_timeout': 86400,
 'featurization_config': None,
 'force_text_dnn': False,
 'is_cross_validation': True,
 'is_onnx_compatible': False,
 'observer': None,
 'task': 'classification',
 'working_dir': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/notebook237941/code/Users/odl_user_237941/AutoML'}

prefittedsoftvotingclassifier
{'estimators': ['31', '26', '22', '35', '39', '25', '40', '6'],
 'weights': [0.2222222222222222,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111]}

31 - minmaxscaler
{'copy': True, 'feature_range': (0, 1)}

31 - gradientboostingclassifier
{'ccp_alpha': 0.0,
 'criterion': 'mse',
 'init': None,
 'learning_rate': 0.021544346900318822,
 'loss': 'deviance',
 'max_depth':

In [19]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-heart-failure-experiment,AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [20]:
best_run.get_tags()

{'_aml_system_azureml.automlComponent': 'AutoML',
 '_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":3,"CurrentNodeCount":4}',
 'mlflow.source.type': 'JOB',
 'mlflow.source.name': 'automl_driver.py',
 '_aml_system_codegen': 'completed',
 '_aml_system_automl_is_child_run_end_telemetry_event_logged': 'True',
 'model_explain_run_id': 'AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_ModelExplain',
 'model_explanation': 'True'}

In [21]:
best_run.get_metrics()

{'recall_score_weighted': 0.8532608695652174,
 'log_loss': 0.43684788808078723,
 'AUC_macro': 0.9359942624991968,
 'recall_score_micro': 0.8532608695652174,
 'weighted_accuracy': 0.8790241354553213,
 'precision_score_macro': 0.8700560224089635,
 'accuracy': 0.8532608695652174,
 'AUC_micro': 0.9248345935727789,
 'average_precision_score_weighted': 0.9471698453483308,
 'norm_macro_recall': 0.6247152194211018,
 'AUC_weighted': 0.9359942624991968,
 'recall_score_macro': 0.8123576097105509,
 'precision_score_weighted': 0.8685449650874842,
 'f1_score_macro': 0.8254281793399036,
 'f1_score_micro': 0.8532608695652174,
 'matthews_correlation': 0.6774783809562741,
 'balanced_accuracy': 0.8123576097105509,
 'average_precision_score_macro': 0.9355838266898318,
 'average_precision_score_micro': 0.9300728509739662,
 'f1_score_weighted': 0.8490069461942026,
 'precision_score_micro': 0.8532608695652174,
 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69

In [22]:
for primary_metric in best_run.get_metrics():
    metric=best_run.get_metrics()[primary_metric]
    print(primary_metric,metric)

recall_score_weighted 0.8532608695652174
log_loss 0.43684788808078723
AUC_macro 0.9359942624991968
recall_score_micro 0.8532608695652174
weighted_accuracy 0.8790241354553213
precision_score_macro 0.8700560224089635
accuracy 0.8532608695652174
AUC_micro 0.9248345935727789
average_precision_score_weighted 0.9471698453483308
norm_macro_recall 0.6247152194211018
AUC_weighted 0.9359942624991968
recall_score_macro 0.8123576097105509
precision_score_weighted 0.8685449650874842
f1_score_macro 0.8254281793399036
f1_score_micro 0.8532608695652174
matthews_correlation 0.6774783809562741
balanced_accuracy 0.8123576097105509
average_precision_score_macro 0.9355838266898318
average_precision_score_micro 0.9300728509739662
f1_score_weighted 0.8490069461942026
precision_score_micro 0.8532608695652174
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48/accuracy_table
confusion_matrix aml://artifactId/ExperimentRun/dcid.AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf

In [23]:
best_run.get_metrics(name='AUC_weighted')

{'AUC_weighted': 0.9359942624991968}

In [24]:
best_run.get_details()

{'runId': 'AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48',
 'target': 'automl-cpu-compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-06-30T12:40:10.670378Z',
 'endTimeUtc': '2023-06-30T12:41:18.522878Z',
 'services': {},
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': '__AutoML_Ensemble__',
  'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'AUC_weighted\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'automl-heart-failure-experiment\',\'compute_target\':\'automl-cpu-compute-cluster\',\'subscription_id\':\'a24a24d5-8d87-4c8a-99b6-91ed2d2df51f\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48","experiment_name":"automl-heart-failure-experiment","

In [25]:
best_run.get_properties()

{'runTemplate': 'automl_child',
 'pipeline_id': '__AutoML_Ensemble__',
 'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'AUC_weighted\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'automl-heart-failure-experiment\',\'compute_target\':\'automl-cpu-compute-cluster\',\'subscription_id\':\'a24a24d5-8d87-4c8a-99b6-91ed2d2df51f\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_cbf4f34d-2bd6-4a7b-9c4f-7414dbf1ee69_48","experiment_name":"automl-heart-failure-experiment","workspace_name":"quick-starts-ws-237941","subscription_id":"a24a24d5-8d87-4c8a-99b6-91ed2d2df51f","resource_group_name":"aml-quickstarts-237941"}}]}',
 'training_percent': '100',
 'predicted_cost': None,
 'iteration': '48',
 '_aml_system_scenario_identi

### Test the best model

In [26]:
# Split into x and y tests
y_test = test_df['DEATH_EVENT']
x_test = test_df.drop(['DEATH_EVENT'],axis=1)

In [27]:
from sklearn.metrics import confusion_matrix

# Test the best model and create a confusion matrix
ypred = best_model.predict(x_test)
cmatrix = confusion_matrix(y_test, ypred)

# Visualize the confusion matrix
pd.DataFrame(cmatrix).style.background_gradient(cmap='Blues', low=0, high=0.9)

Unnamed: 0,0,1
0,44,0
1,2,14


### Save the best model

In [28]:
best_run.get_file_names()

['accuracy_table',
 'automl_driver.py',
 'confusion_matrix',
 'explanation/138877f1/classes.interpret.json',
 'explanation/138877f1/eval_data_viz.interpret.json',
 'explanation/138877f1/expected_values.interpret.json',
 'explanation/138877f1/features.interpret.json',
 'explanation/138877f1/global_names/0.interpret.json',
 'explanation/138877f1/global_rank/0.interpret.json',
 'explanation/138877f1/global_values/0.interpret.json',
 'explanation/138877f1/local_importance_values.interpret.json',
 'explanation/138877f1/per_class_names/0.interpret.json',
 'explanation/138877f1/per_class_rank/0.interpret.json',
 'explanation/138877f1/per_class_values/0.interpret.json',
 'explanation/138877f1/rich_metadata.interpret.json',
 'explanation/138877f1/true_ys_viz.interpret.json',
 'explanation/138877f1/visualization_dict.interpret.json',
 'explanation/138877f1/ys_pred_proba_viz.interpret.json',
 'explanation/138877f1/ys_pred_viz.interpret.json',
 'explanation/e1f83c97/classes.interpret.json',
 'expl

In [29]:
best_run.register_model(model_name='best_run_automl', model_path='./outputs/')

Model(workspace=Workspace.create(name='quick-starts-ws-237941', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-237941'), name=best_run_automl, id=best_run_automl:4, version=4, tags={}, properties={})

In [30]:
from azureml.automl.core.shared import constants

# create inference folder
inference_folder = 'inference'
if inference_folder not in os.listdir():
    os.mkdir(inference_folder)

# Save the best model, scoring script, and conda env files in inference folder
best_run.download_file('outputs/scoring_file_v_1_0_0.py', inference_folder + '/best_automl_score.py')
best_run.download_file('outputs/conda_env_v_1_0_0.yml', inference_folder + '/automl_conda_env.yml')
best_run.download_file('outputs/model.pkl', inference_folder + '/best_automl_model.pkl')
best_run.download_file('outputs/model.pkl', inference_folder + '/model.pkl')

### Save the environment

In [31]:
from azureml.core.environment import Environment

# get the list of environments
Environment.list(workspace=ws).keys()

dict_keys(['AzureML-Triton', 'AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu', 'AzureML-VowpalWabbit-8.8.0', 'AzureML-ACPT-pytorch-1.11-py38-cuda11.3-gpu', 'AzureML-ACPT-pytorch-1.12-py38-cuda11.6-gpu', 'AzureML-ACPT-pytorch-1.13-py38-cuda11.7-gpu', 'AzureML-ACPT-pytorch-1.12-py39-cuda11.6-gpu', 'AzureML-PyTorch-1.3-CPU'])

In [32]:
# save the environment
my_env = Environment.get(workspace=ws, name="AzureML-AutoML")
my_env.save_to_directory('my_env', overwrite=True)

my_env

{
    "assetId": "azureml://registries/azureml/environments/AzureML-AutoML/versions/141",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": "FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04\n\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\n\nCOPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py\n\nENV MLFLOW_MODEL_FOLDER=\"mlflow-model\"\n# ENV AML_APP_ROOT=\"/var/mlflow_resources\"\n# ENV AZUREML_ENTRY_SCRIPT=\"mlflow_score_script.py\"\n\nENV ENABLE_METADATA=true\n\n# Create conda environment\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\n    python=3.8 \\\n    pip=22.1.2 \\\n    numpy~=1.22.3

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

Register the model, create an inference config and deploy the model as a web service.

In [33]:
# Register the model
from azureml.core.resource_configuration import ResourceConfiguration

model_name = best_run.properties['model_name']
local_file = inference_folder + '/model.pkl'

run_id = best_run.id
experiment_name = best_run.experiment.name

model = Model.register(workspace = ws,
                       model_name = model_name,                        # Name of the registered model in your workspace.
                       model_path = local_file,                        # Local file to upload and register as a model.
                       model_framework = Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                       model_framework_version = sklearn.__version__,  # Version of scikit-learn used to create the model.
                       description = 'Best autoML model to predict motality caused by heart failure.',
                       tags={'area': 'heart-failure', 'type': 'classification'})

print('Model name:', model.name)
print('Model id:', model.id)
print('Model version:', model.version)

Registering model AutoMLcbf4f34d248
Model name: AutoMLcbf4f34d248
Model id: AutoMLcbf4f34d248:1
Model version: 1


In [34]:
# create inference configuration
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

my_env = Environment.from_conda_specification(name="my_env", file_path=inference_folder + '/automl_conda_env.yml')
inference_config = InferenceConfig(entry_script=inference_folder + '/best_automl_score.py', environment=my_env)

# display the environment file 
with open(inference_folder + '/automl_conda_env.yml') as file:
    env_file = file.read()
    print(env_file)

my_env

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.8 and later.
- python=3.8.16

- pip:
  - azureml-train-automl-runtime==1.51.0.post2
  - inference-schema
  - azureml-interpret==1.51.0
  - azureml-defaults==1.51.0
- numpy==1.22.3
- pandas==1.1.5
- scikit-learn==0.22.1
- py-xgboost==1.3.3
- fbprophet==0.7.1
- holidays==0.10.3
- psutil==5.9.5
channels:
- anaconda
- conda-forge



{
    "assetId": null,
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20230120.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": "2g"
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "my_env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
              

In [35]:
# Model Deployment
from azureml.core.webservice import AciWebservice

# define deployment configuration
aci_deployment_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                                           memory_gb=1,
                                                           tags={'area': "heart-failure", 'type': "classification"},
                                                           description="Predict heart failure mortality using classification model",
                                                           auth_enabled=True,
                                                           enable_app_insights=True)

# deploy model as webservice using Azure Container Instance(ACI)
aci_service = Model.deploy(workspace = ws, 
                           name = "aci-heart-failure-deploy", 
                           models = [model], 
                           inference_config = inference_config, 
                           deployment_config = aci_deployment_config, 
                           overwrite=True)

aci_service.wait_for_deployment(show_output=True)



Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-06-30 12:48:24+00:00 Creating Container Registry if not exists.
2023-06-30 12:48:24+00:00 Registering the environment..
2023-06-30 12:48:39+00:00 Use the existing image.
2023-06-30 12:48:39+00:00 Generating deployment configuration.
2023-06-30 12:48:40+00:00 Submitting deployment to compute..
2023-06-30 12:48:44+00:00 Checking the status of deployment aci-heart-failure-deploy..
2023-06-30 12:51:28+00:00 Checking the status of inference endpoint aci-heart-failure-deploy.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [36]:
# get the active api endpoint for scoring
print(f"Service State: {aci_service.state}\n")
print(f"Scoring URI:   {aci_service.scoring_uri}\n")
print(f"Swagger URI:   {aci_service.swagger_uri}\n")

Service State: Healthy

Scoring URI:   http://da98daff-cb9b-4a62-bdb4-d20608a1a8f8.southcentralus.azurecontainer.io/score

Swagger URI:   http://da98daff-cb9b-4a62-bdb4-d20608a1a8f8.southcentralus.azurecontainer.io/swagger.json



## Consuming the model
Send a request to the web service you deployed to test it.

In [37]:
# Send a request to the web service
import json
import requests

# 4 sets of data to score, so we get two results back
test_sample = test_df.sample(n=4)
labels = test_sample.pop('DEATH_EVENT')


# Convert to JSON string
input_data = json.dumps({"data": test_sample.to_dict(orient='records')})
with open("input_data.json", 'w') as _f:
    _f.write(input_data)

print(input_data)
response = requests.post(aci_service.scoring_uri, data=input_data, headers={'Content-Type':'application/json'})

{"data": [{"age": 70.0, "anaemia": 0, "creatinine_phosphokinase": 212, "diabetes": 1, "ejection_fraction": 17, "high_blood_pressure": 1, "platelets": 389000.0, "serum_creatinine": 1.0, "serum_sodium": 136, "sex": 1, "smoking": 1, "time": 188}, {"age": 50.0, "anaemia": 0, "creatinine_phosphokinase": 196, "diabetes": 0, "ejection_fraction": 45, "high_blood_pressure": 0, "platelets": 395000.0, "serum_creatinine": 1.6, "serum_sodium": 136, "sex": 1, "smoking": 1, "time": 285}, {"age": 95.0, "anaemia": 1, "creatinine_phosphokinase": 371, "diabetes": 0, "ejection_fraction": 30, "high_blood_pressure": 0, "platelets": 461000.0, "serum_creatinine": 2.0, "serum_sodium": 132, "sex": 1, "smoking": 0, "time": 50}, {"age": 80.0, "anaemia": 1, "creatinine_phosphokinase": 553, "diabetes": 0, "ejection_fraction": 20, "high_blood_pressure": 1, "platelets": 140000.0, "serum_creatinine": 4.4, "serum_sodium": 133, "sex": 1, "smoking": 0, "time": 41}]}


In [38]:
##print(f"Predictions from Service: {response.json()}\n")
print(f"Data Labels: {labels.tolist()}")

Data Labels: [0, 0, 1, 1]


## Print the logs of the web service and delete the service

In [39]:
# Print the log of the webservice
print(aci_service.get_logs())

/bin/bash: /azureml-envs/azureml_ced82449b780990ade50f322ef250da8/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_ced82449b780990ade50f322ef250da8/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_ced82449b780990ade50f322ef250da8/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_ced82449b780990ade50f322ef250da8/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2023-06-30T12:51:09,214259100+00:00 - iot-server/run 
2023-06-30T12:51:09,214258400+00:00 - rsyslog/run 
bash: /azureml-envs/azureml_ced82449b780990ade50f322ef250da8/lib/libtinfo.so.6: no version information available (required by bash)
2023-06-30T12:51:09,242071300+00:00 - gunicorn/run 
2023-06-30T12:51:09,252723100+00:00 | gunicorn/run | 
2023-06-30T12:51:09,258162600+00:00 | gunicorn/run | ####################################

In [40]:
# Delete the webservice, model, and shut down the compute cluster
#aci_service.delete()
#model.delete()
#amlcompute_target.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
