# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import csv
import pkg_resources
import json
import requests

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

import sklearn
from sklearn import datasets
from sklearn.metrics import confusion_matrix

import azureml.core
from azureml.core import Model
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.dataset import Dataset
from azureml.core.environment import Environment
from azureml.core.experiment import Experiment
from azureml.core.model import InferenceConfig
from azureml.core.resource_configuration import ResourceConfiguration
from azureml.core.webservice import AciWebservice
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

# Report the installed azureml-core SDK version so the run can be reproduced
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.57.0


## Dataset

### Overview
TODO: In this markdown cell, give an overview of the dataset you are using. Also mention the task you will be performing.


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
# Connect to the Azure ML workspace
ws = Workspace.from_config()

# Summarise the workspace connection, one attribute per line
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Name of the run history container in the workspace, and the project folder
# that is later passed to AutoMLConfig as `path`
experiment_name = 'heart-failure-capstone-aml-experiment'
project_folder = './Capstone-Project'

experiment = Experiment(workspace=ws, name=experiment_name)
print(experiment)

Workspace name: quick-starts-ws-275688
Azure region: westeurope
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-275688
Experiment(Name: heart-failure-capstone-aml-experiment,
Workspace: quick-starts-ws-275688)


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [3]:
# Name of the CPU cluster to reuse or create
compute_cluster_name = "atul-trdigi-compute"

# Reuse the cluster when it is already present in the workspace; otherwise
# provision a new one with the configuration below
try:
    compute_target = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',  # for GPU, use "STANDARD_NC6"
        # vm_priority='lowpriority',  # optional
        min_nodes=0,
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, compute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster operation has finished, echoing progress to the cell output
compute_target.wait_for_completion(show_output=True)

# For a more detailed view of the current AmlCompute status, use get_status()
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2025-03-17T11:09:45.900000+00:00', 'errors': None, 'creationTime': '2025-03-17T09:47:14.647405+00:00', 'modifiedTime': '2025-03-17T09:47:24.616560+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 5, 'nodeIdleTimeBeforeScaleDown': 'PT1800S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [4]:
# Load the dataset registered under `key` if the workspace already has it;
# otherwise create a tabular dataset from the CSV at `data_path` and register
# it under that name so later runs can reuse it.
key = "heart-failure-ds-3"
description_text = "Heart failure clinical records dataset from the UCI repository (https://archive.ics.uci.edu/ml/datasets/Heart+failure+clinical+records)"
data_path = 'https://raw.githubusercontent.com/mishra-atul5001/MLE-with-Azure-ML/refs/heads/main/Captsone-Project/dataset/heart_failure_clinical_records_dataset.csv'

# Read the workspace's dataset listing once and reuse it for both the
# membership check and the lookup
registered_datasets = ws.datasets

if key in registered_datasets.keys():
    dataset = registered_datasets[key]
else:
    # Create the AML dataset from the CSV and register it into the workspace
    dataset = Dataset.Tabular.from_delimited_files(data_path)
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text)

# Materialise the dataset as a pandas DataFrame for a quick look at its
# column types and summary statistics
df = dataset.to_pandas_dataframe()
df.info()
df.describe()

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    bool   
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    bool   
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    bool   
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    bool   
 10  smoking                   299 non-null    bool   
 11  time    

Unnamed: 0,age,creatinine_phosphokinase,ejection_fraction,platelets,serum_creatinine,serum_sodium,time
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,581.839465,38.083612,263358.029264,1.39388,136.625418,130.26087
std,11.894809,970.287881,11.834841,97804.236869,1.03451,4.412477,77.614208
min,40.0,23.0,14.0,25100.0,0.5,113.0,4.0
25%,51.0,116.5,30.0,212500.0,0.9,134.0,73.0
50%,60.0,250.0,38.0,262000.0,1.1,137.0,115.0
75%,70.0,582.0,45.0,303500.0,1.4,140.0,203.0
max,95.0,7861.0,80.0,850000.0,9.4,148.0,285.0


In [5]:
# Run limits and evaluation settings, kept separate so they are easy to tune
automl_settings = dict(
    experiment_timeout_minutes=60,
    max_concurrent_iterations=5,
    n_cross_validations=5,
    primary_metric='accuracy',
)

# Classification on the registered dataset, predicting the DEATH_EVENT column
# on the compute cluster named above
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_cluster_name,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    debug_log='automl_errors.log',
    **automl_settings,
)

In [6]:
# Submit the AutoML configuration to the experiment; with show_output=True the
# run's status messages are printed in the cell output
remote_run = experiment.submit(automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on atul-trdigi-compute with default configuration
Running on remote compute: atul-trdigi-compute


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-capstone-aml-experiment,AutoML_0a66f332-39fc-4bf8-b817-352fd9e60542,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

**********************************************************************************

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [11]:
# Show the RunDetails widget for the AutoML run
RunDetails(remote_run).show()
# Wait until the run has completed, printing its output while waiting
remote_run.wait_for_completion(show_output=True)
# Bare last expression so the notebook displays the run summary
remote_run

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-capstone-aml-experiment,AutoML_0a66f332-39fc-4bf8-b817-352fd9e60542,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

********************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more about high cardinality feat

Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-capstone-aml-experiment,AutoML_0a66f332-39fc-4bf8-b817-352fd9e60542,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation


## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [12]:
# Retrieve the best run of the AutoML experiment together with its fitted model
# NOTE(review): nothing is saved in this cell; the model file is downloaded
# further down in the deployment section.
best_run, fitted_model = remote_run.get_output()
# Display the metrics recorded on the AutoML run (remote_run, not best_run)
remote_run.get_metrics()

{'experiment_status_description': ['Gathering dataset statistics.',
  'Generating features for the dataset.',
  'Beginning to fit featurizers and featurize the dataset.',
  'Completed fit featurizers and featurizing the dataset.',
  'Generating individually featurized CV splits.',
  'Beginning model selection.',
  'Best run model explanations started',
  'Model explanations data setup completed',
  'Choosing LightGBM as the surrogate model for explanations',
  'Computation of engineered features started',
  'Computation of engineered features completed',
  'Computation of raw features started',
  'Computation of raw features completed',
  'Best run model explanations completed'],
 'experiment_status': ['DatasetEvaluation',
  'FeaturesGeneration',
  'DatasetFeaturization',
  'DatasetFeaturizationCompleted',
  'DatasetCrossValidationSplit',
  'ModelSelection',
  'BestRunExplainModel',
  'ModelExplanationDataSetSetup',
  'PickSurrogateModel',
  'EngineeredFeatureExplanations',
  'Engineer

In [13]:
# Print the best run's summary followed by the fitted model pipeline
print(best_run)
print(fitted_model)

Run(Experiment: heart-failure-capstone-aml-experiment,
Id: AutoML_0a66f332-39fc-4bf8-b817-352fd9e60542_48,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, is_cross_validation=True, working_dir='/mnt/batch/tasks/shared/LS_root/mounts/clusters/notebook275688/code/Users/odl_user_275688/MLE-with-Azure-ML/Captsone-Project')),
                ('prefittedsoftvotingclassifier',
                 PreFittedSoftVotingClassifier(classification_labels=ar...roblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), reg_alpha=0.8421052631578947, reg_lambda=0.7894736842105263, subsample=0.4457894736842105))]))], flatten_transform=False, weights=[0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.2222222222222222, 0.1111111111111111]))])
Y_transformer(['LabelEncoder', LabelEncoder()])


In [14]:
# Read the CSV at data_path into a DataFrame and keep only rows that have a label.
# NOTE(review): data_path is the same file the training dataset is created from,
# so this appears to be an in-sample evaluation rather than a held-out test;
# confirm before quoting these numbers as test performance.
dataset_test = Dataset.Tabular.from_delimited_files(path=data_path)
df_test = dataset_test.to_pandas_dataframe()
df_test = df_test[df_test['DEATH_EVENT'].notnull()]

# Split into features and the DEATH_EVENT label
X_test = df_test.drop(columns=['DEATH_EVENT'])
y_test = df_test['DEATH_EVENT']

# Predict with the fitted AutoML model and tabulate actual vs. predicted labels
ypred = fitted_model.predict(X_test)
cm = confusion_matrix(y_test, ypred)

# Visualize the confusion matrix (rows: actual label, columns: predicted label)
pd.DataFrame(cm).style.background_gradient(cmap='Blues', low=0, high=0.9)

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


Unnamed: 0,0,1
0,200,3
1,3,93


## Model Deployment

Remember that you only have to deploy one of the two models you trained, but you still need to register both models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [15]:
# Local destinations for the best run's model, scoring script and conda environment
inference_folder_name = 'inference'
automl_model = os.path.join(inference_folder_name, 'model.pkl')
score_script = os.path.join(inference_folder_name, 'score.py')
conda_env = os.path.join(inference_folder_name, 'conda_env.yml')

# Pair each artifact of the best run with its local destination and download them in order
artifact_downloads = [
    ('outputs/model.pkl', automl_model),
    ('outputs/scoring_file_v_1_0_0.py', score_script),
    ('outputs/conda_env_v_1_0_0.yml', conda_env),
]
for artifact_name, local_path in artifact_downloads:
    best_run.download_file(artifact_name, local_path)

In [16]:
# Register the downloaded model file in the workspace under the name recorded
# in the best run's properties.
# NOTE(review): the framework version is taken from the scikit-learn installed
# in this notebook kernel, which may differ from the one used for training.
model = Model.register(
    workspace=ws,
    model_path=automl_model,
    model_name=best_run.properties['model_name'],
    description='Auto ML model predicting deaths caused by heart failure',
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version=sklearn.__version__,
)

print(f'Name: {model.name}')
print(f'Version: {model.version}')

Registering model AutoML0a66f332348
Name: AutoML0a66f332348
Version: 1


In [17]:
# Build the inference environment from the downloaded conda specification and
# pair it with the downloaded scoring script
env = Environment.from_conda_specification(name="env", file_path=conda_env)
inference_conf = InferenceConfig(entry_script=score_script, environment=env)

# Print the conda specification so the environment details are visible in the notebook
with open(conda_env, 'r') as conda_file:
    env_file = conda_file.read()
print(env_file)

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.8 and later.
- python=3.9.19

- pip:
  - azureml-train-automl-runtime==1.57.0
  - inference-schema
  - xgboost<=1.5.2
  - azureml-interpret==1.57.0
  - azureml-defaults==1.57.0.post1
- numpy==1.23.5
- pandas==1.3.5
- scikit-learn==1.5.1
- prophet==1.1.4
- holidays==0.57
- psutil==5.9.3
channels:
- anaconda
- conda-forge



TODO: In the cell below, send a request to the web service you deployed to test it.

In [18]:
# Define the deployment configuration: a container with 1 CPU core and 1 GB of
# memory, with Application Insights enabled
deployment_conf = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description='Predicting deaths caused by heart failure',
    enable_app_insights=True)

# Deploy model as webservice using Azure Container Instance (ACI)
service_name = "aci-heart-failure-web"

# Deploy the registered model with the inference configuration (scoring script
# plus environment) under the service name above
service = Model.deploy(
    workspace=ws,
    name=service_name, 
    models=[model], 
    inference_config=inference_conf, 
    deployment_config=deployment_conf, 
    overwrite=True)

# Wait for the deployment to finish, printing its progress
service.wait_for_deployment(show_output=True)

# Print the final service state
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2025-03-17 15:02:44+00:00 Creating Container Registry if not exists.
2025-03-17 15:02:44+00:00 Registering the environment.
2025-03-17 15:02:45+00:00 Use the existing image.
2025-03-17 15:02:46+00:00 Generating deployment configuration.
2025-03-17 15:02:47+00:00 Submitting deployment to compute.
2025-03-17 15:02:51+00:00 Checking the status of deployment aci-heart-failure-web..
2025-03-17 15:11:32+00:00 Checking the status of inference endpoint aci-heart-failure-web.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [20]:
import json
import requests


# One patient record with the twelve feature columns (the DEATH_EVENT label is
# omitted), wrapped in the {"data": [...]} envelope sent to the scoring endpoint
test_data = json.dumps({
    "data": [{
        "age": 75,
        "anaemia": 0,
        "creatinine_phosphokinase": 582,
        "diabetes": 0,
        "ejection_fraction": 20,
        "high_blood_pressure": 1,
        "platelets": 265000,
        "serum_creatinine": 1.9,
        "serum_sodium": 130,
        "sex": 1,
        "smoking": 0,
        "time": 4,
    }]
})

response = requests.post(
    service.scoring_uri,
    data=test_data,
    headers={'Content-Type': 'application/json'},
    # Without a timeout, requests waits indefinitely if the endpoint never answers
    timeout=60,
)
# Fail on an HTTP error status (4xx/5xx) before trying to parse the body
response.raise_for_status()

print("Results:", response.json())

Results: {"result": [true]}


TODO: In the cell below, print the logs of the web service and delete the service

In [21]:
# Print the logs retrieved from the deployed web service
print(service.get_logs())

/bin/bash: /azureml-envs/azureml_3997f53a14290c7447e4e58537db5370/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_3997f53a14290c7447e4e58537db5370/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_3997f53a14290c7447e4e58537db5370/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2025-03-17T15:10:45,598807368+00:00 - gunicorn/run 
2025-03-17T15:10:45,598402298+00:00 - rsyslog/run 
2025-03-17T15:10:45,602164372+00:00 | gunicorn/run | 
bash: /azureml-envs/azureml_3997f53a14290c7447e4e58537db5370/lib/libtinfo.so.6: no version information available (required by bash)
2025-03-17T15:10:45,606145389+00:00 | gunicorn/run | ###############################################
2025-03-17T15:10:45,607158613+00:00 - nginx/run 
2025-03-17T15:10:45,608205889+00:00 | gunicorn/run | AzureML Container Runtime Information
2025-03-17T15:10:45,610008664+00:00 | gunico

In [22]:
# Delete the web service and the registered model.
# NOTE(review): the compute cluster used for training is not deleted or shut
# down here; do that separately if it is no longer needed.
service.delete()
model.delete()

Running
2025-03-17 15:13:56+00:00 Deleting service..
2025-03-17 15:14:07+00:00 Deleting service entity.
Succeeded


**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shut down all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
