# Automated ML

Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [26]:
import requests
import json
import joblib
import os
import logging

import pandas as pd

from sklearn.model_selection import train_test_split
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace, Experiment, Dataset
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.train.automl.utilities import get_primary_metrics
from azureml.core.webservice import AciWebservice, LocalWebservice
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.model import Model
from azureml.widgets import RunDetails
from azureml.automl.core.shared import constants


# Obtain the Azure ML workspace handle that later cells use to reach experiments,
# compute targets and the datastore.
# NOTE(review): from_config() is called with no arguments, so a workspace config
# file must be discoverable from the notebook's environment — confirm.
ws = Workspace.from_config()

## Dataset

### Overview

In this problem, we are using the [Heart Failure Prediction](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data) dataset from Kaggle.
The dataset has the following 12 features and one target variable:

- age: Age of the patient
- anaemia: Decrease of red blood cells or hemoglobin
- creatinine_phosphokinase: Level of the CPK enzyme in the blood (mcg/L)
- diabetes: If the patient has diabetes
- ejection_fraction: Percentage of blood leaving the heart at each contraction
- high_blood_pressure: If the patient has hypertension
- platelets: Platelets in the blood (kiloplatelets/mL)
- serum_creatinine: Level of serum creatinine in the blood (mg/dL)
- serum_sodium: Level of serum sodium in the blood (mEq/L)
- sex: Woman or man
- smoking: If the patient smokes or not
- time: Follow-up period (days)
- DEATH_EVENT: If the patient died during the follow-up period (boolean); this is the target variable

In [3]:
# Re-acquire the workspace handle so this cell can be run on its own.
ws = Workspace.from_config()

# Name of the experiment that will hold the AutoML run.
experiment_name = 'heart-failure-prediction-experiment'

experiment = Experiment(ws, experiment_name)

# Show which workspace the notebook is connected to, one property per line.
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

# No interactive run is opened with experiment.start_logging() here: the AutoML
# job is created later by experiment.submit(...), and an interactive run that is
# never completed would stay in the "Running" state in the experiment.

Workspace name: udacity-azure-ml
Azure region: eastus2
Subscription id: 7292cf53-4607-42aa-ae5d-bbb29bb79c07
Resource group: udacity-azure-ml


In [5]:
# List the compute targets already attached to the workspace, printing each
# one's name, type and provisioning state.

compute_targets = ws.compute_targets

for target_name in compute_targets:
    target = compute_targets[target_name]
    print(target_name, target.type, target.provisioning_state)

azure-ml-capstone ComputeInstance Succeeded


In [6]:
# Name of the AmlCompute cluster the AutoML experiment will train on.
compute_cluster_name = "automl-compute"

# Reuse the cluster when the workspace already has one with this name;
# ComputeTargetException signals that it does not exist, so provision it.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    print("Creating new cluster")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=5,
    )
    compute_cluster = ComputeTarget.create(
        workspace=ws,
        name=compute_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print("Found existing cluster, using it")

# Block until the provisioning operation has finished, echoing its progress.
compute_cluster.wait_for_completion(show_output=True)

Creating new cluster
InProgress.......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Importing Dataset

In [9]:
from azureml.data.dataset_factory import TabularDatasetFactory

# The Heart Failure Prediction CSV is hosted in the project's GitHub repository:
# https://github.com/saivarunk/udacity-ml-azure-capstone-project/blob/master/dataset/heart_failure_clinical_records_dataset.csv
# The raw-content URL below is what the dataset factory actually reads.

path_to_data = "https://raw.githubusercontent.com/saivarunk/udacity-ml-azure-capstone-project/master/dataset/heart_failure_clinical_records_dataset.csv"

# Dataset.Tabular is the same TabularDatasetFactory, spelled the way the
# training-data cell further down spells it.
data = Dataset.Tabular.from_delimited_files(path=path_to_data)

In [11]:
# Materialise the tabular dataset as a pandas DataFrame. The guard keeps this
# cell re-runnable: after the first run `data` is already a DataFrame, which has
# no to_pandas_dataframe() method.
if not isinstance(data, pd.DataFrame):
    data = data.to_pandas_dataframe()

# Features: every column except the 'DEATH_EVENT' label.
x = data.drop('DEATH_EVENT', axis=1)
# Target: the 'DEATH_EVENT' column.
y = data['DEATH_EVENT']

# 80/20 train/test split. random_state makes the split (and therefore the
# uploaded CSVs and the AutoML results) reproducible across kernel restarts;
# stratify=y keeps the class ratio of the label the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42, stratify=y
)

# Re-attach the label column so each split is a single self-contained table.
train_df = pd.concat([x_train, y_train], axis=1)
test_df = pd.concat([x_test, y_test], axis=1)

train_df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
271,51.0,0,582,1,40,0,221000.0,0.9,134,0,0,244,0
181,59.0,1,176,1,25,0,221000.0,1.0,136,1,1,150,1
224,58.0,0,582,1,25,0,504000.0,1.0,138,1,0,205,0
12,45.0,1,981,0,30,0,136000.0,1.1,137,1,0,11,1


In [13]:
# Write both splits to ./data as CSV files (without the index column), then
# upload that folder to the workspace's default datastore under 'heart-failure'.
os.makedirs('data', exist_ok=True)

train_df.to_csv("data/train_data.csv", index=False)
test_df.to_csv("data/test_data.csv", index=False)

ds = ws.get_default_datastore()
ds.upload(
    src_dir='./data',
    target_path='heart-failure',
    overwrite=True,
    show_progress=True,
)

Uploading an estimated of 2 files
Uploading ./data/test_data.csv
Uploaded ./data/test_data.csv, 1 files out of an estimated total of 2
Uploading ./data/train_data.csv
Uploaded ./data/train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_06197ca57113463dae885aa69ac0c9ff

In [14]:
# Build a tabular dataset from the training CSV that was uploaded to the datastore.
train_csv_path = ds.path('heart-failure/train_data.csv')
train_data = Dataset.Tabular.from_delimited_files(path=train_csv_path)

## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [16]:
# Settings for the AutoML run: a classification task on the uploaded training
# data with 'DEATH_EVENT' as the label, executed on the compute cluster.
automl_settings = dict(
    task="classification",
    debug_log='automl_errors.log',
    training_data=train_data,
    label_column_name='DEATH_EVENT',
    compute_target=compute_cluster,
    enable_early_stopping=True,
    experiment_timeout_minutes=30,
    n_cross_validations=4,
    featurization='auto',
    primary_metric='accuracy',
    verbosity=logging.INFO,
)

# Expand the settings into the AutoML configuration object.
automl_config = AutoMLConfig(**automl_settings)

In [17]:
# Submit the AutoML configuration to the experiment; the returned object is the
# handle to the remote run used by the following cells.
remote_run = experiment.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-prediction-experiment,AutoML_76b54900-9e91-44d7-9865-7b545508a74f,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [19]:
# Render the run-details widget for the submitted AutoML run.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model



In [20]:
# The run was submitted asynchronously, so wait for it to finish before asking
# for its output; otherwise a fresh "Restart & Run All" reaches this cell while
# training is still in progress.
remote_run.wait_for_completion(show_output=False)

# Best child run and the fitted model it produced.
best_run, fitted_model = remote_run.get_output()

best_run_metrics = best_run.get_metrics()

# Print every metric recorded for the best run as "<name> <value>".
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

weighted_accuracy 0.893753788036033
average_precision_score_macro 0.9186319421385046
matthews_correlation 0.7128900301749486
precision_score_macro 0.8828543846028114
precision_score_micro 0.8660310734463277
log_loss 0.4474649401921089
AUC_weighted 0.9209293384293384
AUC_macro 0.9209293384293386
f1_score_micro 0.8660310734463277
recall_score_weighted 0.8660310734463277
norm_macro_recall 0.668492355992356
balanced_accuracy 0.8342461779961781
f1_score_macro 0.8416904344257352
average_precision_score_micro 0.9099731210667553
AUC_micro 0.9067878203900539
accuracy 0.8660310734463277
f1_score_weighted 0.8584376790662731
recall_score_micro 0.8660310734463277
precision_score_weighted 0.8794675308042582
average_precision_score_weighted 0.9288430563700901
recall_score_macro 0.8342461779961781
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_76b54900-9e91-44d7-9865-7b545508a74f_28/accuracy_table
confusion_matrix aml://artifactId/ExperimentRun/dcid.AutoML_76b54900-9e91-44d7-9865-7b545508a7

In [24]:
# Display the best child run (rendered as a summary table by the notebook).
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-prediction-experiment,AutoML_76b54900-9e91-44d7-9865-7b545508a74f_28,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [25]:
# Save the best model

# Register the best run's outputs as a model in the workspace.
# NOTE(review): a later cell registers the model again through
# remote_run.register_model — confirm that both registrations are wanted.
best_run.register_model(model_name = 'automl_best_model.pkl', model_path = './outputs/')

# joblib.dump does not create missing directories, so make sure the local
# 'outputs' folder exists before writing the pickle into it.
os.makedirs('outputs', exist_ok=True)
joblib.dump(fitted_model, filename= "outputs/automl_model.pkl")

['outputs/automl_model.pkl']

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

In [27]:
# Fetch the scoring script generated for the best run and save it as the
# entry script used for deployment.
best_run.download_file(
    name='outputs/scoring_file_v_1_0_0.py',
    output_file_path='inference/score.py',
)
# Fetch the conda environment specification of the best run.
best_run.download_file(
    name=constants.CONDA_ENV_FILE_PATH,
    output_file_path='automl_env.yml',
)

In [28]:
# Register the AutoML model under the name recorded in the best run's properties.

model_name = best_run.properties['model_name']
description = 'AutoML Model to predict heart failure data to predict death occurs or not'
tags = None  # no tags are attached to the registered model

model = remote_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

# Identifier of the model that was just registered.
remote_run.model_id

'AutoML76b54900928'

In [29]:
# Create inference config

script_file_name = 'inference/score.py'

# Build the serving environment from the conda specification downloaded from the
# best run ('automl_env.yml'), so the container has the packages the model was
# trained with instead of depending on an implicit default environment.
inference_env = Environment.from_conda_specification(
    name='heart-failure-automl-env',
    file_path='automl_env.yml',
)
inference_config = InferenceConfig(entry_script=script_file_name, environment=inference_env)

# Azure Container Instance sizing and metadata for the web service.
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1,
                                               tags = {'area': "hfData", 'type': "automl_classification"},
                                               description = 'Heart Failure Prediction - AutoML model')

aci_service_name = 'heart-failure-prediction-automl'

# Deploy the registered model and block until the deployment has finished.
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
aci_service.wait_for_deployment(show_output=True)

print(aci_service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-09-03 10:04:20+00:00 Creating Container Registry if not exists.
2021-09-03 10:04:21+00:00 Use the existing image.
2021-09-03 10:04:21+00:00 Generating deployment configuration.
2021-09-03 10:04:22+00:00 Submitting deployment to compute.
2021-09-03 10:04:25+00:00 Checking the status of deployment heart-failure-prediction-automl..
2021-09-03 10:04:50+00:00 Checking the status of inference endpoint heart-failure-prediction-automl.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [30]:
# Enable Application Insights on the deployed service.
aci_service.update(enable_app_insights=True)
# update() only starts the operation; wait for it to finish so the following
# cells (endpoint URIs, scoring request) talk to the updated service.
aci_service.wait_for_deployment(show_output=True)

In [32]:
# Print the Swagger document URI and the scoring endpoint URI of the service.
service_uris = (
    ("Swagger URI ", aci_service.swagger_uri),
    ("Model Endpoint URI ", aci_service.scoring_uri),
)
for label, uri in service_uris:
    print(label + uri)

Swagger URI http://4aa64d43-489c-4d4d-90c4-b1e4c68d5643.eastus2.azurecontainer.io/swagger.json
Model Endpoint URI http://4aa64d43-489c-4d4d-90c4-b1e4c68d5643.eastus2.azurecontainer.io/score


In the cell below, send a request to the web service you deployed to test it.

In [42]:
import requests
import json

# URL for the web service, should be similar to:
# 'http://<endpoint_id>.eastus.azurecontainer.io/score'

# Take the scoring URL straight from the deployed service object.
scoring_uri = aci_service.scoring_uri

# One patient record to score, so the service returns a single prediction.
# Kept under its own name so it does not overwrite the `data` variable that
# holds the dataset earlier in the notebook.
scoring_payload = {"data":
        [
          {
            "age": 55.0,
            "anaemia": 0,
            "creatinine_phosphokinase": 4010,
            "diabetes": 0,
            "ejection_fraction": 32,
            "high_blood_pressure": 1,
            "platelets": 234558.23,
            "serum_creatinine": 1.4,
            "serum_sodium": 125,
            "sex": 1,
            "smoking": 0,
            "time": 12
          },
      ]
    }

# Send the request with a timeout so a hung endpoint cannot block the notebook,
# and fail loudly on an HTTP error instead of trying to parse an error body.
resp = requests.post(scoring_uri, json=scoring_payload, timeout=30)
resp.raise_for_status()
print(resp.json())

{"result": [1]}


In the cell below, print the logs of the web service and delete the service

In [37]:
# Print the deployed endpoint's logs. print() renders the line breaks; leaving
# the call as a bare expression would display one long escaped string instead.
print(aci_service.get_logs())




In [43]:
# Delete the deployed web service now that it has been tested.
# NOTE(review): the compute cluster created earlier is not deleted anywhere in
# this notebook — confirm whether it should be cleaned up as well.
aci_service.delete()