# Automated ML

Importing Dependencies:

In [1]:
import logging
import os
import csv
import joblib
import requests
import json

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.model import InferenceConfig
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.core import Model, Environment
from azureml.core.webservice import AciWebservice

from azureml.pipeline.steps import AutoMLStep

# Report the installed Azure ML core SDK version alongside the notebook output.
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.38.0


## Dataset

### Overview
The dataset is the Heart Disease UCI data from Kaggle. It contains features and attributes measured for each individual, together with a label indicating whether or not that individual has heart disease. The purpose of using this dataset is to create a model that can predict whether an individual is likely to have heart disease, based on the same measured features.


In [2]:
# Connect to the workspace described by the local configuration.
ws = Workspace.from_config()

# Echo the workspace identity, one field per line.
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

# Runs submitted from this notebook are grouped under this experiment name.
experiment_name = 'automl-exp'
experiment = Experiment(workspace=ws, name=experiment_name)

quick-starts-ws-187769
aml-quickstarts-187769
southcentralus
81cefad3-d2c9-4f77-a466-99a7f541c7bb


In [3]:
# Name under which the heart disease data is expected to be registered in the workspace.
key = "heartdisease"
description_text = ""

# Read the workspace's dataset mapping once and reuse it for both the check and the lookup.
registered_datasets = ws.datasets
found = key in registered_datasets
if not found:
    # Fail here with a clear message instead of hitting a NameError on `dataset` below.
    raise LookupError(
        f"Dataset '{key}' is not registered in workspace '{ws.name}'. "
        "Register it before running the rest of this notebook."
    )
dataset = registered_datasets[key]

# Materialise the dataset as a pandas frame and show summary statistics as the cell output.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


## AutoML Configuration

This is a binary classification problem with 13 features, so the AutoML task is set to classification. I chose accuracy as the primary metric because the result will be compared with a logistic regression model tuned with HyperDrive, and accuracy is a suitable metric for that comparison. The experiment timeout is set to 1 hour, as a dataset of this size with this many features is unlikely to need more than 1 hour to find a good model. 

In [6]:
amlcompute_cluster_name = "automl-cluster"

try:
    # Look up a compute target with this name in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a low-priority cluster that can grow to 5 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        vm_priority='lowpriority',
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    print('Cluster created successfully.')
else:
    print('Existing cluster found, this cluster will be used.')

# Block until the cluster is ready before it is handed to AutoML.
compute_target.wait_for_completion()


New cluster created.
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded......................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [7]:
# Run budget and optimisation target: at most 60 minutes, up to 5 concurrent
# iterations, ranked by accuracy.
automl_settings = dict(
    experiment_timeout_minutes=60,
    max_concurrent_iterations=5,
    primary_metric='accuracy',
)

# Classification run over the registered dataset, predicting the "target" column
# on the cluster prepared above; errors are logged to automl_errors.log.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="target",
    featurization='auto',
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings
)

In [8]:
# Submit the AutoML configuration to the experiment and keep the run handle
# for monitoring and for retrieving the best model later.
remote_run = experiment.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
automl-exp,AutoML_e5ea044e-9331-43d9-a18a-e984edae10f1,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

In [15]:

# Show the run-details widget for the submitted AutoML run.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"



In [10]:
# Block until the AutoML run finishes; the returned summary is displayed as the cell output.
run_summary = remote_run.wait_for_completion()
run_summary

{'runId': 'AutoML_e5ea044e-9331-43d9-a18a-e984edae10f1',
 'target': 'automl-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-02-28T11:08:07.474147Z',
 'endTimeUtc': '2022-02-28T11:22:59.042347Z',
 'services': {},
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'automl-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"6f359f17-6ce1-43b9-a8ae-acf87158a4c6\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.38.0", "azureml-train": "1.38.0", "azureml-train-restclients-hyperdrive": "1.38.0", "azureml-train-core": "1.38.0", "azureml-train-automl": "1.38.0", "azureml-train-automl-runtime": "1.38.0", "azureml-train-automl

## Best Model


In [13]:

# Retrieve the best child run, plus the (run, fitted model) pair reported by AutoML.
best_automl = remote_run.get_best_child()
run, model = remote_run.get_output()

# Report the headline accuracy of the best child run.
best_accuracy = best_automl.get_metrics()['accuracy']
print("Best Model Accuracy:", best_accuracy)

# Report which algorithm produced it, as recorded in the run properties.
best_algorithm = best_automl.get_details()['properties']['run_algorithm']
print("Best Model Type: ", best_algorithm)

Best Model Accuracy: 0.867741935483871
Best Model Type:  VotingEnsemble


In [14]:
# Persist the fitted model to disk, then load it back so the cell output shows
# what was actually written.
saved_model_file = 'aml_model.joblib'
joblib.dump(model, saved_model_file)
joblib.load(saved_model_file)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
)), ('randomforestclassifier', RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='entropy', max_depth=None, max_features='log2', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=0.035789473684210524, min_samples_split=0.056842105263157895, min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1, oob_score=False, random_state=None, verbose=0, warm_start=False))], verbose=False))], flatten_transform=None, weights=[0.18181818181818182, 0.18181818181818182, 0.18181818181818182, 0.09090909090909091, 0.0909090909090

## Model Deployment

The deployment section was filled in and tested for this model, but the logistic regression from the HyperDrive experiment resulted in greater accuracy, so this model was not deployed. 

In [77]:
# Fetch the "AzureML-AutoML" environment from the workspace to host the scoring script.
environment = Environment.get(ws, "AzureML-AutoML")

# Register the best run's model file. The registry handle gets its own name so the
# fitted pipeline held in `model` is not overwritten; re-running the save cell
# after this one would otherwise pickle the registry handle instead.
registered_model = run.register_model(model_name='automl-best-model',
                                      model_path='outputs/model.pkl')

# score.py is the entry script that handles incoming scoring requests.
inference_config = InferenceConfig(entry_script='score.py',
                                   environment=environment)
service_name = 'automl-deploy'
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1.8, memory_gb=4)

# Deploy as an ACI web service, replacing any existing service with the same name.
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[registered_model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True
                      )
service.wait_for_deployment(show_output=True)

# The scoring URI is the endpoint that the test requests are posted to.
scoring_uri = service.scoring_uri
print(scoring_uri)

# Turn on Application Insights for the running service and wait for the update to finish.
service.update(enable_app_insights=True)
service.wait_for_deployment(show_output=True)



Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-02-28 14:58:45+00:00 Creating Container Registry if not exists.
2022-02-28 14:58:45+00:00 Registering the environment.
2022-02-28 14:58:45+00:00 Use the existing image.
2022-02-28 14:58:45+00:00 Generating deployment configuration.
2022-02-28 14:58:46+00:00 Submitting deployment to compute.
2022-02-28 14:58:48+00:00 Checking the status of deployment automl-deploy..
2022-02-28 15:05:02+00:00 Checking the status of inference endpoint automl-deploy.
Succeeded
ACI service creation operation finished, operation "Succeeded"
http://488eef2d-3d34-4ddb-a3be-7d5780fe98b9.southcentralus.azurecontainer.io/score


In [None]:
# Two sample records carrying the 13 feature columns of the training data
# (every column except "target").
data = {"data": [
    {"age": 63,
     "sex": 1,
     "cp": 3,
     "trestbps": 145,
     "chol": 233,
     "fbs": 1,
     "restecg": 0,
     "thalach": 150,
     "exang": 0,
     "oldpeak": 2.3,
     "slope": 0,
     "ca": 0,
     "thal": 1},

    {"age": 56,
     "sex": 1,
     "cp": 0,
     "trestbps": 125,
     "chol": 249,
     "fbs": 1,
     "restecg": 0,
     "thalach": 144,
     "exang": 1,
     "oldpeak": 1.2,
     "slope": 1,
     "ca": 1,
     "thal": 2},
]}

# Convert to a JSON string and keep a copy on disk.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header with a service key
# (not the dataset name stored in `key` earlier in this notebook), e.g.:
# primary_key, _ = service.get_keys()
# headers['Authorization'] = f'Bearer {primary_key}'

# Make the request. A timeout stops the cell hanging forever on an unresponsive
# endpoint, and raise_for_status() surfaces HTTP errors before the body is parsed.
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json())
print("Case 0: No Heart Disease, Case 1: Heart Disease.")

# Repeat the call through the SDK service object; its result is the cell output.
output = service.run(input_data)
output

In [82]:
# Fetch the deployed service's logs and display them as the cell output.
deployment_logs = service.get_logs()
deployment_logs

In [16]:
# Tear down the web service first, then the compute cluster, so nothing is left running.
for resource in (service, compute_target):
    resource.delete()

Current provisioning state of AmlCompute is "Deleting"



**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
