# Automated ML

Import Dependencies. In the cell below, import all the dependencies that I will need to complete the project.

In [1]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
import os
import joblib
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
import requests
import json
from azureml.core.conda_dependencies import CondaDependencies
import sklearn
from azureml.core.model import Model

## Dataset

### Overview

I use the Heart-Failure Dataset from Kaggle and the task is to predict the mortality of the patients based on their clinical records. I create the workspace and experiment in the Azure ML studio. I check if the computer target already exists. If it does not then it creates one. Finally, I display the first rows of the Heart-Failure Dataset.

In [2]:
#Create a workspace and an experiment in Azureml
ws = Workspace.from_config()
experiment_name = 'Heart-Failure-AutoMlProject'
project_folder = './Heart-Failure-project'

experiment=Experiment(ws, experiment_name)

#Check if the cluster exists if it does not then create one.
# choose a name for your cluster
cluster_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # can poll for a minimum number of nodes and for a specific timeout. 
    # if no min node count is provided it uses the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2020-11-17T17:29:48.557000+00:00', 'errors': None, 'creationTime': '2020-11-17T17:29:45.753595+00:00', 'modifiedTime': '2020-11-17T17:30:01.713834+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS3_V2'}


In [3]:
# Get Heart Failure Dataset and add it in pandas
ds = Dataset.get_by_name(ws, name='Heart-Failure')

In [4]:
#Review the first 5 rows in the dataset
ds.take(5).to_pandas_dataframe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [5]:
#Split Dataset to Training and Testing Datasets
training_data, test_data = ds.random_split(percentage=0.8, seed=223)

#Create the Test Data into pandas_dataframe
df_test = test_data.to_pandas_dataframe()
y_test = df_test['DEATH_EVENT']
X_test = df_test.drop(['DEATH_EVENT'], axis=1)

## AutoML Configuration
It is used an AutoML for the classification problem using the Heart Failure Dataset. The AutoML run will look for the best model in relation to the accuracy. The target feature is the "DEATH_EVENT". The experiment timeout is 30 mins and the maximum number of iterations which are executed in parallel are 5. 

In [6]:
# AutoML settings
automl_settings = {
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'accuracy'
}

# AutoML config
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=training_data,
                             validation_data=test_data,
                             label_column_name="DEATH_EVENT",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [7]:
# Submit experiment
Automl_run = experiment.submit(automl_config)

Running on remote.


## Run Details

`RunDetails` widget is used to show the different experiments.

In [8]:
RunDetails(Automl_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

Getb the best model from the automl experiments and display all the properties of the model.



In [9]:
best_run, fitted_model = Automl_run.get_output()
print(best_run)
print(fitted_model)

Run(Experiment: Heart-Failure-AutoMlProject,
Id: AutoML_d9602484-520b-4a48-b3f7-50eda90d92aa_53,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                    min_impurity_split=None,
                                                                                                    min_samples_leaf=0.01,
   

In [10]:
model_name = best_run.properties['model_name']
# register model in workspace
description = 'Automated Machine Learning Model'
tags = None
auto_ml=Automl_run.register_model(model_name=model_name, description=description, tags=tags)
print(Automl_run.model_id) # Use this id to deploy the model as a web service in Azure

AutoMLd9602484553


In [32]:
#Test the Model
y_pred=fitted_model.predict(X_test[0:3])
print(y_pred)

[1 1 1]


## Model Deployment


Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [12]:
#Set up the Environment 
#env = Environment(workspace=ws, name="myenv")  

#Install the correct packages
#env.python.conda_dependencies.add_pip_package("scikit-learn")  
#Conda Packages
#env.python.conda_dependencies.add_conda_package("python==3.7")
#env.python.conda_dependencies.add_conda_package("scikit-learn==0.22.1")

# Pip Packages
#env.python.conda_dependencies.add_pip_package('ference-schema[numpy-support]')
#env.python.conda_dependencies.add_pip_package("numpy")
#env.python.conda_dependencies.add_pip_package("azureml-train-automl-runtime==1.15.0")
#env.python.conda_dependencies.add_pip_package("azureml-explain-model==1.15.0")
#env.python.conda_dependencies.add_pip_package("inference-schema")
#env.python.conda_dependencies.add_pip_package("azureml-defaults==1.15.0")
#env.python.conda_dependencies.add_conda_package("py-xgboost")
#env.python.conda_dependencies.add_pip_package("joblib")

In [12]:
script_file_name='inference/scope.py'
best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'inference/scope.py')

In [14]:
#Set up the inference_config
#inference_config = InferenceConfig(entry_script='score.py', environment=env)
inference_config = InferenceConfig(entry_script=script_file_name, environment=best_run.get_environment())

In [15]:
#Local Deployment
from azureml.core.webservice import LocalWebservice
local_config = LocalWebservice.deploy_configuration(port=9000)
local_service = Model.deploy(ws, "test", [auto_ml], inference_config, local_config)
local_service.wait_for_deployment(show_output=True)


Downloading model AutoMLd9602484553:1 to /tmp/azureml_b9ggito2/AutoMLd9602484553/1
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry viennaglobal.azurecr.io
Logging into Docker registry viennaglobal.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM viennaglobal.azurecr.io/azureml/azureml_3948892c3f204a4877f0b9993b8e7795
 ---> e958b21f6008
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> 5941ff07961f
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjM3NGJkZjFhLWM2NDgtNDI0NC1hMzE3LWYwZDFlZjRiODVjNyIsInJlc291cmNlR3JvdXBOYW1lIjoiYW1sLXF1aWNrc3RhcnRzLTEyNjY1NCIsImFjY291bnROYW1lIjoicXVpY2stc3RhcnRzLXdzLTEyNjY1NCIsIndvcmtzcGFjZUlkIjoiODY2ODJjNDUtNTZkMS00MjVlLWE1YWYtYTA4M2RiNDhhM2UzIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 2538678d2dc2
 ---> b71e8aed0448
Step 4/5 : RUN mv '/var/azureml-app/tmp79bh11dz.py' /v

In [30]:
#Set up the deployment_config as webservice
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, enable_app_insights=True)

#Deploy the model
service = Model.deploy(
    workspace = ws,
    name = "mywebservice",
    models = [auto_ml],
    inference_config = inference_config,
    deployment_config = aci_config, overwrite=True)

#wait until deployment is complete
service.wait_for_deployment(show_output = True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..............................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [33]:
print(service.get_logs())

2020-11-17T18:19:58,638334381+00:00 - iot-server/run 
2020-11-17T18:19:58,639524283+00:00 - rsyslog/run 
2020-11-17T18:19:58,639840783+00:00 - gunicorn/run 
2020-11-17T18:19:58,640089384+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_0e3a8a6dba181476a2523c12c58dfc97/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [34]:
#Print the state
print(service.state)

#Print the scoring uri of the service
print(service.scoring_uri)

#Print the primary authentication key for the service
#primary, secondary = service.get_keys()
#print(primary)

Healthy
http://8b41c71b-6c41-4820-8996-340837fd2aed.southcentralus.azurecontainer.io/score


In [27]:
import pandas as pd
test_df=pd.read_csv('./test-data.csv')
test_df

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
0,75,0,582,0,20,1,265000.0,1.9,130,1,0,4
1,55,0,7861,0,38,0,263358.03,1.1,136,1,0,6
2,65,0,146,0,20,0,162000.0,1.3,129,1,1,7
3,50,1,111,0,20,0,210000.0,1.9,137,1,0,7
4,65,1,160,1,20,0,327000.0,2.7,116,0,0,8


In [28]:
import json
data= ({'data':test_df[0:3].to_dict(orient='records')})
test_sample=json.dumps(data)
output= service.run(test_sample)
print(output)

{"result": [1, 1, 1]}


In [29]:
service.delete()