# Automated ML

In [1]:
import azureml.core
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.model import Model
from sklearn.model_selection import train_test_split
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
import numpy as np
import pandas as pd


## Setup

In [2]:
# Load the workspace from the local config file and get a handle to the
# experiment that the AutoML run is submitted to.
experiment_name = 'capstone'
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [3]:
amlcompute_cluster_name = "capstonecompute"

# Reuse the named cluster when the lookup succeeds; otherwise provision it
# and wait (up to 10 minutes) until at least one node is available.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=1,
        timeout_in_minutes=10,
    )
else:
    print('Found existing cluster')


## Dataset

### Overview
TODO: In this markdown cell, give an overview of the dataset you are using. Also mention the task you will be performing.


In [4]:
# Reuse the registered dataset when it is already in the workspace;
# otherwise read it from the UCI archive and register it under `key`.
key = 'heartfailuredataset'
if key in ws.datasets.keys():
    dataset = ws.datasets[key]
    print('Found dataset')
else:
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv'
    dataset = Dataset.Tabular.from_delimited_files(url)
    dataset = dataset.register(workspace=ws, name=key)

# Split into train and held-out test frames. The fixed random_state keeps
# the split reproducible across kernel restarts.
ds = dataset.to_pandas_dataframe()
train_df, test = train_test_split(ds, random_state=625, shuffle=True)

# Write the training rows to a local CSV and upload it to the default datastore.
# overwrite=True: without it an existing train.csv in the datastore is kept,
# so a re-run could silently train on stale data.
train_df.to_csv('train.csv', index=False)
datastore = ws.get_default_datastore()
datastore.upload_files(files=['./train.csv'], overwrite=True)

# AutoML on remote compute reads a TabularDataset, so load the uploaded file
# back from the datastore. `train` is the dataset; `train_df` is the DataFrame.
train = Dataset.Tabular.from_delimited_files(path=[(datastore, 'train.csv')])


Uploading an estimated of 1 files
Uploading ./train.csv
Uploaded ./train.csv, 1 files out of an estimated total of 1
Uploaded 1 files


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [5]:
# Run-level settings: 30-minute budget, early stopping, accuracy as the
# metric to optimise, and 5-fold cross-validation.
automl_settings = dict(
    primary_metric='accuracy',
    n_cross_validations=5,
    enable_early_stopping=True,
    experiment_timeout_minutes=30,
)

# Classification on the uploaded training dataset, predicting DEATH_EVENT,
# executed on the remote compute cluster.
automl_config = AutoMLConfig(
    task='classification',
    training_data=train,
    label_column_name='DEATH_EVENT',
    compute_target=compute_target,
    **automl_settings,
)

In [7]:
# Submit the AutoML configuration to the experiment; `run` is the handle
# used by the cells below to monitor the run and fetch its output.
run = experiment.submit(automl_config)

Running on remote.


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?


In [8]:
# Show the notebook widget that tracks the submitted run.
RunDetails(run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [9]:
# Block until the run reaches a terminal state; the cell output is the run's details dict.
run.wait_for_completion()

{'runId': 'AutoML_93f0cf59-fbae-4121-a2d6-50c663d4a231',
 'target': 'capstonecompute',
 'status': 'Completed',
 'startTimeUtc': '2021-01-15T21:10:31.570689Z',
 'endTimeUtc': '2021-01-15T22:13:37.421233Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'capstonecompute',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"8e41e71c-2f0a-4e43-b668-60e962303106\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"train.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-134813\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30\\\\\\", \\\\\\"worksp

## Best Model

In [10]:
# Fetch the best child run together with its fitted model.
# Here `model` is the fitted pipeline that is used for `predict` further down.
best_run, model = run.get_output()

In [11]:
# Print the summary (experiment, id, type, status) of the best child run.
print(best_run)

Run(Experiment: capstone,
Id: AutoML_93f0cf59-fbae-4121-a2d6-50c663d4a231_5,
Type: azureml.scriptrun,
Status: Completed)


In [12]:
# Print the steps of the fitted pipeline returned by get_output().
print(model)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                    min_samples_leaf=0.01,
                                                                                                    min_samples_split=0.10368421052631578,
                                                                                                    min_weight_fraction_leaf=0.0,


In [12]:
# Display the best run's tags (for this run they list the ensembled algorithms and their weights).
best_run.get_tags()

{'_aml_system_azureml.automlComponent': 'AutoML',
 '_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}',
 'ensembled_iterations': '[4, 2, 0, 3, 1]',
 'ensembled_algorithms': "['RandomForest', 'RandomForest', 'LightGBM', 'RandomForest', 'XGBoostClassifier']",
 'ensemble_weights': '[0.2222222222222222, 0.2222222222222222, 0.1111111111111111, 0.3333333333333333, 0.1111111111111111]',
 'best_individual_pipeline_score': '0.8436363636363635',
 'best_individual_iteration': '4',
 '_aml_system_automl_is_child_run_end_telemetry_event_logged': 'True'}

In [13]:
# Display the accuracy metric logged by the best run.
best_run.get_metrics(name='accuracy')

{'accuracy': 0.8614141414141414}

### Test the model

In [14]:
# Keep the first two held-out rows, without the label, as the sample payload
# for testing the deployed service. The label is dropped by name rather than
# by position so this does not depend on DEATH_EVENT being the last column.
tosend = test.drop(columns=['DEATH_EVENT']).head(2).values.tolist()

In [15]:
# Separate the held-out frame into feature columns and the label series.
x_test = test.drop(columns=['DEATH_EVENT'])
y_test = test['DEATH_EVENT']

In [16]:
# Predict on the held-out features with the fitted pipeline and tabulate
# actual (rows) against predicted (columns) class counts.
from sklearn.metrics import confusion_matrix

ypred = model.predict(x_test)
cmatrix = confusion_matrix(y_true=y_test, y_pred=ypred)
pd.DataFrame(cmatrix)

Unnamed: 0,0,1
0,42,7
1,7,19


### Register the model

In [17]:
# Register the model produced by the AutoML run under the name 'automl_model'.
# TODO: add properties to the registration.
# NOTE: this rebinds `model` from the fitted pipeline (used for predict above)
# to the value returned by register_model; the deployment cells below rely on
# the rebound value, so re-running the prediction cell after this one will not work.

model = run.register_model(model_name ='automl_model')



In [18]:
# Print the registered model's summary (workspace, name, id, version, tags, properties).
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-134813', subscription_id='61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30', resource_group='aml-quickstarts-134813'), name=automl_model, id=automl_model:1, version=1, tags={}, properties={})


In [None]:
# Show the local path of version 1 of the registered model.
# get_model_path is a static method whose workspace keyword is `_workspace`;
# passing `workspace=` raises TypeError.
Model.get_model_path(model_name='automl_model', version=1, _workspace=ws)

## Model Deployment

Remember that you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

### Save the environment

In [19]:
# Show the names of the environments available in the workspace,
# to pick the one used for deployment.
Environment.list(workspace=ws).keys()


dict_keys(['AzureML-VowpalWabbit-8.8.0', 'AzureML-AutoML', 'AzureML-Hyperdrive-ForecastDNN', 'AzureML-PyTorch-1.4-CPU', 'AzureML-PyTorch-1.4-GPU', 'AzureML-AutoML-DNN', 'AzureML-AutoML-DNN-GPU', 'AzureML-Designer-Transform', 'AzureML-Designer', 'AzureML-AutoML-GPU', 'AzureML-Designer-IO', 'AzureML-Dask-CPU', 'AzureML-Designer-R', 'AzureML-Dask-GPU', 'AzureML-Designer-Recommender', 'AzureML-PyTorch-1.5-CPU', 'AzureML-PyTorch-1.5-GPU', 'AzureML-Sidecar', 'AzureML-Designer-CV-Transform', 'AzureML-Designer-Score', 'AzureML-Designer-PyTorch', 'AzureML-Designer-CV', 'AzureML-TensorFlow-2.1-GPU', 'AzureML-TensorFlow-2.1-CPU', 'AzureML-Designer-PyTorch-Train', 'AzureML-AutoML-DNN-Vision-GPU', 'AzureML-Designer-VowpalWabbit', 'AzureML-TensorFlow-2.2-GPU', 'AzureML-TensorFlow-2.2-CPU', 'AzureML-PyTorch-1.6-CPU', 'AzureML-PyTorch-1.6-GPU', 'AzureML-Triton', 'AzureML-TensorFlow-2.3-CPU', 'AzureML-TensorFlow-2.3-GPU', 'AzureML-DeepSpeed-0.3-GPU', 'AzureML-TensorFlow-2.0-CPU', 'AzureML-Chainer-5.1.0

In [20]:
# Use the curated AutoML environment for inference.
my_env = Environment.get(workspace=ws, name="AzureML-AutoML")

# Write the environment's conda specification to a YAML file.
# The handle must be the name bound by `with`; the original wrote through an
# undefined `f`, which raises NameError.
with open('automl_env.yml', "w") as env_file:
    env_file.write(my_env.python.conda_dependencies.serialize_to_string())
    


### Create a scoring script

In [25]:
%%writefile score.py

import os
import numpy as np
import json
import joblib

def init():
    global model
    model_path = os.join(os.getenv('AZUREML_MODEL_DIR'),'automl_model.pkl')
    model = joblib.load(model_path)

def run(data):
    try:
        data = np.array(json.loads(data))
        result = model.predict(data)
        return result.tolist()
    except Exception as err:
        return str(err)

Writing score.py


In [22]:
# Create the inference configuration: score.py is the entry script and
# `my_env` is the environment the service runs in.
from azureml.core.model import InferenceConfig
inference_config = InferenceConfig(entry_script="score.py", environment = my_env)

### Deploy locally for testing purposes

In [None]:
# The class is spelled LocalWebservice (lower-case "s" in "service");
# importing `LocalWebService` raises ImportError.
from azureml.core.webservice import LocalWebservice
deployment_config = LocalWebservice.deploy_configuration(port=9000)

# Model.deploy is called on the Model class; the registered `model` instance
# is passed in the `models` list.
service = Model.deploy(workspace=ws, name="test-service",
                       models=[model],
                       inference_config=inference_config, deployment_config=deployment_config)
# Wait for the local container to come up before the next cell calls it.
service.wait_for_deployment(show_output=True)

In [None]:
# json is imported here because no earlier cell imports it; without this the
# cell raises NameError when the notebook is run top to bottom.
import json

# Send the two sample rows to the locally deployed service and print its reply.
data = json.dumps({"data": tosend})
print(service.run(input_data=data))

In [None]:
# Remove the local test service before deploying to ACI.
service.delete()

### Deploy as a webservice

In [None]:
# Deployment configuration for an Azure Container Instances web service.
# auth_enabled=True turns on key authentication, which the later cells need:
# they call service.get_keys() and send the key as a Bearer token.
from azureml.core.webservice import Webservice, AciWebservice
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                                       memory_gb=1,
                                                       auth_enabled=True)

In [26]:
# Deploy the registered model as the web service "mywebservice", using the
# inference and deployment configurations defined above, then block until
# the deployment reaches a terminal state (progress is printed).
service=Model.deploy(workspace=ws,
                    name="mywebservice",
                    models=[model],
                    inference_config=inference_config,
                    deployment_config=deployment_config)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.................................................................
Failed


ERROR:azureml.core.webservice.webservice:Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: e845147b-427f-4fe8-a202-95918906d5e6
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: mywebservice. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for mor

WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: e845147b-427f-4fe8-a202-95918906d5e6
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: mywebservice. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: mywebservice. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information."
    },
    {
      "code": "AciDeploymentFailed",
      "message": "Your container application crashed. Please follow the steps to debug:\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\"restartCount\":3,\"currentState\":{\"state\":\"Waiting\",\"startTime\":null,\"exitCode\":null,\"finishTime\":null,\"detailStatus\":\"CrashLoopBackOff: Back-off restarting failed\"},\"previousState\":{\"state\":\"Terminated\",\"startTime\":\"2021-01-15T22:32:05.011Z\",\"exitCode\":111,\"finishTime\":\"2021-01-15T22:32:09.582Z\",\"detailStatus\":\"Error\"},\"events\":null}"
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: e845147b-427f-4fe8-a202-95918906d5e6\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: mywebservice. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: mywebservice. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_1b9697d50bbbb35eb098c299c7ed3dd0 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\"\n    },\n    {\n      \"code\": \"AciDeploymentFailed\",\n      \"message\": \"Your container application crashed. Please follow the steps to debug:\\n1. 
From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\\\"restartCount\\\":3,\\\"currentState\\\":{\\\"state\\\":\\\"Waiting\\\",\\\"startTime\\\":null,\\\"exitCode\\\":null,\\\"finishTime\\\":null,\\\"detailStatus\\\":\\\"CrashLoopBackOff: Back-off restarting failed\\\"},\\\"previousState\\\":{\\\"state\\\":\\\"Terminated\\\",\\\"startTime\\\":\\\"2021-01-15T22:32:05.011Z\\\",\\\"exitCode\\\":111,\\\"finishTime\\\":\\\"2021-01-15T22:32:09.582Z\\\",\\\"detailStatus\\\":\\\"Error\\\"},\\\"events\\\":null}\"\n    }\n  ]\n}"
    }
}

In [27]:
# Print the service's container logs to diagnose the failed deployment.
print(service.get_logs())

None


In [None]:
# Keep the first of the two values returned by get_keys() for the request below.
# NOTE: this rebinds `key`, which the dataset cell used for the dataset name.
# NOTE(review): presumably requires key authentication to be enabled on the service - confirm.
key, _ = service.get_keys()

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
# Endpoint that the test request below is posted to.
url = service.scoring_uri

In [None]:
import requests
import json

# Build the payload here so the cell does not depend on the optional
# local-deployment test cell having been run (it was the only place that
# defined `data`).
data = json.dumps({"data": tosend})
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {key}',
}
response = requests.post(url, data=data, headers=headers)
print(response.text)


### Remove the webservice, models, and shut down the compute cluster

In [41]:
# Tear down, in order: the deployed service, the registered model, the AutoML
# run and the experiment.
# NOTE: needs `service` from a deployment cell run in the same kernel session;
# the recorded output shows a NameError when it was missing.
# NOTE(review): confirm that `run.delete()` and a no-argument
# `experiment.delete()` are supported by the installed azureml-core version.
service.delete()
model.delete()
run.delete()
experiment.delete()

NameError: name 'service' is not defined

In [None]:
# Delete the compute cluster created (or found) in the setup section.
compute_target.delete()
