# Automated ML

Import Dependencies.

In [1]:
import azureml
import json
import joblib
import requests
import pandas as pd
import random
from pathlib import Path
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.train.automl import AutoMLConfig
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from azureml.core.environment import Environment
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

from train import clean_data
# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

experiment_name = 'Mushroom'
amlcompute_cluster_name = "comp"
ds_key = "Mushroom"
ds_description_text = "This dataset includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms"
project_folder = './mushroom-project'

SDK version: 1.20.0


## Workspace


In [2]:
ws = Workspace.from_config()
experiment=Experiment(ws, experiment_name)

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')
run = experiment.start_logging()

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)


quick-starts-ws-139400
aml-quickstarts-139400
southcentralus
a24a24d5-8d87-4c8a-99b6-91ed2d2df51f
Creating
Succeeded.................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## Dataset

In [3]:
found = False
if ds_key in ws.datasets.keys():
        found = True
        dataset = ws.datasets[ds_key]

if not found:
        # Create AML Dataset and register it into Workspace
        example_data = 'https://raw.githubusercontent.com/mixmasteru/MLEND-capstone/main/data/mushrooms.csv'
        dataset = Dataset.Tabular.from_delimited_files(example_data)
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=ds_key,
                                   description=ds_description_text)
df = dataset.to_pandas_dataframe()
df.describe()
dataset.take(5).to_pandas_dataframe()



# Use the clean_data function to clean your data.
x, y = clean_data(df)
x["class"] = y
x_train, x_test = train_test_split(x, train_size=0.75, test_size=0.25, random_state=101)


Path("./input").mkdir(parents=True, exist_ok=True)
pd.DataFrame(x_train).to_csv("./input/x_train.csv", index=False)
pd.DataFrame(x_train).to_csv("./input/x_test.csv", index=False)

ds = ws.get_default_datastore()
ds.upload(src_dir='./input', target_path='input', overwrite=True, show_progress=True)

x_train_data = TabularDatasetFactory.from_delimited_files(path=ds.path('input/x_train.csv'))
x_test_data = TabularDatasetFactory.from_delimited_files(path=ds.path('input/x_test.csv'))


Uploading an estimated of 2 files
Uploading ./input/x_test.csv
Uploaded ./input/x_test.csv, 1 files out of an estimated total of 2
Uploading ./input/x_train.csv
Uploaded ./input/x_train.csv, 2 files out of an estimated total of 2
Uploaded 2 files


## AutoML Configuration

In [4]:
automl_settings = {
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 4,
    "primary_metric" : 'accuracy',
    "featurization": 'auto'
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=x_train_data,
                             validation_data=x_test_data,
                             label_column_name="class",
                             path = project_folder,
                             enable_early_stopping= True,
                             model_explainability=True,
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

## Run Details


In [5]:
remote_run = experiment.submit(automl_config)
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

Running on remote.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS

{'runId': 'AutoML_d359eaa0-48aa-408b-8e40-74fb6f77cad7',
 'target': 'comp',
 'status': 'Completed',
 'startTimeUtc': '2021-02-23T21:23:14.352866Z',
 'endTimeUtc': '2021-02-23T21:48:16.134433Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'comp',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"6125e147-1bbe-4bff-b4fd-a63aba286ad4\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"input/x_train.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-139400\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"a24a24d5-8d87-4c8a-99b6-91ed2d2df51f\\\\\\", \\\\\\"workspaceName\\\\\\

## Best Model

get the best model from the automl experiments and display all the properties of the model.



In [6]:
# Retrieve best model from Run
best_run, best_model = remote_run.get_output()
print(best_run)
print(best_run.get_metrics())
print(best_model)
#Save the best model
joblib.dump(best_model, 'outputs/automl_model.joblib')

best_model = joblib.load("./outputs/automl_model.joblib")
df_test = dataset.to_pandas_dataframe()
X_test, y_test = clean_data(df_test)

ypred = best_model.predict(X_test)
cm = confusion_matrix(y_test, ypred)

# Visualize the confusion matrix
pd.DataFrame(cm).style.background_gradient(cmap='Blues', low=0, high=0.9)

Package:azureml-automl-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-core, training version:1.22.0, current version:1.20.0
Package:azureml-dataprep, training version:2.9.1, current version:2.7.3
Package:azureml-dataprep-native, training version:29.0.0, current version:27.0.0
Package:azureml-dataprep-rslex, training version:1.7.0, current version:1.5.0
Package:azureml-dataset-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-defaults, training version:1.22.0, current version:1.20.0
Package:azureml-interpret, training version:1.22.0, current version:1.20.0
Package:azureml-pipeline-core, training version:1.22.0, current version:1.20.0
Package:azureml-telemetry, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-client, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-runtime, training version:1.22.0, current version:1.20.0


Run(Experiment: Mushroom,
Id: AutoML_d359eaa0-48aa-408b-8e40-74fb6f77cad7_0,
Type: azureml.scriptrun,
Status: Completed)
{'recall_score_weighted': 1.0, 'norm_macro_recall': 1.0, 'AUC_weighted': 1.0, 'f1_score_micro': 1.0, 'f1_score_weighted': 1.0, 'precision_score_macro': 1.0, 'average_precision_score_micro': 1.0, 'AUC_micro': 1.0, 'accuracy': 1.0, 'recall_score_micro': 1.0, 'average_precision_score_macro': 1.0, 'AUC_macro': 1.0, 'balanced_accuracy': 1.0, 'precision_score_micro': 1.0, 'matthews_correlation': 1.0, 'average_precision_score_weighted': 1.0, 'precision_score_weighted': 1.0, 'log_loss': 2.997299456938485e-05, 'recall_score_macro': 1.0, 'weighted_accuracy': 1.0, 'f1_score_macro': 1.0, 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_d359eaa0-48aa-408b-8e40-74fb6f77cad7_0/confusion_matrix', 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_d359eaa0-48aa-408b-8e40-74fb6f77cad7_0/accuracy_table'}
Pipeline(memory=None,
         steps=[('datatransformer'

Unnamed: 0,0,1
0,3898,0
1,0,4016


## Model Deployment
register the model

In [21]:
# Register model
model = best_run.register_model(model_name='mushrooms',
                           tags={'typ': 'voting'},
                           model_path='outputs/model.pkl')
print(model.name, model.id, model.version, sep='\t')

mushrooms	mushrooms:1	1


create an inference config and deploy the model as a web service

In [45]:
env = Environment.from_conda_specification(name="env", file_path="conda_dependencies.yml")
inference_config = InferenceConfig(entry_script="score.py", environment=env)

aci_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                               auth_enabled=True,
                                               enable_app_insights=True,
                                               memory_gb=1,
                                               tags={'name': 'mushrooms'},
                                               description='classify the edibility of mushrooms')

service = Model.deploy(workspace=ws,
                       name='mushrooms',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)

service.wait_for_deployment(show_output=True)
print(service.state)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...................................................................................................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


## request to the web service
send a request to the web service you deployed to test it.

In [93]:
key1, key2 = service.get_keys()
print(key1)

index = random.randint(0, len(X_test)-1)
data_dict=X_test.iloc[index].to_dict()

for k, v in data_dict.items():
    data_dict[k] = str(v)
    
data = '{"data": ['+json.dumps(data_dict)+']}'
headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}

resp = requests.post(service.scoring_uri, data, headers=headers)

print("POST to uri", service.scoring_uri)
print("input data:", data)
print("label:", y_test.iloc[index])
print("prediction:", resp.text)

CvkC2n7KIG4cWrv8No1bfOeybPhp6h0D
POST to uri http://636a3679-f175-4b15-816b-09ec09e85d8e.southcentralus.azurecontainer.io/score
input data: {"data": [{"bruises": "True", "gill_attachment": "False", "cap_shape_b": "0", "cap_shape_c": "0", "cap_shape_f": "0", "cap_shape_k": "0", "cap_shape_s": "0", "cap_shape_x": "1", "cap_surface_f": "1", "cap_surface_g": "0", "cap_surface_s": "0", "cap_surface_y": "0", "cap_color_b": "0", "cap_color_c": "0", "cap_color_e": "0", "cap_color_g": "0", "cap_color_n": "1", "cap_color_p": "0", "cap_color_r": "0", "cap_color_u": "0", "cap_color_w": "0", "cap_color_y": "0", "odor_a": "0", "odor_c": "0", "odor_f": "0", "odor_l": "0", "odor_m": "0", "odor_n": "1", "odor_p": "0", "odor_s": "0", "odor_y": "0", "gill_spacing_c": "1", "gill_spacing_w": "0", "gill_size_b": "1", "gill_size_n": "0", "gill_color_b": "0", "gill_color_e": "0", "gill_color_g": "0", "gill_color_h": "0", "gill_color_k": "0", "gill_color_n": "0", "gill_color_p": "0", "gill_color_r": "0", "gill

## Logs
print the logs of the web service and delete the service

In [69]:
service.get_logs()
#service.delete()

'2021-02-24T00:47:24,989567400+00:00 - iot-server/run \n2021-02-24T00:47:24,997864300+00:00 - nginx/run \n2021-02-24T00:47:24,989628100+00:00 - gunicorn/run \n/usr/sbin/nginx: /azureml-envs/azureml_6f3791fe7434448b4ebe2b0fd691d644/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_6f3791fe7434448b4ebe2b0fd691d644/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_6f3791fe7434448b4ebe2b0fd691d644/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_6f3791fe7434448b4ebe2b0fd691d644/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_6f3791fe7434448b4ebe2b0fd691d644/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n2021-02-24T00:47:25,006172900+00:00 - rsyslog/run 