# AUTOML EXPERIMENT

In [1]:
import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Experiment
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.core.model import Model
from azureml.pipeline.core import Pipeline
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import ParallelRunStep, ParallelRunConfig
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.datastore import Datastore
from azureml.automl.runtime.onnx_convert import OnnxConverter
from azureml.train.automl import constants
import onnxruntime
import pandas as pd
import joblib
import os
import sys
import json
import requests

In [2]:
# MODELS
if "models" not in os.listdir():
    os.mkdir("./models")
# OUTPUS
if "outputs" not in os.listdir():
    os.mkdir("./outputs")

# WORKSPACE AND EXPERIMENT

In [3]:
# load workspace   (DON'T FORGET TO DOWNLOAD CONFIG.JSON)
ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: quick-starts-ws-128108
Azure region: southcentralus
Resource group: aml-quickstarts-128108


In [4]:
# Create experiment
experiment_name = 'deployments-bankmarketing'
project_folder = './deployments-project'
experiment = Experiment(ws, experiment_name)

# LOAD DATASET AND PRESERVE DATA FOR INFERENCE

In [5]:
# Create Dataset and register it into Workspace unless it´s already registered
found = False
key = "BankMarketing Dataset"
description_text = "Bank Marketing DataSet for Udacity Course 2"

if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:
    example_data = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)        
    dataset = dataset.register(workspace=ws,name=key,description=description_text)

In [6]:
# Keep 0.1% of the data for batch inference
train, examples = dataset.random_split(0.999,seed=42)
# Drop y from inference sample
examples = examples.drop_columns('y')

# AUTOML EXPERIMENT

In [7]:
# create or load compute cluster
cluster_name = "aml-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_D12_V2', min_nodes=1, max_nodes=5)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)  

Creating a new compute target...
Creating
Succeeded.............
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [8]:
# Configure AutoML
automl_settings = {"experiment_timeout_minutes": 15,
                    "max_concurrent_iterations": 5,
                    "primary_metric" : 'AUC_weighted'}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=train,
                             label_column_name="y",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             enable_onnx_compatible_models=True,
                             **automl_settings)

In [9]:
# Run the experiment and show details
deployments_run = experiment.submit(automl_config, show_output = True)
#RunDetails(deployments_run).show()

Running on remote.
Running on remote compute: aml-cluster
Parent Run ID: AutoML_e28797ba-b58f-40d7-ade4-691014ee52eb

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  Your input data has been split into a training dataset and a holdout test dataset for validation of the model. The test holdout dataset reflects the original distribution of your input data.
              
DETAILS:      
+---------------------------------+---------------------------------+---------------------------------+
|Dataset                          |Row counts                       |Pe

## REGISTER BEST MODEL AND BEST ONNX MODEL

In [10]:
# Register the best model
model = deployments_run.register_model(model_name='deployments-bankmarketing')
# Save the model
best_run1, fitted_model = deployments_run.get_output()
joblib.dump(fitted_model, filename='models/base_model.joblib')

['models/base_model.joblib']

In [11]:
# Get the best ONNX model
best_run2, onnx_model = deployments_run.get_output(return_onnx_model=True)
# Save the model
onnx_path = "./models/best_onnx.onnx"
OnnxConverter.save_onnx_model(onnx_model, onnx_path)
# Register the model
model_onnx = Model.register(workspace=ws, model_name='bankmarketing-onnx', model_path=onnx_path, model_framework=Model.Framework.ONNX,                                       model_framework_version='1.3')

#service_name = 'onnx-bankmarketing'
#service = Model.deploy(ws, service_name, [model_onnx])

Registering model bankmarketing-onnx


# ONNX

In [17]:
# Start inference session
session = onnxruntime.InferenceSession(onnx_path)

In [19]:
# Run the inference
examples_onnx = examples.take(5).to_pandas_dataframe()
try:
    result = session.run([],examples_onnx)
    print(result)
except Exception as e:
    result = str(e)
    print(result)

Model requires 20 inputs. Input Feed contains 5


In [36]:
# One data for ONNX                             

data1 =  {
            "age": 17,
            "campaign": 1,
            "cons.conf.idx": -46.2,
            "cons.price.idx": 92.893,
            "contact": "cellular",
            "day_of_week": "mon",
            "default": "no",
            "duration": 971,
            "education": "university.degree",
            "emp.var.rate": -1.8,
            "euribor3m": 1.299,
            "housing": "yes",
            "job": "blue-collar",
            "loan": "yes",
            "marital": "married",
            "month": "may",
            "nr.employed": 5099.1,
            "pdays": 999,
            "poutcome": "failure",
            "previous": 1
          }



pred = session.run([],data1)  
print(pred)        

RuntimeError: Unable to handle object of type <class 'int'>

# DOCKER

In [23]:
# Prepare to build an image
### NOTE: download the model and extract into ./models)
model = Model(ws, 'deployments-bankmarketing')   
env = Environment.from_conda_specification('docker', './models/conda_env_v_1_0_0.yml')
inference_config = InferenceConfig(entry_script='./models/scoring_file_v_1_0_0.py', environment=env)         

### BUILD AN IMAGE AND PULL IT

In [24]:
# Build a standard image and pull it
package = Model.package(ws, [model], inference_config)   
package.wait_for_creation(show_output=True)    
package.pull() 

2020/11/27 17:27:10 Downloading source code...
2020/11/27 17:27:11 Finished downloading source code
2020/11/27 17:27:12 Creating Docker network: acb_default_network, driver: 'bridge'
2020/11/27 17:27:12 Successfully set up Docker network: acb_default_network
2020/11/27 17:27:12 Setting up Docker configuration...
2020/11/27 17:27:13 Successfully set up Docker configuration
2020/11/27 17:27:13 Logging in to registry: b4ebf977fa1b4412adb84bc5f3bef665.azurecr.io
2020/11/27 17:27:15 Successfully logged into b4ebf977fa1b4412adb84bc5f3bef665.azurecr.io
2020/11/27 17:27:15 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2020/11/27 17:27:15 Scanning for dependencies...
2020/11/27 17:27:16 Successfully scanned dependencies
2020/11/27 17:27:16 Launching container with name: acb_step_0
Sending build context to Docker daemon     64kB
Step 1/15 : FROM mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200821.v1@sha256:8cee6f674276dddb23068

The above cell should produce a result such as:

Downloaded newer image for myworkspacef78fd10.azurecr.io/package:20190822181338.

In git-bash run:

"docker images"

"docker run -p 6789:5001 --name mycontainer imageID" (replace id number)

After the container is started, submit requests to http://localhost:6789/score

In [None]:
# Test the container   
 
# Prepare 5 example data for Docker
examples = examples.take(5).to_pandas_dataframe()
data_docker = {"data":examples.to_json(orient='records')}                                         

# URL for the web service.
scoring_uri = 'http://localhost:6789/score'

# Convert data to JSON string.
input_data = json.dumps(data_docker)

# Set the content type.
headers = {'Content-Type': 'application/json'}

# Make the request and display the response.
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)

### TO DOWNLAD FILES TO BUILD A LOCAL IMAGE

In [25]:
# Package with the dockerfile rather than the image
package = model.package(ws, [], inference_config, generate_dockerfile=True)
package.wait_for_creation(show_output=True)
# Download the package.
package.save("./imagefiles")
# Get the Azure container registry that the model/Dockerfile uses.
acr=package.get_container_registry()
print("Address:", acr.address)
print("Username:", acr.username)
print("Password:", acr.password)

Package creation Succeeded
Logging into Docker registry b4ebf977fa1b4412adb84bc5f3bef665.azurecr.io
Address: b4ebf977fa1b4412adb84bc5f3bef665.azurecr.io
Username: b4ebf977fa1b4412adb84bc5f3bef665
Password: X0ED=y8qbqAqTxj377tDw5ZQPoYNwSRd


In git-bash run:

"docker login (address) -u (username) -p (password)"

"docker build --tag myimage imagefiles" (replace imagefiles with the path were the image was saved)

"docker image"

"docker run -p 6789:5001 --name mycontainer myimage:latest"

When finished run:

"docker kill mycontainer"

# BATCH SCORING

In [26]:
# Get default datastore
datastore = ws.get_default_datastore()
# output
output_dir = PipelineData(name="scores", datastore=datastore, output_path_on_compute="./results")
# environment
env = Environment(name="deployments")     
# Prepare data for batch inference
examples_batch = examples.as_named_input("examples_batch")

In [27]:
# Parallel run configuration
parallel_run_config = ParallelRunConfig(source_directory='.',
                                        entry_script='batch_inference.py', 
                                        environment=env,
                                        error_threshold=5,
                                        output_action='append_row',
                                        append_row_file_name="batch_inference.txt",
                                        compute_target=compute_target, 
                                        node_count=1)                       

In [28]:
# Batch scoring step    
batch_score_step = ParallelRunStep(name="batch-scoring",
                                    inputs=[examples_batch],                            
                                    output=output_dir,
                                    arguments=["--model_name", 'deployments-bankmarketing'],  
                                    parallel_run_config=parallel_run_config,
                                    allow_reuse=True)

In [29]:
# Submit the pipeline
pipeline = Pipeline(workspace=ws, steps=[batch_score_step])
batch_run = experiment.submit(pipeline)
batch_run.wait_for_completion(show_output=True)

Created step batch-scoring [f6835d3c][b4c6cd85-fe13-4275-9b97-bc10302cf870], (This step will run and generate new outputs)
Submitted PipelineRun 9aa84471-ec5c-43a0-a49e-2306a9946b62
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/deployments-bankmarketing/runs/9aa84471-ec5c-43a0-a49e-2306a9946b62?wsid=/subscriptions/dafd62fe-51d3-4671-af50-6bac2f001b5d/resourcegroups/aml-quickstarts-128108/workspaces/quick-starts-ws-128108
PipelineRunId: 9aa84471-ec5c-43a0-a49e-2306a9946b62
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/deployments-bankmarketing/runs/9aa84471-ec5c-43a0-a49e-2306a9946b62?wsid=/subscriptions/dafd62fe-51d3-4671-af50-6bac2f001b5d/resourcegroups/aml-quickstarts-128108/workspaces/quick-starts-ws-128108
PipelineRun Status: Running


StepRunId: a0e18dd2-c478-419a-965b-a918d577f270
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/deployments-bankmarketing/runs/a0e18dd2-c478-419a-965b-a918d577f270?wsid=/subs

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "AzureMLCompute job failed.\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.",
        "details": []
    },
    "correlation": {
        "operation": null,
        "request": "0946ed7ec48ee0f9"
    },
    "environment": "southcentralus",
    "location": "southcentralus",
    "time": "2020-11-27T18:08:12.215898Z",
    "componentName": "execution-worker"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"AzureMLCompute job failed.\\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.\",\n        \"details\": []\n    },\n    \"correlation\": {\n        \"operation\": null,\n        \"request\": \"0946ed7ec48ee0f9\"\n    },\n    \"environment\": \"southcentralus\",\n    \"location\": \"southcentralus\",\n    \"time\": \"2020-11-27T18:08:12.215898Z\",\n    \"componentName\": \"execution-worker\"\n}"
    }
}

In [31]:
# Publish pipeline
published_pipeline = batch_run.publish_pipeline(name="batch-scoring", description="Batch scoring for project 2", version=1.0)

In [32]:
# Interactive authentication to get authentication header
from azureml.pipeline.core.run import PipelineRun
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

### ENDPOINT FOR THE PIPELINE

In [33]:
# Publish the pipeline to an endpoint
rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, headers=auth_header, json={"ExperimentName": "batch-scoring"})
run_id = response.json()["Id"]

In [34]:
# Run it again
published_pipeline_run = PipelineRun(ws.experiments["batch-scoring"], run_id)
RunDetails(published_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

## Explore results

In [None]:
'''
results = next(pipeline_run.get_children())
batch_output = results.get_output_data("scores")        ## esta linea puede traer problemas
batch_output.download(local_path='./outputs')           ## esta linea puede traer problemas

for root, dirs, files in os.walk('./outputs'):
    for file in files:
        if file.endswith("batch_inference.txt"):
            result_file = os.path.join(root, file)
 '''           

In [None]:
'''
df = pd.read_csv(result_file, header=None)    # delimiter=":"
df.columns = ["age","campaign","cons.conf.idx","cons.price.idx","contact","day_of_week","default","duration","education","emp.var.rate",
              "euribor3m","housing","job","loan","marital","month","nr.employed","pdays","poutcome","previous","prediction"]
df.head(10)
'''