# AUTOML EXPERIMENT

In [None]:
import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Experiment
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.core.model import Model
from azureml.pipeline.core import Pipeline
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import ParallelRunStep, ParallelRunConfig
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.datastore import Datastore
from azureml.automl.runtime.onnx_convert import OnnxConverter
from azureml.train.automl import constants
import onnxruntime
import pandas as pd
import joblib
import os
import sys
import json
import requests

In [None]:
# MODELS / OUTPUTS
# Create the ./models and ./outputs working folders if they are missing.
# os.makedirs(..., exist_ok=True) replaces the listdir()-then-mkdir() check:
# the cell can be re-run safely and there is no gap between check and create.
for folder in ("./models", "./outputs"):
    os.makedirs(folder, exist_ok=True)

# WORKSPACE AND EXPERIMENT

In [None]:
# Load the workspace (remember to download config.json before running this cell)
ws = Workspace.from_config()

# One summary line per workspace property, printed as a single block
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

In [None]:
# Experiment that the runs in this notebook are submitted to
experiment_name = 'deployments-bankmarketing'
# Local folder name kept alongside the experiment settings
project_folder = './deployments-project'
experiment = Experiment(workspace=ws, name=experiment_name)

# LOAD DATASET AND PRESERVE DATA FOR INFERENCE

In [None]:
# Reuse the dataset when it is already registered in the workspace;
# otherwise build it from the sample CSV and register it.
key = "BankMarketing Dataset"
description_text = "Bank Marketing DataSet for Udacity Course 2"

found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    example_data = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

In [None]:
# Hold back 0.1% of the data for batch inference (fixed seed for repeatability)
train, holdout = dataset.random_split(percentage=0.999, seed=42)
# The inference sample must not carry the label column 'y'
examples = holdout.drop_columns('y')

# AUTOML EXPERIMENT

In [None]:
# Attach to the compute cluster by name; provision it when the lookup fails
cluster_name = "aml-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D12_V2',
        min_nodes=1,
        max_nodes=5,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target.')

# Wait for the cluster in both the "found" and the "created" case
compute_target.wait_for_completion(show_output=True)

In [None]:
# AutoML settings that are forwarded to AutoMLConfig as keyword arguments
automl_settings = dict(
    experiment_timeout_minutes=15,
    max_concurrent_iterations=5,
    primary_metric='AUC_weighted',
)

# Classification on the training split with 'y' as the label column;
# ONNX-compatible models are requested so the best model can be exported later.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=train,
    label_column_name="y",
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    enable_onnx_compatible_models=True,
    debug_log="automl_errors.log",
    **automl_settings
)

In [None]:
# Submit the AutoML configuration and show its output in the cell
deployments_run = experiment.submit(config=automl_config, show_output=True)
# Optional notebook widget view of the same run:
# RunDetails(deployments_run).show()

## REGISTER BEST MODEL AND BEST ONNX MODEL

In [None]:
# Register the best model of the run in the workspace
model = deployments_run.register_model(model_name='deployments-bankmarketing')

# Fetch the best run together with its fitted model and keep a local copy
best_run1, fitted_model = deployments_run.get_output()
base_model_path = 'models/base_model.joblib'
joblib.dump(fitted_model, filename=base_model_path)

In [None]:
# Local file the ONNX model is written to and registered from
onnx_path = "./models/best_onnx.onnx"

# Get the best ONNX model of the run and save it
best_run2, onnx_model = deployments_run.get_output(return_onnx_model=True)
OnnxConverter.save_onnx_model(onnx_model, onnx_path)

# Register the saved file as an ONNX-framework model
model_onnx = Model.register(
    workspace=ws,
    model_name='bankmarketing-onnx',
    model_path=onnx_path,
    model_framework=Model.Framework.ONNX,
    model_framework_version='1.3',
)

# Optional deployment of the registered ONNX model (left disabled):
# service_name = 'onnx-bankmarketing'
# service = Model.deploy(ws, service_name, [model_onnx])

# ONNX

In [None]:
# Open an onnxruntime inference session on the saved ONNX file
onnx_model_file = onnx_path
session = onnxruntime.InferenceSession(onnx_model_file)

In [None]:
# Run the inference; on failure the error text is shown instead of the result.
# NOTE(review): `data` is not assigned in any cell above this one — run the
# cell that builds it first, otherwise the NameError message is what gets printed.
try:
    result = session.run([], data)
except Exception as err:
    result = str(err)
print(result)

In [None]:
# One hand-written record for ONNX scoring.
# Use this cell only if the inference cell above does not work.
import numpy as np

data1 = {
    "age": 17,
    "campaign": 1,
    "cons.conf.idx": -46.2,
    "cons.price.idx": 92.893,
    "contact": "cellular",
    "day_of_week": "mon",
    "default": "no",
    "duration": 971,
    "education": "university.degree",
    "emp.var.rate": -1.8,
    "euribor3m": 1.299,
    "housing": "yes",
    "job": "blue-collar",
    "loan": "yes",
    "marital": "married",
    "month": "may",
    "nr.employed": 5099.1,
    "pdays": 999,
    "poutcome": "failure",
    "previous": 1,
}

# Tensor element types as reported by onnxruntime, mapped to the numpy dtype
# used for the matching feed array (unknown types fall back to object).
_ONNX_DTYPES = {
    "tensor(int64)": np.int64,
    "tensor(int32)": np.int32,
    "tensor(float)": np.float32,
    "tensor(double)": np.float64,
    "tensor(string)": object,
}

# The previous version put @input_schema/@output_schema decorators on an
# assignment (a SyntaxError) and indexed an undefined `inputs` object.
# session.run() takes a dict of input name -> numpy array, so build that dict
# from the inputs the model itself declares.
# NOTE(review): assumes every model input is named after a raw column and takes
# one value per row (array shape (1, 1)) — confirm against session.get_inputs().
data = {}
for model_input in session.get_inputs():
    if model_input.name not in data1:
        raise KeyError(
            "ONNX input {!r} has no value in data1".format(model_input.name)
        )
    feed_dtype = _ONNX_DTYPES.get(model_input.type, object)
    data[model_input.name] = np.array([[data1[model_input.name]]], dtype=feed_dtype)

pred = session.run([], data)
print(pred)

# DOCKER

In [None]:
# Prepare to build an image
# NOTE: download the model and extract it into ./models before running this cell
model = Model(workspace=ws, name='deployments-bankmarketing')

# Environment built from the conda file, plus the scoring script, both read from ./models
env = Environment.from_conda_specification(
    name='docker',
    file_path='./models/conda_env_v_1_0_0.yml',
)
inference_config = InferenceConfig(
    entry_script='./models/scoring_file_v_1_0_0.py',
    environment=env,
)

### BUILD AN IMAGE AND PULL IT

In [None]:
# Build a standard image for the model, wait until it is created, then pull it
package = Model.package(
    workspace=ws,
    models=[model],
    inference_config=inference_config,
)
package.wait_for_creation(show_output=True)
package.pull()

The above cell should produce a result such as:

Downloaded newer image for myworkspacef78fd10.azurecr.io/package:20190822181338.

In git-bash run:

"docker images"

"docker run -p 6789:5001 --name mycontainer imageID" (replace imageID with the image ID listed by "docker images")

After the container is started, submit requests to http://localhost:6789/score

### TO DOWNLOAD FILES TO BUILD A LOCAL IMAGE

In [None]:
# Package with the Dockerfile rather than the built image.
# The model list must contain the model: the previous call passed an empty
# list, so the generated package would not have included any model.
package = Model.package(ws, [model], inference_config, generate_dockerfile=True)
package.wait_for_creation(show_output=True)
# Download the package files used for the local docker build.
package.save("./imagefiles")
# Get the Azure container registry that the model/Dockerfile uses.
acr = package.get_container_registry()
# WARNING: this prints registry credentials into the cell output; they are
# needed for "docker login" below, so clear the output before sharing the notebook.
print("Address:", acr.address)
print("Username:", acr.username)
print("Password:", acr.password)

In git-bash run:

"docker login (address) -u (username) -p (password)"

"docker build --tag myimage imagefiles" (replace imagefiles with the path where the package files were saved)

"docker images"

"docker run -p 6789:5001 --name mycontainer myimage:latest"

When finished run:

"docker kill mycontainer"

In [None]:
# Test the container

# Prepare 5 example rows for Docker.
# A separate name is used for the DataFrame so that `examples` stays a dataset:
# the batch-scoring cell calls examples.as_named_input(), which a DataFrame
# does not provide, and re-running this cell keeps working too.
examples_df = examples.take(5).to_pandas_dataframe()
# to_json() returns a JSON *string*; decode it so "data" is sent as a list of
# records instead of a string nested inside the JSON payload.
data_docker = {"data": json.loads(examples_df.to_json(orient='records'))}

# URL for the web service.
scoring_uri = 'http://localhost:6789/score'

# Convert data to JSON string.
input_data = json.dumps(data_docker)

# Set the content type.
headers = {'Content-Type': 'application/json'}

# Make the request and display the response.
# The timeout keeps the cell from hanging forever when the container is not up.
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=60)
print(resp.text)

# BATCH SCORING

In [None]:
# Default datastore of the workspace, used to hold the pipeline output
datastore = ws.get_default_datastore()

# Pipeline output named "scores"
output_dir = PipelineData(
    name="scores",
    datastore=datastore,
    output_path_on_compute="./results",
)

# Environment for the batch step
env = Environment(name="deployments")

# Inference sample exposed to the step as the named input "examples_batch"
examples_batch = examples.as_named_input("examples_batch")

In [None]:
# Parallel run configuration, collected as keyword arguments first
parallel_run_settings = {
    "source_directory": '.',
    "entry_script": 'batch_inference.py',
    "environment": env,
    "error_threshold": 5,
    "output_action": 'append_row',
    "append_row_file_name": "batch_inference.txt",
    "compute_target": compute_target,
    "node_count": 1,  # author's note: "try this"
}
parallel_run_config = ParallelRunConfig(**parallel_run_settings)

In [None]:
# Batch scoring step
# Arguments handed to the entry script: the name of the registered model
scoring_arguments = ["--model_name", 'deployments-bankmarketing']

batch_score_step = ParallelRunStep(
    name="batch-scoring",
    parallel_run_config=parallel_run_config,
    inputs=[examples_batch],  # author's note: this is where the problem is
    output=output_dir,
    arguments=scoring_arguments,
    allow_reuse=True,
)

In [None]:
# Build a one-step pipeline, submit it to the experiment and wait for it to finish
pipeline_steps = [batch_score_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
batch_run = experiment.submit(pipeline)
batch_run.wait_for_completion(show_output=True)

In [None]:
# Publish the pipeline behind the finished batch run
published_pipeline = batch_run.publish_pipeline(
    name="batch-scoring",
    description="Batch scoring for project 2",
)

In [None]:
# Build the authentication header through an interactive login
import requests
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.pipeline.core.run import PipelineRun

interactive_auth = InteractiveLoginAuthentication()
# Header sent with the REST request to the published pipeline
auth_header = interactive_auth.get_authentication_header()

### ENDPOINT FOR THE PIPELINE

In [None]:
# Trigger a run of the published pipeline through its REST endpoint
rest_endpoint = published_pipeline.endpoint
response = requests.post(
    rest_endpoint,
    headers=auth_header,
    json={"ExperimentName": "batch-scoring"},
    timeout=60,  # do not hang the cell if the endpoint never answers
)
# Surface an HTTP error status directly instead of a KeyError on "Id" below
response.raise_for_status()
run_id = response.json()["Id"]

In [None]:
# Attach to the run started through the REST endpoint and show it in the widget
batch_experiment = ws.experiments["batch-scoring"]
published_pipeline_run = PipelineRun(batch_experiment, run_id)
RunDetails(published_pipeline_run).show()

## Explore results

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so none of it runs.
# As written it would download the "scores" output into ./outputs and look for
# the batch_inference.txt file there.
# NOTE(review): `pipeline_run` is not assigned in any cell shown here — confirm
# which run object is meant before enabling this code.
'''
results = next(pipeline_run.get_children())
batch_output = results.get_output_data("scores")        ## esta linea puede traer problemas
batch_output.download(local_path='./outputs')           ## esta linea puede traer problemas

for root, dirs, files in os.walk('./outputs'):
    for file in files:
        if file.endswith("batch_inference.txt"):
            result_file = os.path.join(root, file)
 '''           

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so none of it runs.
# As written it would read the result file into a DataFrame without a header
# row and assign the column names.
# NOTE(review): `result_file` is only assigned inside another disabled cell.
'''
df = pd.read_csv(result_file, header=None)    # delimiter=":"
df.columns = ["age","campaign","cons.conf.idx","cons.price.idx","contact","day_of_week","default","duration","education","emp.var.rate",
              "euribor3m","housing","job","loan","marital","month","nr.employed","pdays","poutcome","previous","prediction"]
df.head(10)
'''