In [40]:
import pandas as pd
pd.set_option('display.max_columns', 100)

## Workspace

In [1]:
from azureml.core import Workspace

In [2]:
ws = Workspace.from_config()

In [3]:
ws

Workspace.create(name='quick-starts-ws-169110', subscription_id='510b94ba-e453-4417-988b-fbdc37b55ca7', resource_group='aml-quickstarts-169110')

## Dataset

In [5]:
from azureml.core import Dataset

dataset = Dataset.get_by_name(ws, name='bank')
df = dataset.to_pandas_dataframe()

In [6]:
df.shape

(32950, 21)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32950 entries, 0 to 32949
Data columns (total 21 columns):
age               32950 non-null int64
job               32950 non-null object
marital           32950 non-null object
education         32950 non-null object
default           32950 non-null object
housing           32950 non-null object
loan              32950 non-null object
contact           32950 non-null object
month             32950 non-null object
day_of_week       32950 non-null object
duration          32950 non-null int64
campaign          32950 non-null int64
pdays             32950 non-null int64
previous          32950 non-null int64
poutcome          32950 non-null object
emp.var.rate      32950 non-null float64
cons.price.idx    32950 non-null float64
cons.conf.idx     32950 non-null float64
euribor3m         32950 non-null float64
nr.employed       32950 non-null float64
y                 32950 non-null object
dtypes: float64(5), int64(5), object(11)
memory usa

In [8]:
df.head(2)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no


## Experiment

In [11]:
from azureml.core import Experiment

experiment_name = 'bank-exp-01'
project_folder = './pipeline-bank-project'

experiment = Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
bank-exp-01,quick-starts-ws-169110,Link to Azure Machine Learning studio,Link to Documentation


## Compute Cluster

In [12]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "auto-ml"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)
# For a more detailed view of current AmlCompute status, use get_status().

InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded.....................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [15]:
compute_target

AmlCompute(workspace=Workspace.create(name='quick-starts-ws-169110', subscription_id='510b94ba-e453-4417-988b-fbdc37b55ca7', resource_group='aml-quickstarts-169110'), name=auto-ml, id=/subscriptions/510b94ba-e453-4417-988b-fbdc37b55ca7/resourceGroups/aml-quickstarts-169110/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-169110/computes/auto-ml, type=AmlCompute, provisioning_state=Succeeded, location=southcentralus, tags=None)

## AutoML Training Configuration

In [16]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep

In [17]:
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'accuracy',
    "n_cross_validations": 5
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=dataset, 
                             label_column_name="y",  
                             path = project_folder,
                             enable_early_stopping= True,
                             #featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

## Pipeline and AutoML Step

In [18]:
from azureml.pipeline.core import PipelineData, TrainingOutput

ds = ws.get_default_datastore()
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
                           datastore=ds,
                           pipeline_output_name=metrics_output_name,
                           training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
                           datastore=ds,
                           pipeline_output_name=best_model_output_name,
                           training_output=TrainingOutput(type='Model'))

In [19]:
ds

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-0c6b8ebf-9bfe-4aec-99eb-8b15a420398f",
  "account_name": "mlstrg169110",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [20]:
automl_step = AutoMLStep(
    name='automl_module',
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=True)

In [21]:
from azureml.pipeline.core import Pipeline
pipeline = Pipeline(
    description="pipeline_with_automlstep",
    workspace=ws,    
    steps=[automl_step])

In [22]:
pipeline_run = experiment.submit(pipeline)

Created step automl_module [59762c30][1ec8dd4b-4611-470b-9def-0435577b81d6], (This step will run and generate new outputs)
Submitted PipelineRun 2451dc03-77ef-4ffc-b7f8-51735fcf03f9
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/2451dc03-77ef-4ffc-b7f8-51735fcf03f9?wsid=/subscriptions/510b94ba-e453-4417-988b-fbdc37b55ca7/resourcegroups/aml-quickstarts-169110/workspaces/quick-starts-ws-169110&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254


In [23]:
from azureml.widgets import RunDetails
RunDetails(pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [24]:
pipeline_run.wait_for_completion()

PipelineRunId: 2451dc03-77ef-4ffc-b7f8-51735fcf03f9
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/2451dc03-77ef-4ffc-b7f8-51735fcf03f9?wsid=/subscriptions/510b94ba-e453-4417-988b-fbdc37b55ca7/resourcegroups/aml-quickstarts-169110/workspaces/quick-starts-ws-169110&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '2451dc03-77ef-4ffc-b7f8-51735fcf03f9', 'status': 'Completed', 'startTimeUtc': '2021-12-26T19:14:12.127694Z', 'endTimeUtc': '2021-12-26T19:42:57.099878Z', 'services': {}, 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}', 'azureml.continue_on_step_failure': 'False', 'azureml.pipelineComponent': 'pipelinerun'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://mlstrg169110.blob.core.windows.net/azureml/ExperimentRun/dcid.2451dc03-77ef-4ffc-b7f8-51735fcf03f9/logs/azureml/execution

'Finished'

In [25]:
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress=True)

Downloading azureml/5ec83b31-d37a-4ba1-b554-683bf22e2d8a/metrics_data
Downloaded azureml/5ec83b31-d37a-4ba1-b554-683bf22e2d8a/metrics_data, 1 files out of an estimated total of 1


In [28]:
import pandas as pd
import json
with open(metrics_output._path_on_datastore) as f:
    metrics_output_result = f.read()
    
deserialized_metrics_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrics_output)
df

Unnamed: 0,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_2,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_6,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_8,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_0,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_3,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_9,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_4,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_1,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_5,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_7,...,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_14,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_23,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_29,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_31,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_24,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_28,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_25,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_16,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_32,5ec83b31-d37a-4ba1-b554-683bf22e2d8a_30
accuracy,[0.7327162367223065],[0.908619119878604],[0.8881942336874051],[0.9144157814871017],[0.9138088012139605],[0.910773899848255],[0.9126251896813352],[0.9149317147192717],[0.8881638846737481],[0.9082549317147193],...,[0.9149013657056144],[0.9092867981790592],[0.9134142640364189],[0.9113201820940819],[0.8879514415781486],[0.9100455235204856],[0.8879514415781486],[0.9089226100151745],[0.9123520485584219],[0.9129893778452202]
AUC_micro,[0.832972923061336],[0.976753917394498],[0.9526977325740706],[0.9810792321100854],[0.9790505133772834],[0.977196732069789],[0.9800596111734109],[0.9806089467418561],[0.9758588563625855],[0.9739743852482607],...,[0.9788902162424791],[0.9743518275033907],[0.9794725603929253],[0.9791142831484685],[0.963926577492453],[0.9783353059424659],[0.8879514415781488],[0.9730803373852414],[0.979620229759073],[0.9795234652218264]
recall_score_weighted,[0.7327162367223065],[0.908619119878604],[0.8881942336874051],[0.9144157814871017],[0.9138088012139605],[0.910773899848255],[0.9126251896813352],[0.9149317147192717],[0.8881638846737481],[0.9082549317147193],...,[0.9149013657056144],[0.9092867981790592],[0.9134142640364189],[0.9113201820940819],[0.8879514415781486],[0.9100455235204856],[0.8879514415781486],[0.9089226100151745],[0.9123520485584219],[0.9129893778452202]
average_precision_score_macro,[0.703466734680023],[0.8022951053050777],[0.7084154064260056],[0.8262948541138542],[0.816559316631626],[0.8081879502745484],[0.8252516776526685],[0.8270301995665266],[0.8169207971155308],[0.7888879716269412],...,[0.814613209583755],[0.7912949569608573],[0.8140481937414676],[0.8091702589841543],[0.7209128611213986],[0.8068410409342324],[0.5],[0.7825107476338486],[0.8137213125408873],[0.812663714606451]
precision_score_micro,[0.7327162367223065],[0.908619119878604],[0.8881942336874051],[0.9144157814871017],[0.9138088012139605],[0.910773899848255],[0.9126251896813352],[0.9149317147192717],[0.8881638846737481],[0.9082549317147193],...,[0.9149013657056144],[0.9092867981790592],[0.9134142640364189],[0.9113201820940819],[0.8879514415781486],[0.9100455235204856],[0.8879514415781486],[0.9089226100151745],[0.9123520485584219],[0.9129893778452202]
f1_score_micro,[0.7327162367223065],[0.9086191198786041],[0.8881942336874051],[0.9144157814871017],[0.9138088012139605],[0.910773899848255],[0.9126251896813352],[0.9149317147192717],[0.8881638846737481],[0.9082549317147193],...,[0.9149013657056144],[0.9092867981790592],[0.9134142640364189],[0.9113201820940819],[0.8879514415781486],[0.9100455235204856],[0.8879514415781486],[0.9089226100151745],[0.9123520485584219],[0.9129893778452202]
log_loss,[0.5567037697349142],[0.32736238728255757],[0.2882445113311118],[0.17259344664205858],[0.31539739269508277],[0.330327232922604],[0.18545074801612443],[0.17680045065995267],[0.24646693752178436],[0.21193871579399834],...,[0.3151152761650368],[0.21040843887887672],[0.18732864004241176],[0.18931171255484097],[0.27931338007320017],[0.20458597302766757],[0.35078682183108895],[0.216829934325902],[0.18532526584424516],[0.18564344640976732]
matthews_correlation,[0.29961701350918135],[0.45965798214456044],[0.019338549550406807],[0.5417959465994486],[0.5351650356202582],[0.458519480439017],[0.48456813874230537],[0.5283949538267647],[0.03065743850634507],[0.4605877628620255],...,[0.542864341502229],[0.47273175540276935],[0.5389307088133488],[0.5267142681821554],[0.0],[0.5239561164148386],[0.0],[0.46445107103569094],[0.5334935805712999],[0.5367693541296045]
precision_score_weighted,[0.8733482297182045],[0.897477459260972],[0.8076873205647658],[0.9092869525277483],[0.908127213957241],[0.8998767644864023],[0.9016562771376175],[0.9074494120258981],[0.8557913685491567],[0.8961170311350969],...,[0.9095786707055116],[0.8980078363329836],[0.9086286585662174],[0.9062161567160775],[0.7884613418500925],[0.9055667699359725],[0.7884613418500925],[0.8969949596145484],[0.9075594600240784],[0.9082072255360674]
recall_score_micro,[0.7327162367223065],[0.908619119878604],[0.8881942336874051],[0.9144157814871017],[0.9138088012139605],[0.910773899848255],[0.9126251896813352],[0.9149317147192717],[0.8881638846737481],[0.9082549317147193],...,[0.9149013657056144],[0.9092867981790592],[0.9134142640364189],[0.9113201820940819],[0.8879514415781486],[0.9100455235204856],[0.8879514415781486],[0.9089226100151745],[0.9123520485584219],[0.9129893778452202]


In [29]:
best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress=True)

Downloading azureml/5ec83b31-d37a-4ba1-b554-683bf22e2d8a/model_data
Downloaded azureml/5ec83b31-d37a-4ba1-b554-683bf22e2d8a/model_data, 1 files out of an estimated total of 1


In [30]:
import pickle

with open(best_model_output._path_on_datastore, "rb" ) as f:
    best_model = pickle.load(f)
best_model

PipelineWithYTransformations(Pipeline={'memory': None,
                                       'steps': [('datatransformer',
                                                  DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mn...
)), ('logisticregression', LogisticRegression(C=51.79474679231202, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False))], verbose=False))], flatten_transform=None, weights=[0.14285714285714285, 0.2857142857142857, 0.07142857142857142, 0.07142857142857142, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142]))],
                               

In [31]:
best_model.steps

[('datatransformer',
  DataTransformer(
      task='classification',
      is_onnx_compatible=False,
      enable_feature_sweeping=True,
      enable_dnn=False,
      force_text_dnn=False,
      feature_sweeping_timeout=86400,
      featurization_config=None,
      is_cross_validation=True,
      feature_sweeping_config={}
  )),
 ('prefittedsoftvotingclassifier',
  PreFittedSoftVotingClassifier(
      estimators=[('1', Pipeline(
          memory=None,
          steps=[('maxabsscaler', MaxAbsScaler(
              copy=True
          )), ('xgboostclassifier', XGBoostClassifier(
              random_state=0,
              n_jobs=1,
              problem_info=ProblemInfo(
                  gpu_training_param_dict={'processing_unit_type': 'cpu'}
              ),
              tree_method='auto'
          ))],
          verbose=False
      )), ('14', Pipeline(
          memory=None,
          steps=[('standardscalerwrapper', StandardScalerWrapper(
              copy=True,
              with_

## Publish and run from REST endpoint

In [32]:
published_pipeline = pipeline_run.publish_pipeline(
    name="Bank Train", description="Training Bank Marketing Pipeline", version="1.0")

published_pipeline

Name,Id,Status,Endpoint
Bank Train,c8aa2b75-a1b4-4386-82fd-f8c1c63b591f,Active,REST Endpoint


In [33]:
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

In [34]:
import requests

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": "pipeline-bank-rest-endpoint"}
                        )

In [35]:
try:
    response.raise_for_status()
except Exception:    
    raise Exception("Received bad response from the endpoint: {}\n"
                    "Response Code: {}\n"
                    "Headers: {}\n"
                    "Content: {}".format(rest_endpoint, response.status_code, response.headers, response.content))

run_id = response.json().get('Id')
print('Submitted pipeline run: ', run_id)

Submitted pipeline run:  801e6271-2029-47d3-968b-b00f6575e0f5


In [36]:
from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails

published_pipeline_run = PipelineRun(ws.experiments["pipeline-bank-rest-endpoint"], run_id)
RunDetails(published_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [43]:
from azureml.core.webservice import Webservice

name = "bank-automl-endpoint-02"
# load existing web service
service = Webservice(name=name, workspace=ws)

In [44]:
service

AciWebservice(workspace=Workspace.create(name='quick-starts-ws-169110', subscription_id='510b94ba-e453-4417-988b-fbdc37b55ca7', resource_group='aml-quickstarts-169110'), name=bank-automl-endpoint-02, image_id=None, compute_type=None, state=ACI, scoring_uri=Healthy, tags=http://fd59d40d-1b79-463f-803e-83b3b2468e76.southcentralus.azurecontainer.io/score, properties={}, created_by={'runId': '5ec83b31-d37a-4ba1-b554-683bf22e2d8a_38', 'hasInferenceSchema': 'True', 'hasHttps': 'False'})

In [45]:
service.update(enable_app_insights=True)
logs = service.get_logs()

for line in logs.split('\n'):
    print(line)

2021-12-26T21:20:38,665889200+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-12-26T21:20:38,673910600+00:00 - rsyslog/run 
2021-12-26T21:20:38,668108100+00:00 - iot-server/run 
2021-12-26T21:20:38,677086900+00:00 - nginx/run 
rsyslogd: /azureml-envs/azureml_84c85d362f11658b9008714e1aa4657b/lib/libuuid.so.1: no version information available (required by rsyslogd)
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-12-26T21:20:39,089057000+00:00 - iot-server/finish 1 0
2021-12-26T21:20:39,091194700+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (74)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 100
SPARK_HOME not set. Skipping PySpark Initialization.
Generating new fontManager, this may take some time...
Initializing logger
2021-12-26 21:20:40,826 | root | INFO | Starting up app insights client
logging socket was