### Setup

In [1]:
import logging

from matplotlib import pyplot as plt
import pandas as pd
import os

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.automl.core.featurization import FeaturizationConfig
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.interpret._internal.explanation_client import ExplanationClient

### Connect to AML workspace and create an experiment

In [3]:
# Load the workspace described by the local Azure ML config file.
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'automlbankmarketing'

# Handle to the experiment that the AutoML run is submitted under.
experiment = Experiment(ws, experiment_name)

# Connection details shown as a one-column summary table.
# NOTE: the rendered output includes the subscription ID; clear the cell
# output before sharing the notebook outside the team.
output = {
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Experiment Name': experiment.name,
}

# Do not truncate long cell values. `None` is the supported "no limit" value
# from pandas 1.0 onwards; the legacy -1 sentinel is rejected by current
# pandas, while very old releases accept only an int, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T

Unnamed: 0,Unnamed: 1
Subscription ID,ab8f5415-63b3-4fd4-8a8a-9213316abb6e
Workspace,ADS_AMLworkspace
Resource Group,ADS_Book
Location,westus2
Experiment Name,automlbankmarketing


### Create or use an existing compute target

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to look up in the workspace (created if missing).
cpu_cluster_name = "ninjacpucluster"

try:
    # Reuse the cluster when one with this name can be retrieved.
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster that can grow to 6 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports its provisioning state, echoing progress.
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Load data from BankDataset

In [5]:
# Look up the dataset named 'bank_dataset' in the workspace.
# (To inspect it locally, bankdata.to_pandas_dataframe() can be used.)
bankdata = Dataset.get_by_name(workspace=ws, name='bank_dataset')

### Split data into Training and Testing

In [6]:
# Column the model is trained to predict.
label_column_name = 'deposit'

# Seeded random split so the partition is reproducible: the first part
# (percentage=0.8) is used for training, the remainder is held out.
training_data, validation_data = bankdata.random_split(
    percentage=0.8,
    seed=223,
)

### Configure AutoML settings

In [7]:
# Run-level AutoML settings: time budgets, parallelism, metric and logging.
# No n_cross_validations or validation data is supplied in this cell.
automl_settings = dict(
    experiment_timeout_hours=0.3,
    enable_early_stopping=True,
    iteration_timeout_minutes=5,
    max_concurrent_iterations=4,
    max_cores_per_iteration=-1,
    primary_metric='AUC_weighted',
    featurization='auto',
    verbosity=logging.INFO,
)

# Classification experiment on the remote cluster; KNN and LinearSVM are
# excluded from the model search, and the experiment may exit once the
# primary metric reaches the exit score.
automl_config = AutoMLConfig(
    task='classification',
    training_data=training_data,
    label_column_name=label_column_name,
    compute_target=compute_target,
    blocked_models=['KNN', 'LinearSVM'],
    experiment_exit_score=0.9984,
    debug_log='automl_errors.log',
    **automl_settings
)

### Submit the experiment

In [8]:
# Submit the AutoML configuration to the experiment without streaming the
# run's progress into the notebook; the returned handle is used later.
remote_run = experiment.submit(config=automl_config, show_output=False)

Running on remote or ADB.


In [9]:
# Display the submitted run's summary (experiment, id, type, status, links).
remote_run

Experiment,Id,Type,Status,Details Page,Docs Page
automlbankmarketing,AutoML_e56af6f2-48e3-4a15-8840-4bf893d21774,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


### Get the best fitted model

In [14]:
# The experiment was submitted with show_output=False, so submission returns
# before the run has finished. Block here until it completes; otherwise a
# top-to-bottom "Run All" would ask for the best model too early.
remote_run.wait_for_completion(show_output=False)

# Best child run (by the configured primary metric) and its fitted pipeline.
best_run, fitted_model = remote_run.get_output()
fitted_model

PipelineWithYTransformations(Pipeline={'memory': None, 'steps': [('datatransformer', DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
        feature_sweeping_config=None, feature_sweeping_timeout=None,
        featurization_config=None, force_text_dnn=None,
        is_cross_validation=None, is_onnx_compatible=Non...66666666667, 0.06666666666666667, 0.06666666666666667, 0.06666666666666667, 0.06666666666666667]))]},
               y_transformer={}, y_transformer_name='LabelEncoder')

### Register the best fitted model

In [17]:
# Registration metadata: free-text description and (no) tags.
description = (
    'AutoML Model trained on bank marketing data '
    'to predict if a client will subscribe to a term deposit'
)
tags = None

# Use the model name recorded in the best run's properties.
model_name = best_run.properties['model_name']

# Register the model from the AutoML run in the workspace.
model = remote_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

In [18]:
# Display the registered model object (name, id, version, tags, properties).
model

Model(workspace=Workspace.create(name='ADS_AMLworkspace', subscription_id='ab8f5415-63b3-4fd4-8a8a-9213316abb6e', resource_group='ADS_Book'), name=AutoMLe56af6f2435, id=AutoMLe56af6f2435:1, version=1, tags={}, properties={})