In [1]:
from azureml.core import Workspace
# Bind `ws` to the workspace handle returned by Workspace.from_config().
# Later cells use `ws` to look up/register the dataset, to find or create the
# compute cluster, and to create the experiment.
# NOTE(review): presumably this reads a local workspace config file and may
# prompt for authentication -- confirm against the SDK documentation.
ws = Workspace.from_config()

In [2]:
from azureml.core import Dataset


# Name the dataset is registered under in the workspace, and the CSV it is
# built from when no registration exists yet.
dataset_name = "Bank Marketing Dataset"
dataset_url = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'

# Get-or-create: build and register the dataset only when the workspace has
# nothing under `dataset_name`; otherwise reuse the registered one.
if dataset_name not in ws.datasets:
    dataset = Dataset.Tabular.from_delimited_files(dataset_url)
    # Register so later runs of this cell take the reuse branch below.
    dataset = dataset.register(
        workspace=ws,
        name=dataset_name,
    )
else:
    print("already existing -> loading old one")
    dataset = ws.datasets[dataset_name]

# Load the dataset into `df` via to_pandas_dataframe().
df = dataset.to_pandas_dataframe()

In [3]:
from azureml.core import ComputeTarget

{
  "source": [
    "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "4896ce90-0896-4229-b253-e4ad2e1f764b",
    "name": "Bank Marketing Dataset",
    "version": 1,
    "workspace": "Workspace.create(name='quick-starts-ws-155110', subscription_id='61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30', resource_group='aml-quickstarts-155110')"
  }
}

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or create.
cpu_cluster_name = "alpha"

# Try to attach to a cluster with this name; if the lookup raises
# ComputeTargetException, provision a new one instead.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # To use a different region for the compute, add a location='<region>' parameter
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D12_V2',
        max_nodes=6,
        min_nodes=1,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the newly created cluster.
cpu_cluster.wait_for_completion(show_output=True)

InProgress......
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded.............................................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [6]:
from azureml.train.automl import AutoMLConfig

# Extra settings, unpacked into AutoMLConfig as keyword arguments below.
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=5,
    primary_metric='AUC_weighted',
)

# Classification AutoML configuration: trains on `dataset` with column "y" as
# the label, running on `cpu_cluster`.
automl_config = AutoMLConfig(
    compute_target=cpu_cluster,
    task="classification",
    training_data=dataset,
    label_column_name="y",
    path="./test",
    enable_early_stopping=True,
    featurization='auto',
    debug_log="automl_errors.log",
    **automl_settings,
)
                            

In [7]:
from azureml.core import Experiment


# Experiment that the AutoML run is submitted under.
experiment_name = 'AZMLE-project2-step2'
experiment = Experiment(workspace=ws, name=experiment_name)


In [8]:
from azureml.widgets import RunDetails
from azureml.core.run import Run

# Submit the AutoML configuration; `automl` is the handle to the submitted run.
automl = experiment.submit(config=automl_config)

# Show the run-details widget for this run.
details_widget = RunDetails(automl)
details_widget.show()

# Wait for the run to finish with output shown. Kept as the last expression so
# the cell still displays the returned value.
automl.wait_for_completion(show_output=True)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
AZMLE-project2-step2,AutoML_4478f233-d47f-430d-88d8-b29f75c2a55f,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
AZMLE-project2-step2,AutoML_4478f233-d47f-430d-88d8-b29f75c2a55f,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  Your input data has been split into a training dataset and a holdout test dataset for validation of the model. The test holdout dataset reflects the original distribution of your input data.
              
DETAILS:      
+---------------------------------+---------------------------------+---------------------------------+
|Dataset                          |Row counts                       |Percentage                       |
|train                            |29655                            |90.0                             |
|test                             |3295                             |10.0 

{'runId': 'AutoML_4478f233-d47f-430d-88d8-b29f75c2a55f',
 'target': 'alpha',
 'status': 'Completed',
 'startTimeUtc': '2021-08-20T09:26:46.255722Z',
 'endTimeUtc': '2021-08-20T09:52:47.534672Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'alpha',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"4896ce90-0896-4229-b253-e4ad2e1f764b\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.32.0", "azureml-train": "1.32.0", "azureml-train-restclients-hyperdrive": "1.32.0", "azureml-train-core": "1.32.0", "azureml-train-automl": "1.32.0", "azureml-train-automl-runtime": "1.32.0", "azureml-train-automl-client": "1.32.0", "azureml-te

In [9]:

# Fetch the run and fitted model returned by the AutoML run's get_output(),
# then print the fitted pipeline's steps.
best_run, fitted_model = automl.get_output()
print(fitted_model.steps)

# Registration metadata: the registered name is a fixed prefix plus the
# 'model_name' property recorded on the best run.
model_name = "bank_marketing_automl_" + best_run.properties['model_name']
description = 'Step 2 Auto Ml Generated model for bank marketing dataset'
tags = None

# Register the model through the AutoML run.
model = automl.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

Package:azureml-automl-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-core, training version:1.33.0, current version:1.32.0
Package:azureml-dataprep, training version:2.20.1, current version:2.18.0
Package:azureml-dataprep-native, training version:38.0.0, current version:36.0.0
Package:azureml-dataprep-rslex, training version:1.18.0, current version:1.16.1
Package:azureml-dataset-runtime, training version:1.33.0, current version:1.32.0
Package:azureml-defaults, training version:1.33.0, current version:1.32.0
Package:azureml-interpret, training version:1.33.0, current version:1.32.0
Package:azureml-mlflow, training version:1.33.0, current version:1.32.0
Package:azureml-pipeline-core, training version:1.33.0, current version:1.32.0
Package:azureml-responsibleai, training version:1.33.0, current version:1.32.0
Package:azureml-telemetry, training version:1.33.0, current version:1.32.0
Package:azureml-train-automl-client, training version:1.33.0, current version:1.

[('datatransformer', DataTransformer(
    task='classification',
    is_onnx_compatible=False,
    enable_feature_sweeping=True,
    enable_dnn=False,
    force_text_dnn=False,
    feature_sweeping_timeout=86400,
    featurization_config=None,
    is_cross_validation=False,
    feature_sweeping_config={}
)), ('prefittedsoftvotingclassifier', PreFittedSoftVotingClassifier(
    estimators=[('0', Pipeline(memory=None,
             steps=[('maxabsscaler', MaxAbsScaler(copy=True)),
                    ('lightgbmclassifier',
                     LightGBMClassifier(min_data_in_leaf=20, n_jobs=1, problem_info=ProblemInfo(
        dataset_samples=29655,
        dataset_features=132,
        dataset_classes=None,
        dataset_num_categorical=0,
        dataset_categoricals=None,
        pipeline_categoricals=None,
        dataset_y_std=None,
        dataset_uid=None,
        subsamp...
        subsampling_schedule='hyperband_clip',
        cost_mode_param=None,
        iteration_timeout_mode=