In [1]:
from azureml.core import Workspace, Dataset
from azureml.data.dataset_factory import TabularDatasetFactory

# Connect to the Azure ML workspace
ws = Workspace.from_config()

# CSV file the tabular dataset is built from
data_path = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'

# Single source of truth for the registered name, so the register call and the
# confirmation message below cannot drift apart
dataset_name = 'dataAutoML_v8'

# Build the TabularDataset from the URL under its own name, then register it.
# `data` is bound only once, to the object returned by register().
url_dataset = TabularDatasetFactory.from_delimited_files(data_path)
data = url_dataset.register(
    workspace=ws,
    name=dataset_name,
    description='New version of dataset from URL',
    create_new_version=True,
)

# Confirm the registration using the name carried by the returned dataset
# rather than echoing a hard-coded literal
print(f"New dataset registered as '{data.name}'")


New dataset registered as 'dataAutoML_v8'


In [4]:
from azureml.core import Workspace, Experiment, Dataset, ComputeTarget
from azureml.core.compute import AmlCompute
from azureml.exceptions import ComputeTargetException
from azureml.train.automl import AutoMLConfig

# Connect to the Azure ML workspace
ws = Workspace.from_config()

# Define the experiment
experiment_name = 'automl-experiment'
automl_experiment = Experiment(ws, experiment_name)

# Compute cluster settings: a small VM size and at most two nodes.
# The configuration is only used if the cluster has to be created below.
compute_name = 'cpu-cluster-small'
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=2)

# Reuse the cluster when it already exists; otherwise create it and wait
# until provisioning has finished
try:
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Compute target not found. Creating a new one.')
    compute_target = ComputeTarget.create(ws, compute_name, compute_config)
    compute_target.wait_for_completion(show_output=True)

# Load the registered dataset by name
dataset = Dataset.get_by_name(ws, name='dataAutoML_v8')

# Set parameters for AutoMLConfig using the registered dataset.
# NOTE(review): the recorded output of this cell shows a "Class balancing
# detection" guardrail alert, so 'accuracy' may overstate model quality on
# this data; consider a metric such as 'AUC_weighted' -- TODO confirm.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=dataset,
    label_column_name='y',
    n_cross_validations=5,
    compute_target=compute_target
)

# Submit the AutoML experiment. Only the submit call sits under the
# "Failed to submit" message, and the error is re-raised so the cell stops
# with a traceback instead of continuing with `run` undefined.
try:
    run = automl_experiment.submit(config=automl_config)
except Exception as e:
    print(f"Failed to submit AutoML run. Error: {e}")
    raise

# Waiting is a separate stage: a failure here means the run was submitted
# but did not finish, so it gets its own message.
try:
    run.wait_for_completion(show_output=True)
except Exception as e:
    print(f"AutoML run {run.id} did not complete. Error: {e}")
    raise

# Fetch and show the details of the finished run
run_details = run.get_details()
print(run_details)


Found existing compute target.
Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_91775691-446d-45e9-8502-527996ad39c5,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_91775691-446d-45e9-8502-527996ad39c5,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+-------------------------------------

In [None]:
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget

# Load the workspace
ws = Workspace.from_config()

# Look the cluster up with .get() so that a cluster which is already gone
# yields None instead of a KeyError; this keeps the cleanup cell re-runnable.
compute_name = 'cpu-cluster-small'
compute_target = ws.compute_targets.get(compute_name)

if compute_target is None:
    # Nothing registered under that name in this workspace
    print(f"Compute target '{compute_name}' not found; nothing to delete.")
else:
    # Delete the compute target
    compute_target.delete()
    print(f"Deletion requested for compute target '{compute_name}'.")
