In [2]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-226641
Azure region: northcentralus
Subscription id: 61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30
Resource group: aml-quickstarts-226641


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Choose a name for your CPU cluster
cluster_name = "compute-cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

try:
    aml_compute = ComputeTarget(workspace=ws, name=cluster_name)
    print('An existing cluster will be used!')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2', max_nodes=4)
    aml_compute = ComputeTarget.create(ws, cluster_name, compute_config)
    print('An new cluster will be created now!')

aml_compute.wait_for_completion(show_output=True)

An new cluster will be created now!
InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [7]:
pip install azureml-train-automl-runtime

Collecting azureml-train-automl-runtime
  Using cached azureml_train_automl_runtime-1.49.0-py3-none-any.whl (338 kB)
Collecting azureml-automl-core~=1.49.0
  Using cached azureml_automl_core-1.49.0-py3-none-any.whl (244 kB)
Collecting azureml-automl-runtime~=1.49.0
  Using cached azureml_automl_runtime-1.49.0-py3-none-any.whl (1.8 MB)
Collecting lightgbm<=3.2.1,>=2.0.11
  Downloading lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting onnxmltools==1.4.1
  Downloading onnxmltools-1.4.1-py2.py3-none-any.whl (371 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m371.5/371.5 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting smart-open<=1.9.0
  Downloading smart_open-1.9.0.tar.gz (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Pr

In [12]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        '--C': choice(0.1, 0.5, 1, 10),
        '--max_iter': choice(10, 50, 100, 200)
    }
)

# Specify a Policy
policy = BanditPolicy(slack_factor = 0.1, delay_evaluation = 5, evaluation_interval = 1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(source_directory='.',
                      command=['python', 'train.py'],
                      compute_target=aml_compute,
                      environment=sklearn_env)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator=src,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=20,
                                     max_concurrent_runs=4)


# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = "./",
            compute_target=aml_compute,
            vm_size='STANDARD_D2_V2',
            entry_script="train.py")

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling=ps, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=policy,
                                     estimator=est,
                                     max_total_runs=16,
                                     max_concurrent_runs=4)                                     

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [13]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)
hyperdrive_run.wait_for_completion(show_output=True)
assert(hyperdrive_run.get_status() == "Completed")



RunId: HD_aad58645-3118-423b-80ac-917352d2a048
Web View: https://ml.azure.com/runs/HD_aad58645-3118-423b-80ac-917352d2a048?wsid=/subscriptions/61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30/resourcegroups/aml-quickstarts-226641/workspaces/quick-starts-ws-226641&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

[2023-02-22T20:07:32.236883][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space
[2023-02-22T20:07:33.1598254Z][SCHEDULER][INFO]Scheduling job, id='HD_aad58645-3118-423b-80ac-917352d2a048_0' 
[2023-02-22T20:07:33.3387914Z][SCHEDULER][INFO]Scheduling job, id='HD_aad58645-3118-423b-80ac-917352d2a048_1' 
[2023-02-22T20:07:33.533098][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.
[2023-02-22T20:07:33.5983130Z][SCHEDULER][INFO]Scheduling job, id='HD_aad58645-3118-423b-80ac-917352d2a048_3' 
[2023-02-22T20:07:33.5068685Z][SCHEDULER][INFO]Scheduling job, id='HD_aad58645-3118-423b-80ac-917352d2a0

In [19]:
import joblib
# Get your best run and save the model from that run.
### YOUR CODE HERE ###
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
print('\n Accuracy is', best_run_metrics['Accuracy'])
print('\n Regularization Strength is',best_run_metrics['Regularization Strength:'])
print('\n Max Iterations is',best_run_metrics['Max iterations:'])

model = best_run.register_model(model_name='Model_HighAccuracy', model_path='./', 
                                properties={'Accuracy': best_run_metrics['Accuracy'],
                                            'Regularization Strength': best_run_metrics['Regularization Strength:'],
                                            'Max Iterations': best_run_metrics['Max iterations:']})

Best Run Id:  HD_aad58645-3118-423b-80ac-917352d2a048_2

 Accuracy is 0.9061665452779801

 Regularization Strength is 10.0

 Max Iterations is 50


In [118]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(url)

from azureml.core import Dataset

dataset = Dataset.Tabular.from_delimited_files(url)

# preview the first 3 rows of the dataset
dataset.take(3).to_pandas_dataframe()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no


In [121]:
from train import clean_data
from sklearn.model_selection import train_test_split

x, y = clean_data(dataset)

# Split data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
train_data = x_train.join(y_train)
train_data.head(5)

datastore = ws.get_default_datastore()

ds = Dataset.Tabular.register_pandas_dataframe(
        dataframe=train_data, 
        name='data_source', 
        target=datastore
    )

In [114]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.


automl_config = AutoMLConfig(
    compute_target = aml_compute,
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    enable_onnx_compatible_models=True,
    n_cross_validations=2)

In [115]:
type(ds)

azureml.data.tabular_dataset.TabularDataset

In [116]:
# Submit your automl run

### YOUR CODE HERE ###
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails
 
print('Submit the AutoML run!')
automl_run = exp.submit(automl_config)

RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

Submit the AutoML run!


ContextualVersionConflict: (azureml-telemetry 1.48.0 (/anaconda/envs/jupyter_env/lib/python3.8/site-packages), Requirement.parse('azureml-telemetry~=1.49.0'), {'azureml-automl-core'})