In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-225560
Azure region: eastus2
Subscription id: d4ad7261-832d-46b2-b093-22156001df5b
Resource group: aml-quickstarts-225560


To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code FVUQGY6M3 to authenticate.


In [8]:
from azureml.core.compute import ComputeTarget, AmlCompute

cluster_name = "free-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found CLuster")
except:
    compute_config = AmlCompute.provisioning_configuration(
                vm_size = "Standard_D2_V2",
                max_nodes= 4)

    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###

Found CLuster


In [39]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling({ "--C" : uniform(0.1,0.9)

})

# Specify a Policy
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.2, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory=".",
    script = "train.py",
    compute_target = compute_target, 
    environment = sklearn_env,
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src, hyperparameter_sampling= ps,
                    primary_metric_name = "Accuracy",
                    primary_metric_goal= PrimaryMetricGoal.MAXIMIZE,
                    policy = policy,
                    max_total_runs=10,
 )

In [40]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(config=hyperdrive_config)

#RunDetails(hyperdrive_run).show()

In [41]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [60]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_931c1868-01e5-450e-bdd4-630493e74752
Web View: https://ml.azure.com/runs/HD_931c1868-01e5-450e-bdd4-630493e74752?wsid=/subscriptions/d4ad7261-832d-46b2-b093-22156001df5b/resourcegroups/aml-quickstarts-225560/workspaces/quick-starts-ws-225560&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Execution Summary
RunId: HD_931c1868-01e5-450e-bdd4-630493e74752
Web View: https://ml.azure.com/runs/HD_931c1868-01e5-450e-bdd4-630493e74752?wsid=/subscriptions/d4ad7261-832d-46b2-b093-22156001df5b/resourcegroups/aml-quickstarts-225560/workspaces/quick-starts-ws-225560&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_931c1868-01e5-450e-bdd4-630493e74752',
 'target': 'free-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-02-12T09:24:30.169175Z',
 'endTimeUtc': '2023-02-12T09:27:05.089984Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"Accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'd1509880-70f3-49cf-a814-fa330a823920',
  'user_agent': 'python/3.8.5 (Linux-5.15.0-1031-azure-x86_64-with-glibc2.10) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.48.0',
  'space_size': 'infinite_space_size',
  'score': '0.9139605462822459',
  'best_child_run_id': 'HD_931c1868-01e5-450e-bdd4-630493e74752_3',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_931c1868-01e5-450e-bdd4-630493e74752_3'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryVal

In [61]:
hyperdrive_run.get_status()

'Completed'

In [62]:
import joblib
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run.get_details()["runDefinition"]["arguments"])
print(best_run.get_file_names())
#model = best_run.register_model(model_name ="hpd", model_path = 'outputs/model')
model = best_run.register_model(model_name="hpd", model_path="./")
### YOUR CODE HERE ###

['--C', '0.7049305187394518']
['logs/azureml/dataprep/0/backgroundProcess.log', 'logs/azureml/dataprep/0/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/0/rslex.log.2023-02-12-09', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'system_logs/metrics_capability/metrics-capability.log', 'system_logs/snapshot_capability/snapshot-capability.log', 'user_logs/std_log.txt']


In [63]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset
# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
path =  "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
### YOUR CODE HERE ###
ds = Dataset.Tabular.from_delimited_files(path)

In [69]:
from train import clean_data
from sklearn.model_selection import train_test_split

# Use the clean_data function to clean your data.
X, y = clean_data(ds)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)


In [76]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task= "classification",
    primary_metric= "accuracy",
    X= X_train,
    y=y_train,
    n_cross_validations=5)



In [80]:
# Submit your automl run
automl_run = exp.submit(automl_config, show_output=False)
automl_run.wait_for_completion()
### YOUR CODE HERE ###


ValidationException: ValidationException:
	Message: Install the required versions of packages using the requirements file. Requirements file location: /anaconda/envs/jupyter_env/lib/python3.8/site-packages/azureml/automl/core/validated_linux_requirements.txt. Alternatively, use remote target to avoid dependency management. 
Package name/Required version/Installed version
azureml-widgets/jinja2<=2.11.2/Jinja2 3.0.3
	InnerException: None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "Install the required versions of packages using the requirements file. Requirements file location: /anaconda/envs/jupyter_env/lib/python3.8/site-packages/azureml/automl/core/validated_linux_requirements.txt. Alternatively, use remote target to avoid dependency management. \nPackage name/Required version/Installed version\nazureml-widgets/jinja2<=2.11.2/Jinja2 3.0.3",
        "target": "azureml-widgets",
        "inner_error": {
            "code": "NotSupported",
            "inner_error": {
                "code": "IncompatibleOrMissingDependency"
            }
        },
        "reference_code": "435ab938-fd87-49bc-932e-6eec0d6aee4f"
    }
}

In [None]:
# Retrieve and save your best automl model.
best_run = automl_run.get_best_child()
### YOUR CODE HERE ###
model = best_run.register_model(model_name = 'best-model-hpd', model_path = "outputs-hpd/model.joblib")
### YOUR CODE HERE ###