In [None]:
from azureml.core import Workspace, Experiment

# Look up the workspace by name and bind the experiment that the runs in this
# notebook are submitted to.
ws = Workspace.get(name="quick-starts-ws-146480")
exp = Experiment(workspace=ws, name="udacity-project")

# Echo the workspace coordinates, one per line.
workspace_summary = (
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)
print(*workspace_summary, sep='\n')

# Start an interactive run on the experiment.
run = exp.start_logging()

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Create compute cluster
from azureml.core.compute_target import ComputeTargetException

# Cluster settings used both to look up and, if necessary, to provision the
# compute target.
cluster_name = "cluster_sd2v2n4"
vm_size = "Standard_D2_V2"
vm_priority = 'lowpriority'
max_nodes = 4
min_nodes = 1   # node count to wait for after provisioning
timeout = 10    # minutes to wait for provisioning
output = True   # stream provisioning progress

try:
    # Reuse the cluster when one with this name is already in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No such cluster: provision it and block until it is ready.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        vm_priority=vm_priority,
        max_nodes=max_nodes,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    # Fixed: this previously passed the undefined name `min_node`, which raised
    # NameError the first time the cluster had to be created.
    compute_target.wait_for_completion(
        show_output=output,
        timeout_in_minutes=timeout,
        min_node_count=min_nodes,
    )

In [None]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
# Fixed: `choice` is used below but was never imported.
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
import os

# Specify parameter sampler: the search space for the two arguments passed to
# the training script.
params = {
    "--C": uniform(0.0, 100.0),
    "--max_iter": choice(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150),
}
ps = RandomParameterSampling(params)

# Specify a Policy
policy = BanditPolicy(slack_factor=0.5, evaluation_interval=1, delay_evaluation=2)

# Make sure the source directory exists. The original line was a conditional
# expression without an `else` branch, which is a syntax error.
os.makedirs("./training", exist_ok=True)

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory="./training", compute_target=compute_target, entry_script="train.py")

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    hyperparameter_sampling=ps,
    estimator=est,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_concurrent_runs=4,
    max_total_runs=5 * len(params),  # five runs per tuned hyperparameter
)

In [None]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
# Launch the sweep on the experiment, then render the run in the notebook widget.
hyperdrive_run = exp.submit(config=hyperdrive_config, show_output=True)
hyperdrive_widget = RunDetails(hyperdrive_run)
hyperdrive_widget.show()

In [None]:
import joblib
# Get your best run and save the model from that run.
# Pick the child run that scored best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
# NOTE(review): confirm "./" is the intended model_path for this run's stored
# files; it depends on where train.py writes the model.
best_model = best_run.register_model(model_name="best_model", model_path="./")

# Removed a bare `best_run` expression here: only the last expression of a
# cell is displayed, so mid-cell it did nothing.
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()

print("Best run: ", best_run.id)
# Fixed the misspelled label ("Acurracy").
print("Accuracy: ", best_run_metrics["Accuracy"])
print("Arguments: ", best_run_details["runDefinition"]["arguments"])

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Source CSV for the tabular dataset.
bank_marketing_csv_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=bank_marketing_csv_url)

In [None]:
from train import clean_data

# Use the clean_data function to clean your data.
# clean_data returns a pair that is unpacked into x and y.
cleaned = clean_data(ds)
x, y = cleaned
# Attach y to x as the "label" column, so AutoML can be given one table.
x["label"] = y

In [None]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# Collect the AutoML settings in one place, then unpack them into the config.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "task": 'classification',
    "primary_metric": 'accuracy',
    "training_data": x,
    "label_column_name": 'label',
    "n_cross_validations": 5,
}
automl_config = AutoMLConfig(**automl_settings)

In [2]:
# Submit your automl run
automl_run = exp.submit(automl_config, show_output=True)
# Fixed: the widget previously displayed `hyperdrive_run` again instead of the
# AutoML run that was just submitted.
RunDetails(automl_run).show()

In [None]:
# Retrieve and save your best automl model.
# Unpack the best child run and its fitted model from the AutoML parent run.
automl_best_run, model = automl_run.get_output()
# NOTE(review): confirm "./" is the intended model_path for the AutoML run's
# stored files before relying on this registration.
automl_best_model = automl_best_run.register_model(model_name="automl_best_model", model_path="./")

# Removed a bare `automl_best_run` expression here: mid-cell it displayed nothing.
automl_best_run_metrics = automl_best_run.get_metrics()
automl_best_run_details = automl_best_run.get_details()

print("Best run: ", automl_best_run.id)
# Fixed: AutoML logs its metrics under lower-case names (the config uses
# primary_metric='accuracy'), so the "Accuracy" key was wrong. The label typo
# ("Acurracy") is corrected as well.
print("Accuracy: ", automl_best_run_metrics["accuracy"])
# Fixed: `parameters` was never defined (NameError). The fitted model returned
# by get_output() is printed instead.
print("Fitted model: ", model)