In [73]:
from azureml.core import Workspace, Experiment

# Connect to the workspace described by the local config file and open the
# experiment that this notebook records its runs under.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# Echo the workspace coordinates, one per line.
workspace_summary = [
    "Workspace name: " + ws.name,
    "Azure region: " + ws.location,
    "Subscription id: " + ws.subscription_id,
    "Resource group: " + ws.resource_group,
]
print("\n".join(workspace_summary))

# Open an interactive logging run on the experiment.
run = exp.start_logging()

Workspace name: quick-starts-ws-246588
Azure region: westeurope
Subscription id: d4ad7261-832d-46b2-b093-22156001df5b
Resource group: aml-quickstarts-246588


In [74]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.exceptions import ComputeTargetException

cluster_name = "THAIHOC"

# Reuse the compute cluster when it already exists in the workspace;
# otherwise provision a new one.
# Project constraints: vm_size "Standard_D2_V2", max_nodes no greater than 4.
try:
    cluster = AmlCompute(ws, cluster_name)
except ComputeTargetException:
    # The VM size was previously misspelled ("Standrad_D2_V2"), which made
    # provisioning fail whenever the cluster had to be created from scratch.
    cluster_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_V2",
        max_nodes=4,
    )
    cluster = ComputeTarget.create(ws, cluster_name, cluster_config)

# Block until the cluster is provisioned so later cells do not submit work
# to a target that is still being created.
cluster.wait_for_completion(show_output=True)

In [75]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler: each child run draws "C" uniformly from [0, 1]
# and "max_iter" from the listed choices.
ps = RandomParameterSampling(
    {
        "C": uniform(0, 1),
        "max_iter": choice(50, 100, 200)
    }
)

# Specify a Policy: Bandit early termination with a slack factor of 0.2.
policy = BanditPolicy(slack_factor=0.2)

# Make sure the ./training directory exists (no error if it already does).
os.makedirs("./training", exist_ok=True)

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory="./",
    script="train.py",
    compute_target=cluster,
    environment=sklearn_env,
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
# The policy was previously created but never handed to HyperDriveConfig, so
# no early termination was applied; it is now passed explicitly.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    primary_metric_name="Accuracy",
    max_total_runs=20,
)

In [58]:
# Launch the HyperDrive sweep on the experiment, then render the run-details
# widget so progress can be followed from the notebook.
run = exp.submit(hyperdrive_config, show_output=True)
hyperdrive_widget = RunDetails(run)
hyperdrive_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [None]:
import joblib

# Get your best run and save the model from that run.
best_run = run.get_best_run_by_primary_metric()

# This pickles the Run handle itself, not a trained model. It is kept so that
# anything already reading best_run.pkl keeps working.
joblib.dump(value=best_run, filename="best_run.pkl")

# Also pull down the artifacts the best run uploaded under outputs/ so the
# trained model file itself is available locally.
# NOTE(review): assumes train.py writes its fitted model under ./outputs —
# TODO confirm against train.py.
best_run.download_files(prefix="outputs/", output_directory="hyperdrive_best_run")

In [76]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset from the public bank-marketing training CSV.
BANK_MARKETING_CSV_URL = (
    "https://automlsamplenotebookdata.blob.core.windows.net/"
    "automl-sample-notebook-data/bankmarketing_train.csv"
)
ds = TabularDatasetFactory.from_delimited_files(path=BANK_MARKETING_CSV_URL)

In [77]:
from train import clean_data

# Run the dataset through clean_data, which yields a pair that is unpacked
# into x and y.
x, y = clean_data(ds)

# Store y under the "y" key of x; the AutoML configuration below refers to
# that key as its label column.
x["y"] = y

In [78]:
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.constants import Tasks

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    "experiment_timeout_minutes": 30,
    "primary_metric": "accuracy",
    "n_cross_validations": 5,
}

# Task and data wiring are given explicitly; the tunable settings above are
# expanded as keyword arguments.
automl_config = AutoMLConfig(
    task=Tasks.CLASSIFICATION,
    training_data=x,
    label_column_name="y",
    **automl_settings,
)

In [79]:
# Submit the AutoML configuration under its own experiment and show the
# run-details widget for it.
from azureml.core.experiment import Experiment

experiment = Experiment(workspace=ws, name="automl_experiment")
autml_run = experiment.submit(automl_config, show_output=True)
automl_widget = RunDetails(autml_run)
automl_widget.show()

No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
automl_experiment,AutoML_6ce00cf8-32c6-4650-b0b7-bd71f25a1e69,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.


In [None]:
# Imported here so this cell does not depend on an earlier cell having run.
import joblib

# Retrieve and save your best automl model.
# get_output() returns the best child run together with its fitted model.
# Previously only the Run handle from get_best_child() was pickled, so the
# saved file did not contain a model.
automl_best_run, automl_fitted_model = autml_run.get_output()
joblib.dump(value=automl_fitted_model, filename="automl_best_run.joblib")

In [64]:
# Tear down the compute cluster referenced by `cluster`.
cluster.delete()

{'recall_score_weighted': 0.9173596358118361,
 'precision_score_macro': 0.80270409969364,
 'AUC_weighted': 0.9481196982524803,
 'matthews_correlation': 0.5510690341918092,
 'precision_score_weighted': 0.9115197375164812,
 'precision_score_micro': 0.9173596358118361,
 'log_loss': 0.26396494595392606,
 'weighted_accuracy': 0.9586611192426124,
 'average_precision_score_weighted': 0.9562749040664624,
 'AUC_macro': 0.9481196982524803,
 'recall_score_micro': 0.9173596358118361,
 'f1_score_macro': 0.7732006078905316,
 'norm_macro_recall': 0.5019907428303245,
 'accuracy': 0.9173596358118361,
 'f1_score_micro': 0.9173596358118361,
 'balanced_accuracy': 0.7509953714151623,
 'average_precision_score_micro': 0.9817412821617342,
 'average_precision_score_macro': 0.828285781122684,
 'recall_score_macro': 0.7509953714151623,
 'AUC_micro': 0.9809794533953824,
 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_2e6953a7-4a65-4538-85af-eacc9ed30ae0_17/accuracy_table',
 'f1_score_weighted': 0.