In [3]:
from azureml.core import Workspace, Experiment, Environment
import os

# --- Workspace ---------------------------------------------------------------
# Load the workspace described by the local config.json file.
ws = Workspace.from_config()

# Alternative: resolve the workspace by name instead of using config.json.
# workspace_name = os.environ.get('WORKSPACE_NAME', 'udacity-projects')
# ws = Workspace.get(name=workspace_name)

# Show which workspace was resolved, one attribute per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# --- Experiment --------------------------------------------------------------
# The experiment name can be overridden through the EXPERIMENT_NAME variable.
experiment_name = os.environ.get('EXPERIMENT_NAME', 'az-ml-project-1-hd')
exp = Experiment(ws, experiment_name)

# --- Environment -------------------------------------------------------------
# Build the run environment from a Conda specification file.
env = Environment.from_conda_specification(
    name="az-ml",
    file_path="envs/standard_env.yml",
)

# Alternative: build the environment from a pip requirements file.
# env = Environment.from_pip_requirements(name = "az-ml", file_path = "path-to-pip-requirements-file")

# Optional: register and build the environment in the workspace.
# env = env.register(workspace=ws)
# env_build = env.build(workspace=ws)

# Start an interactive logging run in the experiment.
run = exp.start_logging()

UserErrorException: UserErrorException:
	Message: You are currently logged-in to b914a242-e718-443b-a47c-6b4c649d8c0a tenant. You don't have access to 5a4ab2ba-6c51-4805-8155-58759ad589d8 subscription, please check if it is in this tenant. All the subscriptions that you have access to in this tenant are = 
 [SubscriptionInfo(subscription_name='CCF - Central Hub (APL208920), PSP 9914.V00114.001', subscription_id='049a0549-6017-4270-8bb3-adcf29211e23'), SubscriptionInfo(subscription_name='CCF - AAA Sophia (160871), PSP 9914.P01682.001', subscription_id='d68cf00c-9437-4250-bb55-a32d545def4c')]. 
 Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "You are currently logged-in to b914a242-e718-443b-a47c-6b4c649d8c0a tenant. You don't have access to 5a4ab2ba-6c51-4805-8155-58759ad589d8 subscription, please check if it is in this tenant. All the subscriptions that you have access to in this tenant are = \n [SubscriptionInfo(subscription_name='CCF - Central Hub (APL208920), PSP 9914.V00114.001', subscription_id='049a0549-6017-4270-8bb3-adcf29211e23'), SubscriptionInfo(subscription_name='CCF - AAA Sophia (160871), PSP 9914.P01682.001', subscription_id='d68cf00c-9437-4250-bb55-a32d545def4c')]. \n Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk."
    }
}

In [6]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Setup the compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
compute_name = os.environ.get('CLUSTER_NAME', 'cpu-cluster')
# Environment variables are always strings, so cast the node counts to int
# before handing them to the provisioning configuration.
compute_min_nodes = int(os.environ.get('CLUSTER_MIN_NODES', 0))
compute_max_nodes = int(os.environ.get('CLUSTER_MAX_NODES', 4))
vm_size = os.environ.get('CLUSTER_SKU', 'STANDARD_D2_V2')

# Read the workspace's compute targets once and reuse the result below.
existing_targets = ws.compute_targets

# Verify if the compute cluster exists
if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # The name is taken by something that is not an AmlCompute cluster;
        # keep going (as before) but make the mismatch visible.
        print('WARNING: compute target ' + compute_name +
              ' exists but is not an AmlCompute cluster: ' +
              type(compute_target).__name__)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

NameError: name 'ws' is not defined

In [7]:
from azureml.widgets import RunDetails
# from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core import ScriptRunConfig

# Hyperparameter tuning configuration

# Discrete search space that is sampled at random.
search_space = {
    'C': choice(0.001, 0.01, 0.1, 1, 100, 1000),
    'max_iter': choice(100, 250, 500),
}
ps = RandomParameterSampling(search_space)

# Early-termination policy.
policy = BanditPolicy(slack_factor=0.1)

# Alternative: fetch the previously registered environment.
# env = Environment.get(workspace=ws, name="az-ml")

# Run configuration for scripts/train.py on the compute target, using `env`.
script_run_settings = dict(
    source_directory="./scripts",
    script="train.py",
    compute_target=compute_target,
    environment=env,
)
est = ScriptRunConfig(**script_run_settings)

# Combine run configuration, sampler and policy into the HyperDrive config.
hyperdrive_settings = dict(
    run_config=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=100,
    max_concurrent_runs=4,
)
hyperdrive_config = HyperDriveConfig(**hyperdrive_settings)

SyntaxError: invalid syntax (2610999436.py, line 33)

In [8]:
# Launch the HyperDrive run in the experiment.
hyperdrive_run = exp.submit(config=hyperdrive_config)

# Display the live run-details widget for the submitted run.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

In [None]:
import joblib

# Pick the child run that scored best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Collect its logged metrics and the arguments it was launched with.
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']

print(best_run_metrics)

In [None]:
# Report the best HyperDrive run and register/download its model.
#
# The sampled hyperparameters in this notebook are 'C' and 'max_iter', so the
# report prints those instead of unrelated labels and out-of-range positions.
# NOTE(review): assumes the run arguments are a flat list of flag/value pairs,
# e.g. ['--C', '0.1', '--max_iter', '100'] -- confirm against train.py.
hyperparameters = dict(zip(parameter_values[::2], parameter_values[1::2]))

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
# .get() prints None instead of raising if a flag is missing from the run.
print('\n C:', hyperparameters.get('--C'))
print('\n max_iter:', hyperparameters.get('--max_iter'))

# NOTE(review): model_path must match the file train.py writes in the run's
# outputs -- confirm 'models/bankmkt-hd.joblib' is what the script saves.
model = best_run.register_model(model_name='bankmkt-hd', model_path='models/bankmkt-hd.joblib')
model.download(target_dir="models", exist_ok=True)

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset from the public bank-marketing training CSV.
url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=url)

In [None]:
from scripts.train import clean_data
import pandas as pd

# Clean the raw dataset, stage it locally as CSV, upload it to the workspace's
# default datastore, and rebind `ds` to a TabularDataset over the uploaded file.

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

# Create the local staging directory; exist_ok keeps the cell re-runnable.
os.makedirs("./training", exist_ok=True)

# Since we're using AutoML, we do not need to split data now.
# AutoML takes a single table holding both the features and the label.
data_train = pd.concat([x, y], axis=1)
# index=False: otherwise the row index is written as an extra unnamed column
# that would be read back as if it were a feature.
data_train.to_csv('./training/data_train.csv', index=False)

from azureml.core import Dataset

datastore = ws.get_default_datastore()
# overwrite=True so a re-run replaces a previously uploaded (stale) file.
datastore.upload(src_dir='./training', target_path='training', overwrite=True)
# Reference the uploaded file by its full name, including the .csv extension.
ds = TabularDatasetFactory.from_delimited_files(datastore.path('training/data_train.csv'))

In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML settings.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# To run the experiment for longer, run this notebook in your own Azure tenant,
# which will incur personal costs.
automl_settings = dict(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='AUC_weighted',
    # The training table is materialised as a pandas DataFrame here.
    training_data=ds.to_pandas_dataframe(),
    label_column_name='y',
    n_cross_validations=2,
)
automl_config = AutoMLConfig(**automl_settings)

In [None]:
# Create (or reuse) the AutoML experiment and launch the run.
automl_experiment_name = 'bankmkt-automl'
exp = Experiment(ws, automl_experiment_name)
automl_run = exp.submit(config=automl_config, show_output=False)

# Block until the run finishes, streaming its progress output.
automl_run.wait_for_completion(show_output=True)

In [None]:
# Retrieve and save best automl model.
# get_output() yields the best child run together with its fitted model.
best_run, model = automl_run.get_output()
print(best_run)
print(model)

# Persist the fitted model itself (not the run id string) so the saved file
# can be loaded back for predictions.
model_dir = os.path.join(os.getcwd(), "models")
# Make sure the target directory exists before writing into it.
os.makedirs(model_dir, exist_ok=True)
joblib.dump(value=model, filename=os.path.join(model_dir, "bankmkt_automl_model.joblib"))