In [2]:
from azureml.core import Workspace, Experiment

# Load the workspace via Workspace.from_config() and open the experiment
# that every later cell submits runs to.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# One (label, value) pair per printed line; joined with newlines so the
# output is the same four-line summary.
workspace_summary = [
    ('Workspace name: ', ws.name),
    ('Azure region: ', ws.location),
    ('Subscription id: ', ws.subscription_id),
    ('Resource group: ', ws.resource_group),
]
print('\n'.join(label + value for label, value in workspace_summary))

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is rebound by the AutoML submission cell further down
# and this run is never explicitly completed in the visible cells.
run = exp.start_logging()

Workspace name: quick-starts-ws-242065
Azure region: southcentralus
Subscription id: 81cefad3-d2c9-4f77-a466-99a7f541c7bb
Resource group: aml-quickstarts-242065


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
# ComputeTargetException must be imported explicitly; without it the
# `except` clause below raises NameError whenever the cluster is missing.
from azureml.core.compute_target import ComputeTargetException

# Reuse the named cluster if it already exists in the workspace; otherwise
# provision a new one with up to 4 Standard_D2_V2 nodes.
cluster_name = "Udacity-Project-Cluster2"
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size= 'Standard_D2_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)


# Block until the (existing or newly requested) cluster reports completion.
cpu_cluster.wait_for_completion(show_output=True)



Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [19]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Search space sampled at random for each child run: the keys are the
# command-line flags handed to train.py.
search_space = {
    '--C': choice(0.1, 1.0, 10),
    '--max_iter': choice(['5', '10', '20']),
}
ps = RandomParameterSampling(search_space)

# Early-termination policy, evaluated at every interval from the first one.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=1,
    delay_evaluation=0,
)

# Make sure a local ./training directory exists.
training_dir_missing = "training" not in os.listdir()
if training_dir_missing:
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='conda_dependencies.yml',
)

# Run train.py from the notebook directory on the compute cluster.
src = ScriptRunConfig(
    source_directory='./',
    script='train.py',
    compute_target=cluster_name,
    environment=sklearn_env,
)

# Sweep definition: maximise the "Accuracy" metric over at most 20 child
# runs, 2 at a time.
hyperdrive_config = HyperDriveConfig(
    hyperparameter_sampling=ps,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=2,
    max_duration_minutes=10080,
    policy=policy,
    estimator=None,
    run_config=src,
    resume_from=None,
    resume_child_runs=None,
    pipeline=None,
    debug_flag=None,
    custom_run_id=None,
)

# Submit the sweep to the experiment; `hdr` is the handle used by later cells.
hdr = exp.submit(config=hyperdrive_config)



In [8]:
# Render the monitoring widget for the HyperDrive run.
run_details_widget = RunDetails(hdr)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [23]:
import joblib
from azureml.train.hyperdrive.run import HyperDriveRun

# Get the best run of the sweep and report its metric and arguments.

# Wait for the sweep to finish first: under Restart & Run All this cell runs
# straight after the submission, when no child run has reported a metric yet.
# raise_on_error=False leaves a failed or cancelled sweep to the check below.
hdr.wait_for_completion(show_output=False, raise_on_error=False)

best_run = hdr.get_best_run_by_primary_metric()
if best_run is None:
    # Without this guard the next line fails with an opaque AttributeError.
    raise RuntimeError(
        "HyperDrive run has no child run with the primary metric logged; "
        "check the child-run logs before selecting a best run."
    )

best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']
# presumably the sampled --C / --max_iter flags appear under 'arguments' in
# the run definition; fall back to an empty list if the key is absent.
best_run_arguments = parameter_values.get('arguments') or []

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Arguments:', best_run_arguments)

# NOTE(review): the captured file listing contains only logs and no model
# artifact, so there is nothing to download or register from this run here;
# confirm whether train.py is expected to save the model under outputs/.
best_run.get_file_names()


Best Run Id:  HD_442691b8-91ba-4f1e-b65b-df67f0d39c92_6

 Accuracy: 0.9059201884313992


['logs/azureml/dataprep/0/rslex.log.2023-09-30-16',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/metrics_capability/metrics-capability.log',
 'system_logs/snapshot_capability/snapshot-capability.log',
 'user_logs/std_log.txt']

In [13]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset, Datastore
from azureml.data.datapath import DataPath

# Location of the delimited training file that the dataset is created from.
BANK_MARKETING_TRAIN_URL = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'

# Build a tabular dataset from the delimited file at that URL.
ds = Dataset.Tabular.from_delimited_files(path=BANK_MARKETING_TRAIN_URL)

In [14]:
from train import clean_data
import pandas as pd

# Clean the dataset with train.py's clean_data helper, which returns two
# objects (presumably features and label; verify against train.py).
x, y = clean_data(ds)

# Place both results side by side in one frame for AutoML.
dataset = pd.concat(objs=[x, y], axis="columns")

In [25]:
from azureml.train.automl import AutoMLConfig

# Settings for the classification AutoML experiment, collected in one place
# and unpacked into the config object below.
automl_settings = {
    'task': 'classification',
    'primary_metric': 'AUC_weighted',
    'training_data': dataset,
    'label_column_name': 'y',
    'n_cross_validations': 2,
    'experiment_timeout_minutes': 30,
    'enable_onnx_compatible_models': True,
}

automl_config = AutoMLConfig(**automl_settings)

In [26]:
# Submit the AutoML configuration and stream progress into the cell output.
# No compute target is set on the config, and the captured output shows the
# run executing in the local environment.
run = exp.submit(
    config=automl_config,
    show_output=True,
)


No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_d44591fd-1c4c-40d9-a54c-6a9faa71509a,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.





********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+--------------------------------------+
|Size of the smallest class    |Name/Label of the smallest class|Number of samples in the training data|
|3692                          |1                               |32950                                 |
+------------------------------+--------------------------------+--------------------------------------+

*********************************************************************

2023-09-30:18:33:24,792 INFO     [explanation_client.py:334] Using default datastore for uploads


Current status: EngineeredFeatureExplanations. Computation of engineered features completed
Current status: RawFeaturesExplanations. Computation of raw features started
Current status: RawFeaturesExplanations. Computation of raw features completed
Current status: BestRunExplainModel. Best run model explanations completed
********************************************************************************************


In [27]:
from azureml.automl.runtime.onnx_convert import OnnxConverter

# Ask the AutoML run for its output with return_onnx_model=True and unpack
# the returned pair.
automl_output = run.get_output(return_onnx_model=True)
best_aml_model, onnx_model = automl_output

# Write the ONNX model next to the notebook.
OnnxConverter.save_onnx_model(onnx_model, "./automl_model.onnx")

In [21]:

# Second AutoML experiment: same data and limits, configured as a regression
# task scored by r2_score.
# NOTE(review): the label column 'y' is also used as a classification target
# earlier in this notebook; confirm that a regression run on it is intended.
regression_settings = {
    'task': 'regression',
    'primary_metric': 'r2_score',
    'training_data': dataset,
    'label_column_name': 'y',
    'n_cross_validations': 2,
    'experiment_timeout_minutes': 30,
    'enable_onnx_compatible_models': True,
}
automl_config2 = AutoMLConfig(**regression_settings)

# Submit, then fetch the run's output with its ONNX model and save the ONNX
# model to disk.
automl_run2 = exp.submit(
    config=automl_config2,
    show_output=True,
)
regression_output = automl_run2.get_output(return_onnx_model=True)
best_aml_model2, onnx_model2 = regression_output
OnnxConverter.save_onnx_model(onnx_model2, "./automl_model2.onnx")

No run_configuration provided, running on local with default configuration
Running in the active local environment.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_9b5ccea5-9cbf-418a-802c-e408de26b557,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.





********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

********************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more about high cardinality feature handling: https://aka.ms/AutomatedMLFeaturization

********************************************************************************************
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
ITER: The iteration being evaluated.
P

2023-09-30:17:26:26,692 INFO     [explanation_client.py:334] Using default datastore for uploads


Current status: EngineeredFeatureExplanations. Computation of engineered features completed
Current status: RawFeaturesExplanations. Computation of raw features started
Current status: RawFeaturesExplanations. Computation of raw features completed
Current status: BestRunExplainModel. Best run model explanations completed
********************************************************************************************
