In [1]:
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.wrapper import Module, dsl

In [2]:
# Sign in interactively against an explicit tenant and hand the resulting
# credentials to the workspace loader. Without `auth=`, the auth object would be
# discarded and `from_config` would fall back to its default authentication,
# which is not guaranteed to target this tenant.
tenant_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)
workspace = Workspace.from_config('config.json', auth=interactive_auth)
# One value per line: name, resource group, region, subscription, compute target names.
print(workspace.name, workspace.resource_group, workspace.location, workspace.subscription_id,
      workspace.compute_targets.keys(), sep='\n')

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


fundamental3
fundamental
eastasia
4f455bd0-f95a-4b7d-8d08-078611508e0b
dict_keys(['myaks2', 'aml-compute', 'my-compute', 'compute-deploy'])


In [3]:
# Reuse the named compute target when the workspace already has it; otherwise
# provision a new AmlCompute cluster and block until it is ready.
aml_compute_name = 'aml-compute'
try:
    aml_compute = AmlCompute(workspace, aml_compute_name)
    print("Found existing compute target: {}".format(aml_compute_name))
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. A bare `except:` would also
    # swallow auth/network errors and KeyboardInterrupt and then try to create a cluster.
    print("Creating new compute target: {}".format(aml_compute_name))

    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(workspace, aml_compute_name, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

Found existing compute target: aml-compute


In [4]:
# Look up the two registered datasets used by this notebook by name.
# `dataset` is consumed by the training sub-pipeline.
dataset_name = "THUCNews"
dataset = workspace.datasets[dataset_name]

# `dataset_score` is only referenced by the (currently disabled) scoring step.
dataset_name = "THUCNews_For_Batch_Inference"
dataset_score = workspace.datasets[dataset_name]

In [5]:
def _load_module_func(module_name):
    """Load the module whose spec lives at '<module_name>/<module_name>.spec.yaml'."""
    spec_path = '{0}/{0}.spec.yaml'.format(module_name)
    return Module.from_yaml(workspace, spec_path)


# One callable per pipeline step, each built from its local YAML spec.
split_data_txt_module_func = _load_module_func('split_data_txt')
fasttext_train_module_func = _load_module_func('fasttext_train')
fasttext_evaluation_module_func = _load_module_func('fasttext_evaluation')
fasttext_score_module_func = _load_module_func('fasttext_score')
compare_two_models_module_func = _load_module_func('compare_two_models')


In [6]:
@dsl.pipeline(name='training_pipeline', description='A sub pipeline including data processing/train/evaluation',
              default_compute_target=aml_compute_name)
def training_pipeline(epochs):
    """Sub-pipeline that splits the dataset, trains a model and evaluates it.

    Wires three modules together: split_data_txt -> fasttext_train ->
    fasttext_evaluation. The input data is the module-level ``dataset``.

    Args:
        epochs: Forwarded unchanged as the ``epochs`` input of the training module.

    Returns:
        A dict that merges the evaluation module's outputs with the training
        module's outputs. If both expose an output with the same name, the
        training module's entry wins because it is unpacked last.
    """
    # Split ratios are fixed here rather than exposed as pipeline parameters.
    # NOTE(review): presumably the remaining share feeds `test_data_output`, and a
    # training ratio of 0.1 looks unusually small — confirm against the module spec.
    split_data_txt = split_data_txt_module_func(
        input_dir=dataset,
        training_data_ratio=0.1,
        validation_data_ratio=0.2
    )
    # Train on the training/validation outputs of the split step.
    fasttext_train = fasttext_train_module_func(
        training_data_dir=split_data_txt.outputs.training_data_output,
        validation_data_dir=split_data_txt.outputs.validation_data_output,
        epochs=epochs
    )

    # Evaluate the trained model on the test output of the split step.
    fasttext_evaluation = fasttext_evaluation_module_func(
        trained_model_dir=fasttext_train.outputs.trained_model_dir,
        test_data_dir=split_data_txt.outputs.test_data_output
    )

    # Expose both the evaluation outputs and the training outputs to the caller.
    return {**fasttext_evaluation.outputs, **fasttext_train.outputs}

In [15]:
@dsl.pipeline(name='dummy_automl_pipeline',
              description='A dummy pipeline that trains two models and output the better one',
              default_compute_target=aml_compute_name)
def dummy_automl_pipeline():
    """Top-level pipeline: run two training sub-pipelines and compare their results.

    Instantiates ``training_pipeline`` twice (epochs=5 and epochs=10) and feeds
    each run's ``trained_model_dir`` and ``model_testing_result`` outputs into
    the compare_two_models module.

    Returns:
        A dict copy of the compare module's outputs.
    """
    # Two candidates that differ only in the number of training epochs.
    train_and_evalute_model1 = training_pipeline(epochs=5)
    train_and_evalute_model2 = training_pipeline(epochs=10)
    compare = compare_two_models_module_func(
        first_trained_model=train_and_evalute_model1.outputs.trained_model_dir,
        first_trained_result=train_and_evalute_model1.outputs.model_testing_result,
        second_trained_model=train_and_evalute_model2.outputs.trained_model_dir,
        second_trained_result=train_and_evalute_model2.outputs.model_testing_result
    )

    # Disabled batch-scoring step. While it stays commented out, `dataset_score`
    # and `fasttext_score_module_func` are loaded earlier but never used.
    # fasttext_score = fasttext_score_module_func(
    #     texts_to_score=dataset_score,
    #     fasttext_model_dir=compare.outputs.the_better_model
    # )
    # fasttext_score.runsettings.configure(node_count=2, process_count_per_node=2, mini_batch_size="64")
    return {**compare.outputs}


In [16]:
# Instantiate the top-level pipeline by calling the decorated pipeline function.
pipeline = dummy_automl_pipeline()
# Disabled. NOTE(review): `experiment_name` is only assigned in a later cell, so
# uncommenting this line here would raise NameError on a fresh kernel.
# pipeline.save(experiment_name=experiment_name)

In [17]:
# Optional pre-submission check, currently disabled; uncomment to run it.
# pipeline.validate()

In [18]:
# Submit the pipeline under the experiment named below; the call returns a run handle.
# NOTE(review): regenerate_outputs=False presumably lets unchanged steps reuse earlier
# outputs — confirm against the SDK documentation.
experiment_name = 'fasttext_training_process'
pipeline_run = pipeline.submit(
    experiment_name=experiment_name,
    regenerate_outputs=False,
)
# Uncomment to wait for the run to finish.
# pipeline_run.wait_for_completion()

Submitted PipelineRun 0f59b0d6-b343-4639-8d75-da4b91230007
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/fasttext_training_process/runs/0f59b0d6-b343-4639-8d75-da4b91230007?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3
