In [1]:
import os

from azureml.core import Dataset, Datastore, Model, Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import AksCompute, AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.environment import Environment
from azureml.core.experiment import Experiment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, AksWebservice, LocalWebservice
from azureml.pipeline.wrapper import Module, PipelineRun, StepRun, dsl

In [2]:
# Authenticate interactively against a specific tenant and load the workspace
# described by the local config.json.
tenant_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
# Keep the auth object and hand it to the workspace explicitly; previously it was
# created and discarded, so the workspace only picked up the login by side effect.
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)
workspace = Workspace.from_config(path='config.json', auth=interactive_auth)
# Print basic workspace facts, one per line, as a quick sanity check of the connection.
print(workspace.name, workspace.resource_group, workspace.location, workspace.subscription_id,
      workspace.compute_targets.keys(), sep='\n')

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


fundamental3
fundamental
eastasia
4f455bd0-f95a-4b7d-8d08-078611508e0b
dict_keys(['myaks2', 'aml-compute', 'my-compute', 'compute-deploy'])


In [3]:
# Reuse the AmlCompute cluster if it already exists in the workspace; otherwise provision it.
aml_compute_name = 'aml-compute'
try:
    aml_compute = AmlCompute(workspace, aml_compute_name)
    print("Found existing compute target: {}".format(aml_compute_name))
except ComputeTargetException:
    # Only a failed compute-target lookup should trigger provisioning. A bare `except:`
    # would also swallow auth/network errors and KeyboardInterrupt and then try to
    # create a cluster anyway.
    print("Creating new compute target: {}".format(aml_compute_name))

    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
    aml_compute = ComputeTarget.create(workspace, aml_compute_name, provisioning_config)
    # Block until provisioning finishes (or the 20-minute timeout elapses).
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

Found existing compute target: aml-compute


In [4]:
# Look up, by name, the workspace dataset that holds the texts to score.
dataset_name = "THUCNews_For_Batch_Inference"
dataset_score = workspace.datasets[dataset_name]

In [5]:
# Load the FastText scoring module from its YAML spec.
# The returned object is called like a function inside the pipeline definition below.
fasttext_score_module_func = Module.from_yaml(workspace, 'fasttext_score/fasttext_score.spec.yaml')

In [6]:
#load model
### Get a list of experiment names from the workspace

In [7]:
# Collect the name of every experiment in the workspace; the bare last line displays the list.
exp_name_list = [workspace_experiment.name for workspace_experiment in Experiment.list(workspace)]
exp_name_list

['fasttext_test',
 'sample-pipelines',
 'automobile',
 'fasttext_predict',
 'sample-pipelines2',
 'fasttext_with_two_training_process',
 'train-within-notebook',
 'train-on-local',
 'logging-api-test',
 'fasttext_with_one_training_process',
 'fasttext_train',
 'my_test',
 'split_data_txt',
 'compare_two_models',
 'yucongj-test',
 'fasttext_parallel_score',
 'parallel',
 'dir',
 'test0717',
 'test_0727',
 'test_0727_experiment',
 'localtest',
 'mpi_0729',
 'mpi_0729_experiment',
 'test',
 'para_0729',
 'para_0729_experiment',
 'basic_0721',
 'basic_0721_experiment',
 'deploy',
 'fasttext_training_process']

### Choose the experiment you want with its name

In [8]:
# Open the experiment whose runs produced the model; the bare last line displays it.
experiment_name = "fasttext_training_process"
experiment = Experiment(workspace=workspace, name=experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
fasttext_training_process,fundamental3,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
# Take the first run yielded by the experiment (type: azureml.pipeline.core.run.PipelineRun).
# Use the next() builtin with a default instead of calling __next__() directly, so an
# experiment without runs fails with a clear message rather than a bare StopIteration.
run = next(experiment.get_runs(), None)
if run is None:
    raise RuntimeError("Experiment '{}' has no runs to load a model from".format(experiment.name))
run

Experiment,Id,Type,Status,Details Page,Docs Page
fasttext_training_process,99b6972b-cc43-4e41-8782-8cad60126a16,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


### Get a PipelineRun object

In [13]:
run_id = run.id
# Re-open the same run through azureml.pipeline.wrapper's PipelineRun class
# (azureml.pipeline.wrapper._pipeline_run.PipelineRun); the step-run lookups
# in the following cells are called on this object.
pipeline_run = PipelineRun(experiment, run_id)
pipeline_run

Experiment,Id,Type,Status,Details Page,Docs Page
fasttext_training_process,99b6972b-cc43-4e41-8782-8cad60126a16,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


### List the step runs of the 'FastText Train' module

In [14]:
# find_step_run returns every step run in the pipeline run with the given module name;
# print each one followed by blank lines for readability.
step_run_list = pipeline_run.find_step_run(name='FastText Train')
for train_step_run in step_run_list:
    print(train_step_run, '\n\n')

Run(Experiment: fasttext_training_process,
Id: 234aff78-94c1-4e5e-a1e3-a140eb0489d0,
Type: azureml.StepRun,
Status: Completed) 


Run(Experiment: fasttext_training_process,
Id: e26713b3-25af-41ea-8601-e4e8b86534c8,
Type: azureml.StepRun,
Status: Completed) 




### Get a StepRun object

In [15]:
# Suggestion: find_step_run should accept an id parameter, the same way Experiment.list()
# lets you specify the experiment name; both return a list.
compare_step_runs = pipeline_run.find_step_run(name='Compare Two Models')
# Take the first match as the step run to read the model from.
step_run = compare_step_runs[0]
step_run

Experiment,Id,Type,Status,Details Page,Docs Page
fasttext_training_process,06dcef4a-f9c0-4411-b33b-4f80d77c9aab,azureml.StepRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


### Download the trained model from Azure storage

In [17]:
# Inspect the step run's output ports to find the name of the port that holds the model.
step_run.output_ports


[OutputPort(Name:The better model,
 Type:AnyDirectory,
 StepRun:Run(Experiment: fasttext_training_process,
 Id: 06dcef4a-f9c0-4411-b33b-4f80d77c9aab,
 Type: azureml.StepRun,
 Status: Completed))]

In [24]:
# Wrap the 'The better model' output port's data path in a file dataset, so the pipeline
# can take it as a named input without a local download.
port = step_run.get_port(name='The better model')
data_path = port.get_data_path()
model_files = Dataset.File.from_files(path=[data_path])
model = model_files.as_named_input('model_for_batch_inference')
# Alternative (disabled): download the model locally instead of referencing it in storage.
# saved_path = port.download(overwrite=True)
# print('model save at: {}'.format(saved_path))

### Register the trained model for deployment

In [19]:
# Model registration is currently disabled. To re-enable it, first re-enable the
# commented-out `port.download(...)` call in the previous cell, which defines `saved_path`.
# NOTE(review): re-enabling would also rebind `model`, which the pipeline cell passes
# as `fasttext_model_dir` - confirm that is intended.
# model_name='model_for_batch_inference'
# model = Model.register(workspace, model_path=saved_path, model_name=model_name, tags={'batch_inference': 1})

Registering model model_for_batch_inference


In [25]:
@dsl.pipeline(name='batch inference', description='Batch Inference', default_compute_target=aml_compute.name)
def training_pipeline():
    """Build the batch-inference pipeline, which consists of a single FastText scoring step.

    Despite the function name, no training happens here: the step receives the
    texts to score and the model directory prepared in earlier cells.

    Returns:
        dict: the outputs of the scoring step, copied into a new dictionary.
    """
    fasttext_score = fasttext_score_module_func(
        fasttext_model_dir=model,
        texts_to_score=dataset_score,
    )
    # Run settings for the scoring step: 2 nodes, 2 processes per node, mini-batch size "64".
    fasttext_score.runsettings.configure(
        node_count=2,
        process_count_per_node=2,
        mini_batch_size="64",
    )

    return {**fasttext_score.outputs}

In [26]:
# Instantiate the pipeline by calling the decorated pipeline function.
pipeline = training_pipeline()
# Optional (disabled): save the pipeline under an experiment without submitting it.
# pipeline.save(experiment_name=experiment_name)

In [27]:
# validate
# pipeline.validate()

In [28]:
# Submit the batch-inference pipeline under its own experiment name.
# NOTE(review): this rebinds `pipeline_run` (and `experiment_name`), which earlier cells
# set for the training run. Re-running the `pipeline_run.find_step_run(...)` cells after
# this one would query the batch-inference run instead - consider distinct names.
experiment_name = 'fasttext_batch_inference'
pipeline_run = pipeline.submit(experiment_name=experiment_name, regenerate_outputs=True)
# Optional (disabled): block until the submitted run finishes.
# pipeline_run.wait_for_completion()

Submitted PipelineRun 5891673a-eba2-4450-b858-dc4228378b6f
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/fasttext_batch_inference/runs/5891673a-eba2-4450-b858-dc4228378b6f?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3
