In [1]:
import json
import inspect
from azureml.core import Workspace, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.pipeline.wrapper import Module, dsl, Pipeline
from azureml.pipeline.wrapper._pipeline import _pipeline_to_dict
from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path

In [2]:
# Coordinates of the Azure ML workspace this notebook operates on.
subscription_id = '4f455bd0-f95a-4b7d-8d08-078611508e0b'
resource_group = 'fundamental'
workspace_name = 'fundamental'

# Bind to the workspace identified by the three values above.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Echo the workspace identity and the names of its compute targets, one item per line.
workspace_summary = (
    workspace.name,
    workspace.resource_group,
    workspace.location,
    workspace.subscription_id,
    workspace.compute_targets.keys(),
)
print(*workspace_summary, sep='\n')

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


fundamental
fundamental
eastus
4f455bd0-f95a-4b7d-8d08-078611508e0b
dict_keys(['fundamental-d3v2', 'default', 'cpu-cluster', 'aml-compute'])


In [3]:
# Look up the compute target by name in the workspace and print its description.
aml_compute_name = 'aml-compute'
aml_compute = AmlCompute(workspace=workspace, name=aml_compute_name)
print(aml_compute)

AmlCompute(workspace=Workspace.create(name='fundamental', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=aml-compute, id=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundamental/computes/aml-compute, type=AmlCompute, provisioning_state=Succeeded, location=eastus, tags=None)


In [4]:
# Names under which the two input datasets are registered in the workspace.
dataset_name = 'THUCNews_TXT'
char2index_name = 'Char2Index_JSON'

# Fetch both registered datasets by name.
data = workspace.datasets[dataset_name]
char2index = workspace.datasets[char2index_name]

# Print each dataset's description on its own line.
print(data, char2index, sep='\n')

FileDataset
{
  "source": [
    "('workspaceblobstore', 'UI/06-13-2020_021050_UTC/THUCNews.txt')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "d138049a-c5bb-40ef-b9ae-d2af9c3528de",
    "name": "THUCNews_TXT",
    "version": 1,
    "workspace": "Workspace.create(name='fundamental', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental')"
  }
}
FileDataset
{
  "source": [
    "('workspaceblobstore', 'UI/06-13-2020_020957_UTC/character2index.json')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "c5e6e1ce-967b-4422-96ff-bb7dbe973381",
    "name": "Char2Index_JSON",
    "version": 1,
    "workspace": "Workspace.create(name='fundamental', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental')"
  }
}


In [5]:
# Load the three modules registered under the workspace namespace.
namespace = 'fundamental'
module_names = ('Split Data Txt', 'FastText Train', 'FastText Test')
split_data_txt_module_func, fasttext_train_module_func, fasttext_test_module_func = [
    Module.load(workspace=workspace, namespace=namespace, name=module_name)
    for module_name in module_names
]

# Print the call signature of each loaded module function, in load order.
for module_func in (
    split_data_txt_module_func,
    fasttext_train_module_func,
    fasttext_test_module_func,
):
    print(inspect.signature(module_func))

(input_dir:'Input Dir'=None, training_data_ratio:'Training Data Ratio'='0.7', validation_data_ratio:'Validation Data Ratio'='0.1', random_split:'Random Split'='False', seed:'Seed'='0')
(training_data_dir:'Training Data Dir'=None, validation_data_dir:'Validation Data Dir'=None, char2_index_dir:'Char2Index Dir'=None, epochs:'Epochs'='2', batch_size:'Batch Size'='32', learning_rate:'Learning Rate'='0.0005', embedding_dim:'Embedding Dim'='128')
(trained_model_dir:'Trained Model Dir'=None, test_data_dir:'Test Data Dir'=None, char2_index_dir:'Char2Index Dir'=None)


In [6]:
# Wire the three modules together: split -> train -> test.

# Step 1: split the registered text dataset.
split_params = {
    'training_data_ratio': 0.7,
    'validation_data_ratio': 0.1,
    'random_split': False,
    'seed': 1,
}
split_data_txt = split_data_txt_module_func(input_dir=data, **split_params)
print(split_data_txt.outputs)

# Step 2: train on the training/validation outputs of the split step.
train_params = {
    'epochs': 1,
    'batch_size': 64,
    'learning_rate': 0.0005,
    'embedding_dim': 128,
}
fasttext_train = fasttext_train_module_func(
    training_data_dir=split_data_txt.outputs.training_data_output,
    validation_data_dir=split_data_txt.outputs.validation_data_output,
    char2_index_dir=char2index,
    **train_params,
)
print(fasttext_train.outputs)

# Step 3: feed the trained model and the test output of the split step to the test module.
fasttext_test = fasttext_test_module_func(
    trained_model_dir=fasttext_train.outputs.trained_model_dir,
    test_data_dir=split_data_txt.outputs.test_data_output,
    char2_index_dir=char2index,
)

{'training_data_output': <azureml.pipeline.wrapper._module._OutputBuilder object at 0x000001B2C307FD30>, 'validation_data_output': <azureml.pipeline.wrapper._module._OutputBuilder object at 0x000001B2C22F9908>, 'test_data_output': <azureml.pipeline.wrapper._module._OutputBuilder object at 0x000001B2C22F9748>}
{'trained_model_dir': <azureml.pipeline.wrapper._module._OutputBuilder object at 0x000001B2C306E7F0>}


In [7]:
# Assemble the three connected steps into a single pipeline that runs,
# by default, on the compute target named in aml_compute_name.
pipeline_nodes = [split_data_txt, fasttext_train, fasttext_test]
pipeline = Pipeline(
    nodes=pipeline_nodes,
    workspace=workspace,
    default_compute_target=aml_compute_name,
)

In [8]:
# Validate the pipeline; the trailing expression makes the result the cell's displayed value.
validation_result = pipeline.validate()
validation_result

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

SupportDetectView()

{'result': 'validation passed', 'errors': []}

In [9]:
# Submit the pipeline, wait for the run to complete, then save the pipeline
# under the same experiment name (the save result is the cell's displayed value).
experiment_name = 'fasttext_test'
run = pipeline.submit(experiment_name=experiment_name)
run.wait_for_completion()
pipeline.save(experiment_name=experiment_name)

Submitted PipelineRun 6747eab3-1f8c-4277-ba83-5f1548232305
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/fasttext_test/runs/6747eab3-1f8c-4277-ba83-5f1548232305?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental
PipelineRunId: 6747eab3-1f8c-4277-ba83-5f1548232305
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/fasttext_test/runs/6747eab3-1f8c-4277-ba83-5f1548232305?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: c526b69d-d841-4b77-8c56-3a2a1ed034ae
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/fasttext_test/runs/c526b69d-d841-4b77-8c56-3a2a1ed034ae?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental

StepRun(Split Data Txt) Execution Summary
StepRun( Split Data Txt ) Status



PipelineRun Execution Summary
PipelineRun Status: Completed
{'runId': '6747eab3-1f8c-4277-ba83-5f1548232305', 'status': 'Completed', 'startTimeUtc': '2020-06-14T03:00:09.576667Z', 'endTimeUtc': '2020-06-14T03:00:24.401071Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'Designer', 'runType': 'HTTP', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://fundamental6374297605.blob.core.windows.net/azureml/ExperimentRun/dcid.6747eab3-1f8c-4277-ba83-5f1548232305/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=mHcnTZ20q6WzCnG2yAOjXvoSwimz5TlfjmbA7%2BD8DfM%3D&st=2020-06-14T02%3A50%3A29Z&se=2020-06-14T11%3A00%3A29Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://fundamental6374297605.blob.core.windows.net/azureml/ExperimentRun/dcid.6747eab3-1f8c-4277-ba83-5f1548232305/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=0fVdOM5LwFZpBayn04jheyFcFiEkScgthjKkpkVT9co%3D&st=2020-06-14T02%3A50%3A29Z&se=2020-06-14T11

Name,Id,Details page,Pipeline type,Updated on,Created by,Tags
Pipeline-Created-on-6-14-2020,c04c5a3a-4067-49da-8325-3c68226ce274,Link,TrainingPipeline,"June 14, 2020 11:00 AM",Xiaoyu Yang,azureml.Designer: true

0
azureml.Designer: true
