## Setup environment

In [1]:
!pip uninstall algtools azureml-core azureml-sdk -y
!pip install azureml-telemetry==1.0.48
!pip install algtools==0.0.12 --extra-index-url=https://test.pypi.org/simple

Uninstalling algtools-0.0.12:
  Successfully uninstalled algtools-0.0.12
Uninstalling azureml-core-1.0.48:
  Successfully uninstalled azureml-core-1.0.48
Looking in indexes: https://pypi.org/simple, https://test.pypi.org/simple
Collecting algtools==0.0.12
  Using cached https://test-files.pythonhosted.org/packages/f0/8e/157927c7e71fd868ed7ddffc497b202d42dac399c94f194fc74df55c4cfd/algtools-0.0.12-py3-none-any.whl
Collecting azureml-core==1.0.48 (from algtools==0.0.12)
  Using cached https://files.pythonhosted.org/packages/26/03/3d861852da59a81cb60794adc1247c7fa140c758e8e9ba202d2d357293a9/azureml_core-1.0.48-py2.py3-none-any.whl


Installing collected packages: azureml-core, algtools
Successfully installed algtools-0.0.12 azureml-core-1.0.48


In [1]:
from azureml.core import Workspace, RunConfiguration
from azureml.core.compute import ComputeTarget

from azureml_scaffold.run_on_azureml_service import Module, ModuleStep, run_pipeline, list_modules


# Identify the Azure ML workspace used by every later cell (module lookup,
# compute target listing and pipeline submission all take `ws`).
workspace_settings = {
    'name': 'lisal-amlservice',
    'subscription_id': '74eccef0-4b8d-4f83-b5f9-fa100d155b22',
    'resource_group': 'lisal-dev',
}
ws = Workspace.get(**workspace_settings)


# Alternative workspace settings, kept for reference (not active):
#    name='lisal-amlservice',
#    subscription_id='ef3a6d54-2b53-49f6-b905-7c7ec83487a6',
#    resource_group='amlservice-resources'

## List modules registered under workspace

In [2]:
# Ask the scaffold helper for the modules registered under the workspace.
# The result is kept in module_list; no later cell shown here reads it.
module_list = list_modules(workspace=ws)

Listing modules ...

Found 41 modules, among which 38 global modules, 3 workspace modules.


## Create modules and assign parameters

In [4]:
# Settings shared by the NER preprocess and train modules, defined once so
# both stages are always configured with the same values.
BERT_PRETRAINED_MODEL = 'bert-base-cased'
MAX_SEQUENCE_LENGTH = '128'


def _create_import_data(blob_uri):
    """Create an 'Import Data' module reading a TSV file with a header row from `blob_uri`."""
    import_module = Module.create(ws, 'Import Data')
    import_module.assign_parameters({
        'Data source': 'AzureBlobStorage',
        'Authentication type': 'PublicOrSAS',
        'Blob URI': blob_uri,
        'File format': 'TSV',
        'URI file has header row': 'True'
    })
    return import_module


def _create_ner_preprocess():
    """Create a 'NER Preprocess' module configured with the shared BERT settings."""
    preprocess_module = Module.create(ws, 'NER Preprocess')
    preprocess_module.assign_parameters({
        'BERT pretrained model': BERT_PRETRAINED_MODEL,
        'Maximum sequence length': MAX_SEQUENCE_LENGTH
    })
    return preprocess_module


# Import Data modules: one for the training set, one for the scoring set.
import_data_for_train = _create_import_data(
    'https://chjinchetestse1240092388.blob.core.windows.net/data/train.tsv'
)
import_data_for_score = _create_import_data(
    'https://chjinchetestse1240092388.blob.core.windows.net/data/valid.tsv'
)

# NER Preprocess modules: separate instances for the train and score branches.
ner_preprocess_for_train = _create_ner_preprocess()
ner_preprocess_for_score = _create_ner_preprocess()

# NER Train module.
ner_train = Module.create(ws, 'NER Train')
ner_train.assign_parameters({
    'BERT pretrained model': BERT_PRETRAINED_MODEL,
    'Maximum sequence length': MAX_SEQUENCE_LENGTH,
    'Number of training epochs': '1',
    'Warmup proportion': '0.4',
})

# NER Score module (no parameters are assigned to it here).
ner_score = Module.create(ws, 'NER Score')

Creating Module Import Data (version: 852, owner: Microsoft Corporation, created date: 2019-07-02 09:19:39)
Creating Module Import Data (version: 852, owner: Microsoft Corporation, created date: 2019-07-02 09:19:39)
Creating Module NER Preprocess (version: 4, owner: Jincheng Chen, created date: 2019-07-25 00:00:36)
Creating Module NER Preprocess (version: 4, owner: Jincheng Chen, created date: 2019-07-25 00:00:36)
Creating Module NER Train (version: 4, owner: Jincheng Chen, created date: 2019-07-25 00:00:36)
Creating Module NER Score (version: 4, owner: Jincheng Chen, created date: 2019-07-25 00:00:36)


## Connect module input/output ports


In [5]:
# Wiring table, one row per edge of the module graph:
# (consuming module, its input port, producing module, its output port).
port_connections = [
    (ner_preprocess_for_train, 'Input data', import_data_for_train, 'Results dataset'),
    (ner_train, 'Input train data', ner_preprocess_for_train, 'Output feature'),
    (ner_preprocess_for_score, 'Input data', import_data_for_score, 'Results dataset'),
    (ner_score, 'Trained model', ner_train, 'Output model'),
    (ner_score, 'Input test data', ner_preprocess_for_score, 'Output feature'),
]

# Connect each input port to its upstream output port, in table order.
for consumer, input_name, producer, output_name in port_connections:
    consumer.input_ports[input_name].connect(producer.output_ports[output_name])

## Submit experiment

In [6]:
# Collect the names of the workspace's compute targets whose type is AmlCompute.
compute_name_list = []
for target in ComputeTarget.list(ws):
    if target.type == 'AmlCompute':
        compute_name_list.append(target.name)

print(f"Workspace {ws.name} has AmlCompute targets: {compute_name_list}")

Workspace lisal-amlservice has AmlCompute targets: ['cpu-112gb', 'always-on-ds2v2']


In [7]:
# Name of the compute target every ModuleStep will run on.
compute_name = 'always-on-ds2v2'

# Build the run configuration shared by all steps.
from azureml.core.environment import DEFAULT_GPU_IMAGE

global_run_config = RunConfiguration()
global_run_config.target = compute_name

# Run inside Docker, using the SDK's default GPU base image.
# NOTE(review): a GPU image and gpu_support are requested, but the target name
# suggests a CPU-only DS2 v2 VM; confirm the compute really has GPUs.
# NOTE(review): the recorded run of this notebook ended with the NER Train step
# exiting with "-9"; presumably the node ran out of memory. Verify, and
# consider a larger compute target for training.
docker_settings = global_run_config.environment.docker
docker_settings.enabled = True
docker_settings.base_image = DEFAULT_GPU_IMAGE
docker_settings.gpu_support = True

In [8]:
# Modules that make up the pipeline, in the order they are handed to run_pipeline.
pipeline_modules = [
    import_data_for_train,
    import_data_for_score,
    ner_preprocess_for_train,
    ner_preprocess_for_score,
    ner_train,
    ner_score,
]

# Create a pipeline consisting of ModuleStep objects.
# Every step shares the same run configuration, and therefore the same compute target.
pipeline = [ModuleStep(module, global_run_config) for module in pipeline_modules]

# Submit and run the pipeline under the given experiment name.
run_pipeline(steps=pipeline, experiment_name='NERTrain-demo-0722', workspace=ws)

== Creating ModuleStep: name=Import Data
   arguments=['python', '-m', 'azureml.studio.modulehost.module_invoker', '--module-name=azureml.studio.modules.dataio.import_data.import_data', '--please-specify-data-source', 'AzureBlobStorage', '--please-specify-authentication-type', 'PublicOrSAS', '--uri', 'https://chjinchetestse1240092388.blob.core.windows.net/data/train.tsv', '--file-format', 'TSV', '--uri-file-has-header-row', 'True', '--results-dataset', $AZUREML_DATAREFERENCE_Results_dataset]
   inputs=[]
   outputs=[$AZUREML_DATAREFERENCE_Results_dataset]

== Creating ModuleStep: name=Import Data
   arguments=['python', '-m', 'azureml.studio.modulehost.module_invoker', '--module-name=azureml.studio.modules.dataio.import_data.import_data', '--please-specify-data-source', 'AzureBlobStorage', '--please-specify-authentication-type', 'PublicOrSAS', '--uri', 'https://chjinchetestse1240092388.blob.core.windows.net/data/valid.tsv', '--file-format', 'TSV', '--uri-file-has-header-row', 'True', '

2019-07-25 00:14:32,209 Common     INFO       |   |   Create file 'data.dataset.parquet' via DataTableDatasetHandler - Start:
2019-07-25 00:14:32,209 ModuleHost INFO       |   |   |   Write DataTable into Dataset
2019-07-25 00:14:32,209 Common     INFO       |   |   |   Write pickle file 'data.dataset' - Start:
2019-07-25 00:14:32,278 Common     INFO       |   |   |   Write pickle file 'data.dataset' - End with 0.0681s elapsed.
2019-07-25 00:14:32,278 Common     INFO       |   |   |   Write to parquet file 'data.dataset.parquet'. Rows: 3250, Columns: 2. - Start:
2019-07-25 00:14:32,344 Common     INFO       |   |   |   Write to parquet file 'data.dataset.parquet'. Rows: 3250, Columns: 2. - End with 0.0662s elapsed.
2019-07-25 00:14:32,344 Common     INFO       |   |   Create file 'data.dataset.parquet' via DataTableDatasetHandler - End with 0.1348s elapsed.
2019-07-25 00:14:32,344 Common     INFO       |   |   Create sidecar file 'data.schema' - Start:
2019-07-25 00:14:32,357 Common   




StepRunId: 3a13414b-9cf9-4ca6-bd7f-24a36c4db60c
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourceGroups/lisal-dev/providers/Microsoft.MachineLearningServices/workspaces/lisal-amlservice/experiments/NERTrain-demo-0722/runs/3a13414b-9cf9-4ca6-bd7f-24a36c4db60c
StepRun( Import Data ) Status: Running

Streaming azureml-logs/70_driver_log.txt
Invoking official module by invoker 0.0.2.

args: (16 items)
--------------------------------------------
python
-m
azureml.studio.modulehost.module_invoker
--module-name=azureml.studio.modules.dataio.import_data.import_data
--please-specify-data-source
AzureBlobStorage
--please-specify-authentication-type
PublicOrSAS
--uri
https://chjinchetestse1240092388.blob.core.windows.net/data/train.tsv
--file-format
TSV
--uri-file-has-header-row
True
--results-dataset
/mnt/batch/tasks/shared/LS_root/jobs/lisal-amlservice/azureml/3a13414b-9cf9-4ca6-bd7f-24a36c4db60c/mounts/workspaceblobstore/azureml

2019-07-25 00:14:34,591 Common     INFO       |   |   Create sidecar file 'data.schema' - End with 0.1089s elapsed.
2019-07-25 00:14:34,591 Common     INFO       |   |   Create sidecar file 'data.visualization' - Start:
2019-07-25 00:14:34,602 Common     INFO       |   |   |   Convert DataTable to json with statistics and graph layout
2019-07-25 00:14:34,624 Common     INFO       |   |   |   Compute statistics of DataTable
2019-07-25 00:14:34,633 Module     INFO       |   |   |   Convert column Unique Values to int64 type
2019-07-25 00:14:34,634 Module     INFO       |   |   |   Convert column Missing Values to int64 type
2019-07-25 00:14:34,635 Common     INFO       |   |   |   Compute graph layout of DataTable
2019-07-25 00:14:34,689 Common     INFO       |   |   |   Convert data to json
2019-07-25 00:14:34,712 Common     INFO       |   |   |   Convert statistics to json
2019-07-25 00:14:34,713 Common     INFO       |   |   |   Convert graph layout to json
2019-07-25 00:14:34,724 Com




StepRunId: 778422b4-ef0f-4615-baf5-41168f9b6801
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourceGroups/lisal-dev/providers/Microsoft.MachineLearningServices/workspaces/lisal-amlservice/experiments/NERTrain-demo-0722/runs/778422b4-ef0f-4615-baf5-41168f9b6801
StepRun( NER Preprocess ) Status: Running

Streaming azureml-logs/70_driver_log.txt
Invoking custom module by invoker 0.0.2.

args: (11 items)
--------------------------------------------
python
-m
script.preprocess
--input_data_frame_path
/mnt/batch/tasks/shared/LS_root/jobs/lisal-amlservice/azureml/778422b4-ef0f-4615-baf5-41168f9b6801/mounts/workspaceblobstore/azureml/2e7a6ad8-00ed-478f-a19d-4e0200551db1/Results_dataset
--bert_model
bert-base-cased
--max_seq_length
128
--output_feature_dir
/mnt/batch/tasks/shared/LS_root/jobs/lisal-amlservice/azureml/778422b4-ef0f-4615-baf5-41168f9b6801/mounts/workspaceblobstore/azureml/778422b4-ef0f-4615-baf5-41168f9b6801/Output_fe


StepRun(NER Preprocess) Execution Summary
StepRun( NER Preprocess ) Status: Finished
{'runId': '778422b4-ef0f-4615-baf5-41168f9b6801', 'target': 'always-on-ds2v2', 'status': 'Completed', 'startTimeUtc': '2019-07-25T00:15:14.101563Z', 'endTimeUtc': '2019-07-25T00:15:37.479788Z', 'properties': {'azureml.runsource': 'azureml.StepRun', 'ContentSnapshotId': '37c1fb6c-246d-4fb3-9fc7-7eb0cab4e90d', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.pipelinerunid': '6fde383f-759a-4452-b344-2f42383dc601', '_azureml.ComputeTargetType': 'batchai', 'AzureML.DerivedImageName': 'azureml/azureml_86753b62ca88f156ee8f9b095cb58080'}, 'runDefinition': {'script': 'invoker.py', 'arguments': ['python', '-m', 'script.preprocess', '--input_data_frame_path', '$AZUREML_DATAREFERENCE_Results_dataset', '--bert_model', 'bert-base-cased', '--max_seq_length', '128', '--output_feature_dir', '$AZUREML_DATAREFERENCE_Output_feature'], 'sourceDirectoryDataStore': None, 'framework': 'Python', 'co




StepRunId: 975f4364-8115-42d0-a7b1-cb181e62d3ca
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourceGroups/lisal-dev/providers/Microsoft.MachineLearningServices/workspaces/lisal-amlservice/experiments/NERTrain-demo-0722/runs/975f4364-8115-42d0-a7b1-cb181e62d3ca
StepRun( NER Train ) Status: NotStarted
StepRun( NER Train ) Status: Running

Streaming azureml-logs/70_driver_log.txt
Invoking custom module by invoker 0.0.2.

args: (15 items)
--------------------------------------------
python
-m
script.train
--train_feature_dir
/mnt/batch/tasks/shared/LS_root/jobs/lisal-amlservice/azureml/975f4364-8115-42d0-a7b1-cb181e62d3ca/mounts/workspaceblobstore/azureml/d8bb2e8e-0ca3-4767-858a-943415b7c970/Output_feature
--bert_model
bert-base-cased
--max_seq_length
128
--num_train_epochs
1
--warmup_proportion
0.4
--output_model_dir
/mnt/batch/tasks/shared/LS_root/jobs/lisal-amlservice/azureml/975f4364-8115-42d0-a7b1-cb181e62d3ca/mounts/works

07/25/2019 00:17:10 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz from cache at /root/.pytorch_pretrained_bert/distributed_-1/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c
07/25/2019 00:17:10 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /root/.pytorch_pretrained_bert/distributed_-1/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c to temp dir /tmp/tmpfvah2zvl
07/25/2019 00:17:18 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_v

ActivityFailedException: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "-9",
        "details": [],
        "debugInfo": {
            "type": "SystemExit",
            "message": "-9",
            "stackTrace": "  File \"azureml-setup/context_manager_injector.py\", line 95, in execute_with_context\n    runpy.run_path(sys.argv[0], globals(), run_name=\"__main__\")\n  File \"/azureml-envs/azureml_c1d097ebd9fdcf5266c28c3afbbba0bb/lib/python3.6/runpy.py\", line 263, in run_path\n    pkg_name=pkg_name, script_name=fname)\n  File \"/azureml-envs/azureml_c1d097ebd9fdcf5266c28c3afbbba0bb/lib/python3.6/runpy.py\", line 96, in _run_module_code\n    mod_name, mod_spec, pkg_name, script_name)\n  File \"/azureml-envs/azureml_c1d097ebd9fdcf5266c28c3afbbba0bb/lib/python3.6/runpy.py\", line 85, in _run_code\n    exec(code, run_globals)\n  File \"invoker.py\", line 51, in <module>\n    exit(ret)\n  File \"/azureml-envs/azureml_c1d097ebd9fdcf5266c28c3afbbba0bb/lib/python3.6/_sitebuiltins.py\", line 26, in __call__\n    raise SystemExit(code)\n"
        }
    }
}