In [1]:
import inspect

from azureml.core import Workspace, Dataset
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core.graph import DataType
from azureml.pipeline.wrapper import Module, dsl, Pipeline

In [2]:
# Coordinates of the Azure ML workspace used by every cell below.
subscription_id = '4f455bd0-f95a-4b7d-8d08-078611508e0b'
resource_group = 'fundamental'
workspace_name = 'fundamental3'
namespace = workspace_name  # for loading module

# set this if you have multiple tenant
tenant_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)

# Hand the tenant-scoped credentials to the workspace explicitly. The original
# created `interactive_auth` but never used it, so the workspace was opened
# with default authentication instead of the tenant selected above.
workspace = Workspace(subscription_id, resource_group, workspace_name, auth=interactive_auth)
print(
    workspace.name,
    workspace.resource_group,
    workspace.location,
    workspace.subscription_id,
    workspace.compute_targets.keys(),
    sep='\n',
)

fundamental3
fundamental
eastasia
4f455bd0-f95a-4b7d-8d08-078611508e0b
dict_keys(['myaks1', 'aml-compute', 'my-compute'])


In [3]:
# choose compute target
print(workspace.compute_targets)
aml_compute_name = 'aml-compute'
try:
    # Reuse the cluster if one with this name already exists in the workspace.
    aml_compute = AmlCompute(workspace, aml_compute_name)
    print("Found existing compute target: {}".format(aml_compute_name))
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. The original bare
    # `except:` also swallowed KeyboardInterrupt and unrelated errors and then
    # tried to create a cluster anyway.
    print("Creating new compute target: {}".format(aml_compute_name))

    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                                min_nodes=1,
                                                                max_nodes=4)
    aml_compute = ComputeTarget.create(workspace, aml_compute_name, provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
# print(aml_compute)

{'myaks1': AksCompute(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=myaks1, id=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundamental3/computes/myaks1, type=AKS, provisioning_state=Failed, location=eastasia, tags=None), 'aml-compute': AmlCompute(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=aml-compute, id=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundamental3/computes/aml-compute, type=AmlCompute, provisioning_state=Succeeded, location=eastasia, tags=None), 'my-compute': {
  "id": "/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundame

In [4]:
# Register the custom data types used by the modules: a directory type and a
# single-file type. (Per the original author's note, an existing type is not
# registered again.)
registered_types = [
    DataType.create_data_type(workspace, type_name, description='', is_directory=is_dir)
    for type_name, is_dir in (('MyDirectory', True), ('MyFile', False))
]
# Show the last registered type, as the original cell did.
registered_types[-1]

<azureml.pipeline.core.graph.DataType at 0x7fa9c651c518>

In [5]:
# load data
# Make sure both input datasets are registered in the workspace, then fetch
# the registered versions as `data` (news corpus) and `char2index` (mapping).
dataset_name = 'THUCNews_TXT'
char2index_name = 'Char2Index_JSON'

if dataset_name not in workspace.datasets:
    print('Registering a THUCNews dataset for fasttext pipeline ...')
    thucnews_path = ['https://datastore4fasttext.file.core.windows.net/data4fasttext/THUCNews.txt']
    thucnews_files = Dataset.File.from_files(path=thucnews_path)
    thucnews_files.register(workspace=workspace, name=dataset_name, description='THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011')
    print('Registerd')
data = workspace.datasets[dataset_name]

if char2index_name not in workspace.datasets:
    print('Registering a Char2Index_JSON for fasttext pipeline ...')
    # Use dedicated names here. The original assigned this file dataset to
    # `data`, which overwrote the THUCNews dataset fetched above whenever the
    # mapping had to be registered, so the pipeline received the wrong input.
    char2index_path = ['https://datastore4fasttext.file.core.windows.net/data4fasttext/character2index.json']
    char2index_files = Dataset.File.from_files(path=char2index_path)
    char2index_files.register(workspace=workspace, name=char2index_name, description='The mapping relationship between character and index ')
    print('Registerd')
char2index = workspace.datasets[char2index_name]

print(data)
print(char2index)

FileDataset
{
  "source": [
    "https://datastore4fasttext.blob.core.windows.net/mytest3/THUCNews.txt"
  ],
  "definition": [
    "GetFiles"
  ],
  "registration": {
    "id": "9e16ea04-3074-4f84-8a8c-83adb226c4ae",
    "name": "THUCNews_TXT",
    "version": 1,
    "description": "THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011",
    "workspace": "Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental')"
  }
}
FileDataset
{
  "source": [
    "https://datastore4fasttext.blob.core.windows.net/mytest3/character2index.json"
  ],
  "definition": [
    "GetFiles"
  ],
  "registration": {
    "id": "9de9b550-46a4-41ce-b1b4-54b6c665fcf3",
    "name": "Char2Index_JSON",
    "version": 1,
    "description": "The mapping relationship between character and index ",
    "workspace": "Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-

In [6]:
# load module
def load_or_register_module(module_name, spec_name):
    """Load a module from the workspace, registering it from its spec if loading fails.

    Args:
        module_name: Name passed to ``Module.load`` (e.g. ``'FastText Train'``).
        spec_name: Snake-case identifier of the module. The spec file is read
            from ``'<spec_name>/<spec_name>.spec.yaml'`` and the status
            messages refer to ``'<spec_name>_module'``.

    Returns:
        The object returned by ``Module.load`` or, on fallback, by
        ``Module.register``.
    """
    try:
        module_func = Module.load(workspace=workspace, namespace=namespace, name=module_name)
    except Exception:
        # `Exception` rather than a bare `except:` so that Ctrl-C and
        # SystemExit still propagate instead of triggering a registration.
        print('not found {}_module, register it now...'.format(spec_name))
        yaml_file = '{0}/{0}.spec.yaml'.format(spec_name)
        return Module.register(workspace=workspace, yaml_file=yaml_file)
    print('found {}_module'.format(spec_name))
    return module_func


split_data_txt_module_func = load_or_register_module('Split Data Txt', 'split_data_txt')
split_data_txt_parallel_module_func = load_or_register_module('Split Data Txt Parallel', 'split_data_txt_parallel')
fasttext_train_module_func = load_or_register_module('FastText Train', 'fasttext_train')
fasttext_evaluation_module_func = load_or_register_module('FastText Evaluation', 'fasttext_evaluation')
fasttext_score_module_func = load_or_register_module('FastText Score', 'fasttext_score')
fasttext_score_parallel_module_func = load_or_register_module('FastText Score Parallel', 'fasttext_score_parallel')
compare_two_models_module_func = load_or_register_module('Compare Two Models', 'compare_two_models')

# inspect signature
# print(inspect.signature(split_data_txt_module_func))
# print(inspect.signature(split_data_txt_parallel_module_func))
# print(inspect.signature(fasttext_train_module_func))
# print(inspect.signature(fasttext_evaluation_module_func))
# print(inspect.signature(fasttext_score_module_func))
# print(inspect.signature(fasttext_score_parallel_module_func))
# print(inspect.signature(compare_two_models_module_func))

found split_data_txt_module
found split_data_txt_parallel_module
found fasttext_train_module
found fasttext_evaluation_module
found fasttext_score_module
found fasttext_score_parallel_module
found compare_two_models_module


In [7]:
# connect module
# Pipeline definition: split the corpus, train FastText on the training and
# validation splits, then score the test split with the trained model.
# `epochs` is the only pipeline parameter; it is forwarded to the train module.
# The steps run on `aml_compute_name` unless a module overrides its target.
@dsl.pipeline(name='test deploy', description='Test parallel', default_compute_target=aml_compute_name)
def training_pipeline(epochs):
    # Split the registered dataset `data` with a fixed seed.
    # NOTE(review): presumably the remaining 0.2 of the data is the test split — confirm against the module spec.
    split_data_txt_parallel = split_data_txt_parallel_module_func(
    input_dir = data,
    training_data_ratio = 0.7,
    validation_data_ratio = 0.1,
    random_split = True,
    seed = 7131928
    )

    # Train on the training/validation outputs of the split step.
    fasttext_train = fasttext_train_module_func(
    training_data_dir = split_data_txt_parallel.outputs.training_data_output,
    validation_data_dir = split_data_txt_parallel.outputs.validation_data_output,
    char2index_dir = char2index,
    epochs = epochs,
    batch_size = 64,
    learning_rate = 0.0005,
    embedding_dim = 128
    )
    
    # Score the test output of the split step with the model produced by training.
    fasttext_score_parallel = fasttext_score_parallel_module_func(
    texts_to_score = split_data_txt_parallel.outputs.test_data_output,
    fasttext_model = fasttext_train.outputs.trained_model_dir,
    char2index_dir = char2index
    )
    # Run settings for the scoring step only: node count, processes per node, mini-batch size.
    fasttext_score_parallel.runsettings.configure(node_count=4, process_count_per_node=4, mini_batch_size=128)

    # Expose the outputs of both the scoring step and the training step as pipeline outputs.
    return {**fasttext_score_parallel.outputs, **fasttext_train.outputs}


In [8]:
# Build the split and train modules directly (outside any pipeline function)
# to inspect what a module function call returns.
split_data_txt_parallel = split_data_txt_parallel_module_func(
    input_dir=data,
    training_data_ratio=0.7,
    validation_data_ratio=0.1,
    random_split=True,
    seed=16,
)

# Feed the split outputs into the training module.
split_outputs = split_data_txt_parallel.outputs
fasttext_train = fasttext_train_module_func(
    training_data_dir=split_outputs.training_data_output,
    validation_data_dir=split_outputs.validation_data_output,
    char2index_dir=char2index,
    epochs=1,
    batch_size=64,
    learning_rate=0.0005,
    embedding_dim=128,
)
type(fasttext_train)

azureml.pipeline.wrapper._module.Module

In [9]:
# pipeline
# Instantiate the pipeline defined by training_pipeline with epochs=1.
pipeline = training_pipeline(epochs=1)

In [10]:
# visualization
# Validate the pipeline; the cell output shows the graph widget and the validation result.
pipeline.validate()

<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_0f96f898-0002-4dd1-ad90-a7904ce8d04d_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [11]:
# pipeline_draft = pipeline.save(
#     experiment_name='my test',
# )
# pipeline_draft

In [12]:
# run
# Submit the pipeline to the 'parallel' experiment with epochs=1, wait for the
# run to complete, then display the run object.
run = pipeline.submit(experiment_name='parallel', pipeline_parameters={'epochs':1})
run.wait_for_completion()
run
#%


Submitted PipelineRun cec9b659-a387-4bf7-bda4-abaf0840cf8d
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/cec9b659-a387-4bf7-bda4-abaf0840cf8d?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3
PipelineRunId: cec9b659-a387-4bf7-bda4-abaf0840cf8d
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/cec9b659-a387-4bf7-bda4-abaf0840cf8d?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3


<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_0bb311d9-1c2a-4541-8bb4-38e754b24cc7_widget', env_json='{}', graph_jso…

Experiment,Id,Type,Status,Details Page,Docs Page
parallel,cec9b659-a387-4bf7-bda4-abaf0840cf8d,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [13]:
# Collect the child (step) runs of the pipeline run and print each one.
all_child_run = list(run.get_children())
for child_run in all_child_run:
    print(child_run, '\n')

# NOTE(review): the step whose output is registered is picked by position
# (second from the end of the child list); presumably this is the training
# step — confirm, since the index depends on the order get_children() yields.
selected_step_run = all_child_run[-2]
selected_step_run.get_file_names()  # return value is not used
model = selected_step_run.register_model(
    model_name='BestModel',
    model_path='Trained_model_dir',
    tags={'my_tags': 'fasttext'},
)
model

Run(Experiment: parallel,
Id: d92bd768-6668-4822-9ea2-bd9f43477881,
Type: azureml.StepRun,
Status: Completed) 

Run(Experiment: parallel,
Id: 58535d23-efcb-43d5-911d-787858a87299,
Type: azureml.StepRun,
Status: Completed) 

Run(Experiment: parallel,
Id: 88008a1a-ba20-435c-8476-9a66b440920b,
Type: azureml.StepRun,
Status: Completed) 



Model(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=BestModel, id=BestModel:5, version=5, tags={'my_tags': 'fasttext'}, properties={})

In [14]:
# register env
from azureml.core.environment import Environment

# Build the deployment environment from its conda specification file and
# register it in the workspace; the registered environment is displayed.
conda_spec_file = 'deployment/env_for_deployment.yaml'
myenv = Environment.from_conda_specification(name='env_for_deployment', file_path=conda_spec_file)
myenv.register(workspace=workspace)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200423.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "env_for_deployment",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "defaults"
            ],
         

In [15]:
# 3. Define inference configuration
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# Fetch version '1' of the registered deployment environment.
# NOTE(review): the version is pinned, so a newer version registered by the
# environment-registration cell would not be picked up — confirm this is intended.
myenv = Environment.get(workspace=workspace, name='env_for_deployment', version='1')

# The entry script is given together with the 'deployment' source directory.
inference_config = InferenceConfig(
    entry_script='scoring_for_deployment.py',
    source_directory='deployment',
    environment=myenv,
)
inference_config

InferenceConfig(entry_script=scoring_for_deployment.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=/mnt/batch/tasks/shared/LS_root/mounts/clusters/my-compute/code/Users/t-yangx/azureml-designer-demo/deployment, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x7fa9945c8b70>)

In [16]:
from azureml.core.model import Model
# Look up an already-registered model by name and version for deployment.
# NOTE(review): this selects 'BestModel_tmp' version 1, not the 'BestModel'
# registered from the pipeline run earlier in the notebook — confirm which one should be deployed.
# my_model = Model(workspace=workspace, name='BestModel', version='1')
my_model = Model(workspace=workspace, name='BestModel_tmp', version='1')
my_model

Model(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=BestModel_tmp, id=BestModel_tmp:1, version=1, tags={}, properties={})

In [17]:
# Show the path that 'BestModel_tmp' version 1 resolves to (see the cell output).
Model.get_model_path(model_name='BestModel_tmp', version=1, _workspace=workspace)

'azureml-models/BestModel_tmp/1/BestModel'

In [54]:
from azureml.core.webservice import AciWebservice, Webservice, LocalWebservice
from azureml.core.model import Model

# Deploy `my_model` as a local web service on port 8890. The commented line
# keeps the alternative Azure Container Instances configuration.
# deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)
deployment_config = LocalWebservice.deploy_configuration(port=8890)
service = Model.deploy(
    workspace=workspace,
    name="my-deployment1",
    models=[my_model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)

# Wait for the deployment to finish (streaming its output), then report the service state.
service.wait_for_deployment(show_output=True)
print(service.state)

Downloading model BestModel_tmp:1 to /tmp/azureml_romex5ko/BestModel_tmp/1
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry fundamental33c005c1f.azurecr.io
Logging into Docker registry fundamental33c005c1f.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM fundamental33c005c1f.azurecr.io/azureml/azureml_55f6443a7f1f616df548a00877130395
 ---> ecd09a31e78d
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> 08826c108e36
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjRmNDU1YmQwLWY5NWEtNGI3ZC04ZDA4LTA3ODYxMTUwOGUwYiIsInJlc291cmNlR3JvdXBOYW1lIjoiZnVuZGFtZW50YWwiLCJhY2NvdW50TmFtZSI6ImZ1bmRhbWVudGFsMyIsIndvcmtzcGFjZUlkIjoiYTdjMmFjYWEtYzhmMS00NDhiLWI4OTQtYzJlN2E3MWIzYTMyIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 48efe8b4a47b
 ---> 453ab34a0924
Step 4/5 : RUN mv '/var/azureml-app/tmpjvchxgye.py' /var/azureml-a

In [55]:
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# Get a token to authenticate to the compute instance from remote
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

# Build the request headers on a copy so `auth_header` itself is not mutated,
# then add the content type header.
headers = dict(auth_header)
headers['Content-Type'] = 'application/json'
# print(headers)

# Keep the payload dict and its serialized form under separate names;
# `standard_sample_input` is the JSON string that is actually sent.
sample_payload = {'param': {'input_sentence': 'haha   haha i want to travel around the world'}}
standard_sample_input = json.dumps(sample_payload)
# print(type(standard_sample_input))
# standard_sample_output = {'category': 'dream'}

# Without a timeout, requests waits indefinitely if the scoring container
# stops responding, which would hang the notebook kernel.
request_timeout_seconds = 30
response = requests.post(
    service.scoring_uri,
    data=standard_sample_input,
    headers=headers,
    timeout=request_timeout_seconds,
)
print(response.status_code)
print(response.elapsed)
# print(response.json())

200
0:00:00.009620


In [56]:
from pprint import pprint
# Pretty-print the logs returned by the deployed service.
pprint(service.get_logs())

('2020-07-13T13:25:47,308078738+00:00 - iot-server/run \n'
 '2020-07-13T13:25:47,307548735+00:00 - rsyslog/run \n'
 '2020-07-13T13:25:47,318965911+00:00 - gunicorn/run \n'
 '2020-07-13T13:25:47,319718416+00:00 - nginx/run \n'
 '/usr/sbin/nginx: '
 '/azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libcrypto.so.1.0.0: '
 'no version information available (required by /usr/sbin/nginx)\n'
 '/usr/sbin/nginx: '
 '/azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libcrypto.so.1.0.0: '
 'no version information available (required by /usr/sbin/nginx)\n'
 '/usr/sbin/nginx: '
 '/azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.1.0.0: '
 'no version information available (required by /usr/sbin/nginx)\n'
 '/usr/sbin/nginx: '
 '/azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.1.0.0: '
 'no version information available (required by /usr/sbin/nginx)\n'
 '/usr/sbin/nginx: '
 '/azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.

In [57]:
# Print the JSON-decoded body of the scoring response.
print(response.json())

sports


In [55]:
# Display the URI that scoring requests are posted to.
service.scoring_uri

'http://localhost:8890/score'

In [72]:
# Decode the JSON request body back into a Python object and show its value and type.
# NOTE(review): the recorded output below lacks the 'param' wrapper, so it
# appears to come from an earlier version of the payload — re-run to refresh.
aaa= json.loads(standard_sample_input)
print(aaa)
print(type(aaa))

{'input_sentence': 'i want to travel around the world'}
<class 'dict'>


In [101]:
# Properties of the step run when reuse is Yes
aaa= all_child_run[-2].properties
aaa

{'azureml.reusedrunid': '9be43325-d271-42e8-a484-d0972718391c',
 'azureml.reusednodeid': '7363a1e9',
 'azureml.reusedpipeline': 'ea73f63e-1c97-49fb-9e1a-a30f50475af4',
 'azureml.reusedpipelinerunid': 'ea73f63e-1c97-49fb-9e1a-a30f50475af4',
 'azureml.runsource': 'azureml.StepRun',
 'azureml.nodeid': '747f4d2d',
 'ContentSnapshotId': '475a76b4-103b-469e-b204-8e8e704d1e02',
 'StepType': 'PythonScriptStep',
 'azureml.moduleid': '063c9b75-df48-4ad2-879e-74692d164e57',
 'azureml.pipelinerunid': '61f676e8-9ab2-4f9a-bc89-248ddf977479',
 '_azureml.ComputeTargetType': 'amlcompute',
 'ProcessInfoFile': 'azureml-logs/process_info.json',
 'ProcessStatusFile': 'azureml-logs/process_status.json'}

In [18]:
# Properties of the step run when reuse is No
aaa= all_child_run[-2].properties
aaa

{'azureml.runsource': 'azureml.StepRun',
 'ContentSnapshotId': '475a76b4-103b-469e-b204-8e8e704d1e02',
 'StepType': 'PythonScriptStep',
 'azureml.moduleid': '063c9b75-df48-4ad2-879e-74692d164e57',
 'azureml.pipelinerunid': 'cec9b659-a387-4bf7-bda4-abaf0840cf8d',
 '_azureml.ComputeTargetType': 'amlcompute',
 'ProcessInfoFile': 'azureml-logs/process_info.json',
 'ProcessStatusFile': 'azureml-logs/process_status.json'}

In [19]:
# Get the run id
all_child_run[-2].id

'58535d23-efcb-43d5-911d-787858a87299'

In [102]:
# Check the type of the step-run properties object held in `aaa`.
type(aaa)

dict