In [28]:
import sys
# Put the current working directory first on the module search path
# (presumably so the local helper module imported below resolves - confirm).
sys.path.insert(0, '.')

In [29]:
import inspect
import os

from azureml.pipeline.core.graph import DataType
from azureml.pipeline.wrapper import Module, dsl, Pipeline
from fasttext_pipeline_utils import choose_workspace, choose_compute_target, load_dataset, register_datatype, load_module, get_source_child_run_id, download_model, register_model_from_local, register_enviroment, get_env, define_inference_configuration, deploy_locally, deploy_to_ACI, deploy_to_AKS, record_model_path, get_model_path

In [30]:
# choose a workspace
subscription_id = '4f455bd0-f95a-4b7d-8d08-078611508e0b'
resource_group = 'fundamental'
workspace_name = 'fundamental3'
# set this if you have multiple tenants
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"
# `workspace` is the handle passed to the helper calls in the training,
# registration and local/ACI deployment cells below
workspace=choose_workspace(subscription_id, resource_group, workspace_name, tenant_id)

name: fundamental3
resource_group fundamental
location eastasia
subscription_id 4f455bd0-f95a-4b7d-8d08-078611508e0b
compute_targets dict_keys(['myaks2', 'aml-compute', 'my-compute'])


In [31]:
# choose a compute target
name='aml-compute'
# `aml_compute.name` is later used as the pipeline's default compute target
aml_compute = choose_compute_target(workspace=workspace, name=name)

Found existing compute target: aml-compute
AmlCompute(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=aml-compute, id=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundamental3/computes/aml-compute, type=AmlCompute, provisioning_state=Succeeded, location=eastasia, tags=None)


In [32]:
# register my own datatypes: one directory-based and one file-based
custom_datatypes = (
    ('MyDirectory', True),
    ('MyFile', False),
)
for datatype_name, as_directory in custom_datatypes:
    register_datatype(workspace=workspace, name=datatype_name, description='', is_directory=as_directory)

Datatype of MyDirectory is registered
Datatype of MyFile is registered


In [33]:
# load data: the THUCNews text dataset and the character -> index mapping
data = load_dataset(
    name='THUCNews_TXT',
    path=['https://datastore4fasttext.file.core.windows.net/data4fasttext/THUCNews.txt'],
    description='THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011',
    workspace=workspace,
)

char2index = load_dataset(
    name='Char2Index_JSON',
    path=['https://datastore4fasttext.file.core.windows.net/data4fasttext/character2index.json'],
    description='The mapping relationship between character and index',
    workspace=workspace,
)

# echo the description attached to each loaded dataset
for label, dataset in (('data:', data), ('char2index:', char2index)):
    print(label, dataset.description)

Successfully loaded THUCNews_TXT
Successfully loaded Char2Index_JSON
data: THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011
char2index: The mapping relationship between character and index 


In [34]:
# load module
namespace = workspace.name

# (module display name, spec yaml path), listed in the order the modules are loaded
module_specs = [
    ('Split Data Txt', 'split_data_txt/split_data_txt.spec.yaml'),
    ('Split Data Txt Parallel', 'split_data_txt_parallel/split_data_txt_parallel.spec.yaml'),
    ('FastText Train', 'fasttext_train/fasttext_train.spec.yaml'),
    ('FastText Evaluation', 'fasttext_evaluation/fasttext_evaluation.spec.yaml'),
    ('FastText Score', 'fasttext_score/fasttext_score.spec.yaml'),
    ('FastText Score Parallel', 'fasttext_score_parallel/fasttext_score_parallel.spec.yaml'),
    ('Compare Two Models', 'compare_two_models/compare_two_models.spec.yaml'),
]

# one load_module call per spec; results are keyed by module display name
module_funcs = {}
for name, yaml_file_path in module_specs:
    module_funcs[name] = load_module(workspace, namespace, name, yaml_file_path)

# expose each module function under its own notebook-level name
split_data_txt_module_func = module_funcs['Split Data Txt']
split_data_txt_parallel_module_func = module_funcs['Split Data Txt Parallel']
fasttext_train_module_func = module_funcs['FastText Train']
fasttext_evaluation_module_func = module_funcs['FastText Evaluation']
fasttext_score_module_func = module_funcs['FastText Score']
fasttext_score_parallel_module_func = module_funcs['FastText Score Parallel']
compare_two_models_module_func = module_funcs['Compare Two Models']

# inspect signature (uncomment to see the inputs each module function accepts)
# print(inspect.signature(split_data_txt_module_func))
# print(inspect.signature(split_data_txt_parallel_module_func))
# print(inspect.signature(fasttext_train_module_func))
# print(inspect.signature(fasttext_evaluation_module_func))
# print(inspect.signature(fasttext_score_module_func))
# print(inspect.signature(fasttext_score_parallel_module_func))
# print(inspect.signature(compare_two_models_module_func))

found the module of Split Data Txt
found the module of Split Data Txt Parallel
found the module of FastText Train
found the module of FastText Evaluation
found the module of FastText Score
found the module of FastText Score Parallel
found the module of Compare Two Models


In [35]:
# connect module
@dsl.pipeline(
    name='test deploy',
    description='Test parallel',
    default_compute_target=aml_compute.name,
)
def training_pipeline(epochs):
    """Wire the split, train and parallel-score modules into one pipeline.

    Args:
        epochs: forwarded unchanged to the ``epochs`` input of the FastText Train module.

    Returns:
        A dict merging the outputs of the parallel scoring step with the outputs of
        the training step; the training outputs are unpacked last, so they win on
        any key clash.
    """
    # split the dataset into training / validation / test parts
    split_data_txt_parallel = split_data_txt_parallel_module_func(
        input_dir=data,
        training_data_ratio=0.7,
        validation_data_ratio=0.1,
        random_split=True,
        seed=7152113,
    )

    # train on the training split, using the validation split and the char2index mapping
    fasttext_train = fasttext_train_module_func(
        training_data_dir=split_data_txt_parallel.outputs.training_data_output,
        validation_data_dir=split_data_txt_parallel.outputs.validation_data_output,
        char2index_dir=char2index,
        epochs=epochs,
        batch_size=64,
        learning_rate=0.0005,
        embedding_dim=128,
    )

    # score the test split with the trained model
    fasttext_score_parallel = fasttext_score_parallel_module_func(
        texts_to_score=split_data_txt_parallel.outputs.test_data_output,
        fasttext_model=fasttext_train.outputs.trained_model_dir,
        char2index_dir=char2index,
    )
    fasttext_score_parallel.runsettings.configure(
        node_count=4,
        process_count_per_node=4,
        mini_batch_size=128,
    )

    pipeline_outputs = {**fasttext_score_parallel.outputs, **fasttext_train.outputs}
    return pipeline_outputs


In [36]:
# pipeline
# instantiate the pipeline from training_pipeline with a single training epoch
pipeline = training_pipeline(epochs=1)

In [37]:
# validate the pipeline; in the notebook this also renders a graph widget
# and returns a dict with the validation result and any errors
pipeline.validate()

<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_c0076b8a-40ca-46dc-b46d-d69bbc968780_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [38]:
# save pipeline
# pipeline_draft = pipeline.save(experiment_name='my test')
# pipeline_draft

In [39]:

# run
# submit under the 'parallel' experiment, passing `epochs` as a pipeline parameter
run = pipeline.submit(experiment_name='parallel', pipeline_parameters={'epochs':1})
# block until the pipeline run has finished, so the next cells can read its child runs
run.wait_for_completion()
# bare expression: display the run summary as the cell output
run

Submitted PipelineRun 843584f4-f8db-4fcc-bdc6-7bae8ba9fc23
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/843584f4-f8db-4fcc-bdc6-7bae8ba9fc23?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3
PipelineRunId: 843584f4-f8db-4fcc-bdc6-7bae8ba9fc23
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/843584f4-f8db-4fcc-bdc6-7bae8ba9fc23?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3


<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_78f3cd03-282f-4607-800e-739fb67ca630_widget', env_json='{}', graph_jso…

Experiment,Id,Type,Status,Details Page,Docs Page
parallel,843584f4-f8db-4fcc-bdc6-7bae8ba9fc23,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [40]:
# get the child run of FastText Train
train_step_name = 'FastText Train'
child_run = None
for cr in run.get_children():
    # no early exit: if several children share the name, the last one listed is kept
    if cr.name == train_step_name:
        child_run = cr

# Fail here with a clear message instead of an AttributeError on None further down
if child_run is None:
    raise RuntimeError(
        'No child run named {!r} was found in the pipeline run; '
        'check that the step exists and that the run completed.'.format(train_step_name)
    )

print(child_run,'\n')
# list the files stored on the step run (the model folder and logs)
print(child_run.get_file_names())

Run(Experiment: parallel,
Id: cd589fa9-414d-4cbc-8245-dc21b1b233e2,
Type: azureml.StepRun,
Status: Completed) 

['Trained_model_dir', 'azureml-logs/55_azureml-execution-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/65_job_prep-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/executionlogs.txt', 'logs/azureml/stderrlogs.txt', 'logs/azureml/stdoutlogs.txt']


In [41]:
# download the trained model
deploy_source_dir = './deployment'
# output folder name; this depends on the module of FastText Train
trained_model_dir = 'Trained_model_dir'

source_child_run_id = get_source_child_run_id(child_run)
# datastore location of the model folder written by the training step
path_on_data_store = f'azureml/{source_child_run_id}/{trained_model_dir}'
target_path = deploy_source_dir

print('path_on_data_store', path_on_data_store)
print('target_path', target_path)
download_model(
    workspace,
    path_on_data_store,
    target_path=target_path,
    overwrite=True,
)

path_on_data_store azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir
target_path ./deployment
Downloading azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel
Downloaded azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel, 1 files out of an estimated total of 1
model is downloaded to the directory of ./deployment


In [42]:
# register the trained model from local
model_name = 'BestModel'
tags = {"algorithm": "fasttext"}

# local location of the downloaded model file: <deploy dir>/<datastore path>/<model name>
model_path_parts = (deploy_source_dir, path_on_data_store, model_name)
model_path = os.path.join(*model_path_parts)
print('model_path:', model_path)

model = register_model_from_local(workspace, model_name, model_path, tags=tags)

# record model path for deployment
source_directory = 'deployment'
new_model_path = get_model_path(
    workspace,
    name=model.name,
    version=model.version,
)
print('new_model_path:', new_model_path)
record_model_path(source_directory, new_model_path)

model_path: ./deployment/azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel
Registering model BestModel
model is registered from local
new_model_path: azureml-models/BestModel/25/BestModel
record new model path: azureml-models/BestModel/25/BestModel


In [43]:
# register env
# `name` is reused by the following cell to look the environment up again
name='env_for_deployment'
# environment definition file, stored inside the deployment source directory
file_path='deployment/env_for_deployment.yaml'
env=register_enviroment(workspace, name, file_path)

In [44]:
# define inference configuration
entry_script='scoring_for_deployment.py'
# NOTE(review): the environment version is pinned to '1'; if the registered
# environment has moved to a newer version this will not pick it up - confirm intended
version='1'
# `name` is the environment name set in the register-env cell
environment=get_env(workspace, name, version)
# `source_directory` is set in the model-registration cell
inference_config = define_inference_configuration(entry_script, source_directory, environment)

In [45]:
# deploy locally
service_name='local-deploy-test'
# deploy the model registered above
models=[model]
# port passed to the local deployment helper
port=8892
service_locally = deploy_locally(workspace, service_name, models, inference_config, port=port)

Downloading model BestModel:25 to /tmp/azureml__yd9z7qz/BestModel/25
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry fundamental33c005c1f.azurecr.io
Logging into Docker registry fundamental33c005c1f.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM fundamental33c005c1f.azurecr.io/azureml/azureml_55f6443a7f1f616df548a00877130395
 ---> ecd09a31e78d
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> ab6a4aec9c4e
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjRmNDU1YmQwLWY5NWEtNGI3ZC04ZDA4LTA3ODYxMTUwOGUwYiIsInJlc291cmNlR3JvdXBOYW1lIjoiZnVuZGFtZW50YWwiLCJhY2NvdW50TmFtZSI6ImZ1bmRhbWVudGFsMyIsIndvcmtzcGFjZUlkIjoiYTdjMmFjYWEtYzhmMS00NDhiLWI4OTQtYzJlN2E3MWIzYTMyIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 3dd058f08d31
 ---> 0822e636a71b
Step 4/5 : RUN mv '/var/azureml-app/tmpynegc0qd.py' /var/azureml-app/mai

In [46]:
# deploy to ACI (Azure Container Instances)
# every time we deploy to ACI, we either need a new service_name or must delete the existing
# service beforehand; overwrite=True is passed below for the latter case
service_name='aci-deploy-test5'
models=[model]
service_aci=deploy_to_ACI(workspace, service_name, models, inference_config, cpu_cores=1, memory_gb=1, overwrite=True)

found existing service named aci-deploy-test5, delete it right now...
Running.......................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [47]:
# workspace with AKS
# NOTE: this rebinds subscription_id / resource_group / workspace_name / tenant_id,
# which were first set for the training workspace; `workspace` itself is left untouched
subscription_id = '74eccef0-4b8d-4f83-b5f9-fa100d155b22'
resource_group = 'DesignerDRI'
workspace_name = 'DesignerDRI_EASTUS'
# set this if you have multiple tenants
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"

workspace_aks=choose_workspace(subscription_id, resource_group, workspace_name, tenant_id)

name: DesignerDRI_EASTUS
resource_group DesignerDRI
location eastus
subscription_id 74eccef0-4b8d-4f83-b5f9-fa100d155b22
compute_targets dict_keys(['attachedcompute', 'myaks1', 'default', 'compute', 'cpu-cluster', 'aml-compute'])


In [48]:
# register the trained model from local, this time in the AKS workspace
model_name = 'BestModel'
tags = {"algorithm": "fasttext"}

# same local model file that was downloaded earlier
model_path = os.path.join(
    deploy_source_dir,
    path_on_data_store,
    model_name,
)
print('model_path:', model_path)

# NOTE: this rebinds `model` to the registration in workspace_aks
model = register_model_from_local(workspace_aks, model_name, model_path, tags=tags)

# record model path for deployment
# NOTE: called again for the same source directory, now with the workspace_aks model path
source_directory = 'deployment'
new_model_path = get_model_path(
    workspace_aks,
    name=model.name,
    version=model.version,
)
print('new_model_path:', new_model_path)
record_model_path(source_directory, new_model_path)

model_path: ./deployment/azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel
Registering model BestModel
model is registered from local
new_model_path: azureml-models/BestModel/9/BestModel
record new model path: azureml-models/BestModel/9/BestModel


In [49]:
# deploy to AKS (Azure Kubernetes Service)
# name of the AKS compute target in workspace_aks
attachment_name='myaks1'
# every time we deploy to AKS, we either need a new service_name or must delete the existing service beforehand
service_name='aks-deploy-test5'
models=[model]
# token-based auth: callers must send a bearer token obtained from the service
token_auth_enabled=True
# NOTE(review): inference_config was built from an environment fetched via `workspace`,
# not `workspace_aks` - confirm that reusing it across workspaces is intended
service_aks=deploy_to_AKS(workspace_aks, attachment_name, service_name, models, inference_config, token_auth_enabled=token_auth_enabled,
                  cpu_cores=1, memory_gb=1)

found existing service named aks-deploy-test5, delete it right now...
auth type: token
Running........
Succeeded
AKS service creation operation finished, operation "Succeeded"
Healthy


In [53]:
# consume deployment
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# Get an authentication header via interactive login
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

# Build the request headers on a copy, so the dict returned by the auth object
# is never mutated through an alias
headers = dict(auth_header)
# Add content type header
headers['Content-Type'] = 'application/json'
# print(headers)

# JSON-encoded request body (json.dumps escapes the non-ASCII text by default)
standard_sample_input = json.dumps(
    {'param':{'input_sentence': '受疫情影响, 今年很多学生不得不在家上课'}}
)

# Endpoint to call. `service_locally` and `service_aci` are the alternatives, but the
# token handling below applies to the AKS service, which was deployed with token auth enabled.
service = service_aks

# Replace the Authorization header with the service-issued bearer token;
# `refresh_by` is returned alongside the token and is not used here
token, refresh_by = service.get_token()
# print(token)
headers['Authorization'] = f'Bearer {token}'

response = requests.post(service.scoring_uri, data=standard_sample_input, headers=headers)
print(service.scoring_uri)
print(response)
# print(response.status_code)
# print(response.elapsed)
# print(response.content)

# Surface HTTP errors explicitly instead of failing inside .json() on an error body
response.raise_for_status()
print(response.json())

http://52.157.99.126:80/api/v1/service/aks-deploy-test5/score
<Response [200]>
education
