In [1]:
# Put the current working directory first on the module search path so that
# local helper modules can be imported by the cells below.
import sys
sys.path.insert(0, '.')

In [19]:
import inspect
import os

from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.pipeline.core.graph import DataType
from azureml.pipeline.wrapper import Module, dsl, Pipeline
from fasttext_pipeline_utils import choose_workspace, choose_compute_target, load_dataset, register_datatype, load_module, get_source_child_run_id, download_model, register_model_from_local, register_enviroment, get_env, define_inference_configuration, deploy_locally, deploy_to_ACI, deploy_to_AKS

In [3]:
# Choose a workspace.
# NOTE(review): the subscription and tenant identifiers are hard-coded here;
# consider loading them from a config file or environment variables.
subscription_id = '4f455bd0-f95a-4b7d-8d08-078611508e0b'
resource_group = 'fundamental'
workspace_name = 'fundamental3'
# Set this if you have multiple tenants.
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"
workspace=choose_workspace(subscription_id, resource_group, workspace_name, tenant_id)

name: fundamental3
resource_group fundamental
location eastasia
subscription_id 4f455bd0-f95a-4b7d-8d08-078611508e0b
compute_targets dict_keys(['myaks2', 'aml-compute', 'my-compute'])


In [4]:
# Choose a compute target by name from the selected workspace.
name='aml-compute'
aml_compute = choose_compute_target(workspace=workspace, name=name)

Found existing compute target: aml-compute
AmlCompute(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=aml-compute, id=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourceGroups/fundamental/providers/Microsoft.MachineLearningServices/workspaces/fundamental3/computes/aml-compute, type=AmlCompute, provisioning_state=Succeeded, location=eastasia, tags=None)


In [5]:
# Register two custom data types in the workspace:
# 'MyDirectory' (is_directory=True) and 'MyFile' (is_directory=False).
register_datatype(workspace=workspace, name='MyDirectory', description='', is_directory=True)
register_datatype(workspace=workspace, name='MyFile', description='', is_directory=False)

Datatype of MyDirectory is registered
Datatype of MyFile is registered


In [6]:
# Load the two pipeline inputs: the news corpus and the character-to-index mapping.
data = load_dataset(
    name='THUCNews_TXT',
    path=['https://datastore4fasttext.file.core.windows.net/data4fasttext/THUCNews.txt'],
    description='THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011',
    workspace=workspace,
)

char2index = load_dataset(
    name='Char2Index_JSON',
    path=['https://datastore4fasttext.file.core.windows.net/data4fasttext/character2index.json'],
    description='The mapping relationship between character and index',
    workspace=workspace,
)

# Echo each dataset's description as a quick check that both handles resolved.
for label, dataset in (('data:', data), ('char2index:', char2index)):
    print(label, dataset.description)

Successfully loaded THUCNews_TXT
Successfully loaded Char2Index_JSON
data: THUCNews dataset is generated by filtering and filtering historical data of Sina News RSS subscription channel from 2005 to 2011
char2index: The mapping relationship between character and index 


In [7]:
# Load the module functions used to build the pipeline.
# Every spec file lives at '<stem>/<stem>.spec.yaml', so a single helper replaces
# the repeated name / yaml_file_path / load_module stanzas.
namespace = workspace.name


def _load_module_func(module_name, spec_stem):
    """Load one module function through load_module.

    Args:
        module_name: Display name of the module, e.g. 'FastText Train'.
        spec_stem: Name shared by the spec's folder and file, e.g. 'fasttext_train'.

    Returns:
        Whatever load_module returns for that module; the pipeline definition
        calls it to create a step.
    """
    yaml_file_path = f'{spec_stem}/{spec_stem}.spec.yaml'
    return load_module(workspace, namespace, module_name, yaml_file_path)


split_data_txt_module_func = _load_module_func('Split Data Txt', 'split_data_txt')
split_data_txt_parallel_module_func = _load_module_func('Split Data Txt Parallel', 'split_data_txt_parallel')
fasttext_train_module_func = _load_module_func('FastText Train', 'fasttext_train')
fasttext_evaluation_module_func = _load_module_func('FastText Evaluation', 'fasttext_evaluation')
fasttext_score_module_func = _load_module_func('FastText Score', 'fasttext_score')
fasttext_score_parallel_module_func = _load_module_func('FastText Score Parallel', 'fasttext_score_parallel')
compare_two_models_module_func = _load_module_func('Compare Two Models', 'compare_two_models')

# Uncomment to inspect the signature of a loaded module function.
# print(inspect.signature(split_data_txt_module_func))
# print(inspect.signature(split_data_txt_parallel_module_func))
# print(inspect.signature(fasttext_train_module_func))
# print(inspect.signature(fasttext_evaluation_module_func))
# print(inspect.signature(fasttext_score_module_func))
# print(inspect.signature(fasttext_score_parallel_module_func))
# print(inspect.signature(compare_two_models_module_func))

found the module of Split Data Txt
found the module of Split Data Txt Parallel
found the module of FastText Train
found the module of FastText Evaluation
found the module of FastText Score
found the module of FastText Score Parallel
found the module of Compare Two Models


In [8]:
# connect module
@dsl.pipeline(name='test deploy', description='Test parallel', default_compute_target=aml_compute.name)
def training_pipeline(epochs):
    """Wire the split, train and parallel-score modules into one pipeline.

    Args:
        epochs: Forwarded to the FastText Train module as its `epochs` argument.

    Returns:
        A dict merging the outputs of the parallel scoring step with the
        outputs of the training step (training outputs win on key clashes).
    """
    # Split the raw text with fixed ratios and a fixed seed.
    split_data_txt_parallel = split_data_txt_parallel_module_func(
        input_dir=data,
        training_data_ratio=0.7,
        validation_data_ratio=0.1,
        random_split=True,
        seed=7152113,
    )

    # Train on the training / validation splits produced above.
    fasttext_train = fasttext_train_module_func(
        training_data_dir=split_data_txt_parallel.outputs.training_data_output,
        validation_data_dir=split_data_txt_parallel.outputs.validation_data_output,
        char2index_dir=char2index,
        epochs=epochs,
        batch_size=64,
        learning_rate=0.0005,
        embedding_dim=128,
    )

    # Score the held-out test split with the trained model.
    fasttext_score_parallel = fasttext_score_parallel_module_func(
        texts_to_score=split_data_txt_parallel.outputs.test_data_output,
        fasttext_model=fasttext_train.outputs.trained_model_dir,
        char2index_dir=char2index,
    )
    fasttext_score_parallel.runsettings.configure(
        node_count=4,
        process_count_per_node=4,
        mini_batch_size=128,
    )

    return {**fasttext_score_parallel.outputs, **fasttext_train.outputs}


In [9]:
# Build the pipeline object by calling the decorated function with epochs=1.
pipeline = training_pipeline(epochs=1)

In [10]:
# Validate the pipeline; in the notebook this also renders a graph widget
# and returns a dict with the validation result and any errors.
pipeline.validate()

<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_2cb374a2-854f-4ce1-9e2e-33926e70acb9_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [11]:
# save pipeline
# pipeline_draft = pipeline.save(experiment_name='my test')
# pipeline_draft

In [12]:
# Submit the pipeline to the 'parallel' experiment and wait for it to complete.
# NOTE(review): 'epochs' is passed again here as a pipeline parameter, in addition
# to epochs=1 given when the pipeline object was built.
run = pipeline.submit(experiment_name='parallel', pipeline_parameters={'epochs':1})
run.wait_for_completion()
# Bare expression so the notebook displays the run summary.
run

Submitted PipelineRun 3e404217-983f-46d0-850f-1aa13bb656f9
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/3e404217-983f-46d0-850f-1aa13bb656f9?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3
PipelineRunId: 3e404217-983f-46d0-850f-1aa13bb656f9
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/parallel/runs/3e404217-983f-46d0-850f-1aa13bb656f9?wsid=/subscriptions/4f455bd0-f95a-4b7d-8d08-078611508e0b/resourcegroups/fundamental/workspaces/fundamental3


<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_f8e17598-d48e-47ae-9ba8-c498bc1ee6af_widget', env_json='{}', graph_jso…

Experiment,Id,Type,Status,Details Page,Docs Page
parallel,3e404217-983f-46d0-850f-1aa13bb656f9,azureml.PipelineRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [13]:
# Get the child run that belongs to the 'FastText Train' step.
train_step_name = 'FastText Train'
child_run = None
for cr in run.get_children():
    if cr.name == train_step_name:
        # No break on purpose: if several children match, the last one wins.
        child_run = cr

# Fail with a clear message instead of an AttributeError on None further down.
if child_run is None:
    raise LookupError(
        'No child run named {!r} was found in the pipeline run'.format(train_step_name)
    )

print(child_run,'\n')
print(child_run.get_file_names())

Run(Experiment: parallel,
Id: e9975a6c-e464-4e82-9e4c-f455278f3084,
Type: azureml.StepRun,
Status: Completed) 

['Trained_model_dir', 'azureml-logs/55_azureml-execution-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/65_job_prep-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_fb77582e7cc8ed084df58427e709a3b17a8427a4b23e098fef9097dba30fa63b_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/executionlogs.txt', 'logs/azureml/stderrlogs.txt', 'logs/azureml/stdoutlogs.txt']


In [14]:
# Download the trained model produced by the 'FastText Train' step.
source_child_run_id = get_source_child_run_id(child_run)

# Output folder name; this depends on the FastText Train module.
trained_model_dir = 'Trained_model_dir'
deploy_source_dir = './deployment'
target_path = deploy_source_dir
path_on_data_store = f'azureml/{source_child_run_id}/{trained_model_dir}'

# Show where the model is read from and where it is written to.
for label, value in (('path_on_data_store', path_on_data_store), ('target_path', target_path)):
    print(label, value)

download_model(workspace, path_on_data_store, target_path=target_path, overwrite=True)

path_on_data_store azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir
target_path ./deployment
Downloading azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel
Downloaded azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel, 1 files out of an estimated total of 1
model is downloaded to the directory of ./deployment


In [24]:
# Register the trained model from the local download.
model_name='BestModel'
# The download keeps the datastore-relative path under deploy_source_dir, so the
# model file sits at <deploy_source_dir>/<path_on_data_store>/<model_name>.
model_path=os.path.join(deploy_source_dir,path_on_data_store, model_name)
print('model_path',model_path)
tags={"algorithm": "fasttext"}
model=register_model_from_local(workspace, model_name, model_path, tags=tags)
# Bare expression so the notebook displays the registered model.
model

model_path ./deployment/azureml/e5c21cd1-0fb4-4acb-b6be-ff382c3b7f89/Trained_model_dir/BestModel
Registering model BestModel
model is registered from local


Model(workspace=Workspace.create(name='fundamental3', subscription_id='4f455bd0-f95a-4b7d-8d08-078611508e0b', resource_group='fundamental'), name=BestModel, id=BestModel:11, version=11, tags={'algorithm': 'fasttext'}, properties={})

In [16]:
# Register the deployment environment under the name 'env_for_deployment',
# using the definition file deployment/env_for_deployment.yaml.
name='env_for_deployment'
file_path='deployment/env_for_deployment.yaml'
env=register_enviroment(workspace, name, file_path)

In [17]:
# Define the inference configuration.
entry_script='scoring_for_deployment.py'
source_directory='deployment'
# NOTE(review): the version is pinned to '1'; confirm that this is the environment
# version that should be deployed.
version='1'
# `name` still holds the environment name assigned in the previous cell.
environment=get_env(workspace, name, version)
inference_config = define_inference_configuration(entry_script, source_directory, environment)

In [25]:
# Deploy the registered model as a local web service on the given port.
# NOTE(review): the recorded output of this cell ends with "Container has crashed.
# Did your init method fail?" — check the init logic of the entry script.
service_name='local-deploy-test'
models=[model]
port=8891
service_locally = deploy_locally(workspace, service_name, models, inference_config, port=port)

Downloading model BestModel:11 to /tmp/azureml_apymmcmp/BestModel/11
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry fundamental33c005c1f.azurecr.io
Logging into Docker registry fundamental33c005c1f.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM fundamental33c005c1f.azurecr.io/azureml/azureml_55f6443a7f1f616df548a00877130395
 ---> ecd09a31e78d
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> 04acce6d7e92
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjRmNDU1YmQwLWY5NWEtNGI3ZC04ZDA4LTA3ODYxMTUwOGUwYiIsInJlc291cmNlR3JvdXBOYW1lIjoiZnVuZGFtZW50YWwiLCJhY2NvdW50TmFtZSI6ImZ1bmRhbWVudGFsMyIsIndvcmtzcGFjZUlkIjoiYTdjMmFjYWEtYzhmMS00NDhiLWI4OTQtYzJlN2E3MWIzYTMyIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in f0ccd6515dbc
 ---> 1ea13c418ed3
Step 4/5 : RUN mv '/var/azureml-app/tmp3sdwlpjw.py' /var/azureml-app/mai

Error: Container has crashed. Did your init method fail?




Container Logs:
2020-07-15T15:05:13,765236601+00:00 - iot-server/run 
2020-07-15T15:05:13,765713704+00:00 - gunicorn/run 
2020-07-15T15:05:13,768638322+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a5322dee92f8252a51213eb553ff6f2b/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2020-07-15T15:05:13,778016980+00:00 - rsys

WebserviceException: WebserviceException:
	Message: Error: Container has crashed. Did your init method fail?
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Error: Container has crashed. Did your init method fail?"
    }
}

In [None]:
# Deploy to ACI (Azure Container Instances) with 1 CPU core and 1 GB of memory.
service_name='ACI-deploy-test'
models=[model]
service_aci=deploy_to_ACI(workspace, service_name, models, inference_config, cpu_cores=1, memory_gb=1)

In [None]:
# Workspace with AKS.
subscription_id = '74eccef0-4b8d-4f83-b5f9-fa100d155b22'
resource_group = 'DesignerDRI'
workspace_name = 'DesignerDRI_EASTUS'
namespace=workspace_name # for loading module
# Set this if you have multiple tenants.
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)

# Pass the tenant-scoped credential explicitly; otherwise the Workspace falls back
# to its default authentication and the tenant selected above is never used.
ws = Workspace(subscription_id, resource_group, workspace_name, auth=interactive_auth)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, ws.compute_targets.keys(),sep = '\n')



# Switch back to the main workspace; `workspace` (not `ws`) is what the
# deployment helpers in the following cells receive.
subscription_id = '4f455bd0-f95a-4b7d-8d08-078611508e0b'
resource_group = 'fundamental'
workspace_name = 'fundamental3'
# Set this if you have multiple tenants.
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"
workspace=choose_workspace(subscription_id, resource_group, workspace_name, tenant_id)

In [None]:
# Deploy to AKS (Azure Kubernetes Service).
# `workspace` is the one selected in the previous cell; an incomplete
# `workspace=` assignment here would be a SyntaxError, so none is made.
# NOTE(review): presumably the workspace must be the one that holds `model` — confirm.
attachment_name='aaa'
service_name='AKS-deploy-test'
models=[model]
token_auth_enabled=True
deploy_to_AKS(
    workspace,
    attachment_name,
    service_name,
    models,
    inference_config,
    token_auth_enabled=token_auth_enabled,
    cpu_cores=1,
    memory_gb=1,
)

In [None]:
from azureml.core.webservice import AciWebservice, Webservice, LocalWebservice
from azureml.core.model import Model

# Deploy the registered model as a local web service listening on port 8890.
# deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)
deployment_config_local = LocalWebservice.deploy_configuration(port=8890)
# Use the local deployment configuration built above and the `model` registered
# earlier in this notebook (the names previously passed here were never defined).
service_local = Model.deploy(
    workspace=workspace,
    name="my-deployment1",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config_local,
)
service_local.wait_for_deployment(show_output = True)
print(service_local.state)

In [None]:
# Re-check the state of the local service.
print(service_local.state)

In [None]:
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# Get a token to authenticate to the compute instance from remote
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

# Build the request headers from a copy so that adding Content-Type does not
# mutate auth_header itself.
headers = dict(auth_header)
headers.update({'Content-Type':'application/json'})

# Sample payload. Alternative English sample:
# {'param':{'input_sentence': 'haha   haha i want to travel around the world'}}
standard_sample_input = {'param':{'input_sentence': '受疫情影响, 今年很多学生不得不在家上课'}}
standard_sample_input = json.dumps(standard_sample_input)
# standard_sample_output = {'category': 'dream'}

# Always set a timeout: without one, requests waits indefinitely for a hung service.
response = requests.post(service_local.scoring_uri, data=standard_sample_input, headers=headers, timeout=60)
print(service_local.scoring_uri)
print(response)
print(response.status_code)
print(response.elapsed)
print(response.json())

In [None]:
from pprint import pprint
# Pretty-print the logs of the local service.
pprint(service_local.get_logs())

In [None]:
# Print the JSON body of the most recent response again.
print(response.json())

In [None]:
# Show the scoring URI of `service`.
# NOTE(review): `service` is first assigned in a later cell (the ACI deployment),
# so on a fresh top-to-bottom run this raises NameError — `service_local` may
# have been intended; confirm.
service.scoring_uri

In [None]:
# Parse the JSON payload string back into a Python object to inspect it.
aaa= json.loads(standard_sample_input)
print(aaa)
print(type(aaa))

In [None]:
# properties when reuse is Yes
# NOTE(review): `all_child_run` is not assigned anywhere in the visible notebook —
# presumably list(run.get_children()); confirm before running.
aaa= all_child_run[-2].properties
aaa

In [None]:
# properties when reuse is No
# NOTE(review): `all_child_run` is not assigned anywhere in the visible notebook —
# presumably list(run.get_children()); confirm before running.
aaa= all_child_run[-2].properties
aaa

In [None]:
# get the run id
# NOTE(review): `all_child_run` is not assigned anywhere in the visible notebook.
all_child_run[-2].id

In [None]:
# Check the type of `aaa`.
type(aaa)

In [None]:
# Deploy to ACI
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)
# Deploy the `model` registered earlier in this notebook (the name previously
# passed here was never defined).
service = Model.deploy(
    workspace,
    "aci-deployment1",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output = True)
print(service.state)



In [None]:
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# Get a token to authenticate to the compute instance from remote
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

# Build the request headers from a copy so that adding Content-Type does not
# mutate auth_header itself.
headers = dict(auth_header)
headers.update({'Content-Type':'application/json'})

# Sample payload. Alternative English sample:
# {'param':{'input_sentence': 'haha   haha i want to travel around the world'}}
standard_sample_input = {'param':{'input_sentence': '2020年受疫情影响, 今年很多学生不得不在家上课'}}
standard_sample_input = json.dumps(standard_sample_input)
# standard_sample_output = {'category': 'dream'}

# Always set a timeout: without one, requests waits indefinitely for a hung service.
response = requests.post(service.scoring_uri, data=standard_sample_input, headers=headers, timeout=60)
print(service.scoring_uri)
print(response.status_code)
print(response.elapsed)
print(response.json())

In [None]:
from pprint import pprint
# Pretty-print the logs of the deployed service.
pprint(service.get_logs())

In [None]:
# Display the currently selected workspace object.
workspace

In [None]:
# Deploy to AKS: connect to the workspace that has the AKS cluster.
subscription_id = '74eccef0-4b8d-4f83-b5f9-fa100d155b22'
resource_group = 'DesignerDRI'
workspace_name = 'DesignerDRI_EASTUS'
namespace=workspace_name # for loading module
# Set this if you have multiple tenants.
tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47"
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)

# Pass the tenant-scoped credential explicitly; otherwise the Workspace falls back
# to its default authentication and the tenant selected above is never used.
ws = Workspace(subscription_id, resource_group, workspace_name, auth=interactive_auth)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, ws.compute_targets.keys(),sep = '\n')




In [None]:
# Display the compute targets of workspace `ws`.
ws.compute_targets

In [None]:
from azureml.core.compute import AksCompute, ComputeTarget
# Set the resource group that contains the AKS cluster and the cluster name
# (both are only referenced by the commented-out variants below).
resource_group = 'DesignerDRI'
cluster_name = 'attachedcompute'
# The Azure resource ID for the compute resource being attached.
resource_id = '/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourcegroups/DesignerDRI/providers/Microsoft.ContainerService/managedClusters/designerdri-weu1d991123'
cluster_purpose = None

# Attach the cluster to your workgroup. If the cluster has less than 12 virtual CPUs, use the following instead:
# attach_config = AksCompute.attach_configuration(resource_group = resource_group,
#                                         cluster_name = cluster_name,
#                                         cluster_purpose = AksCompute.ClusterPurpose.DEV_TEST)

# For reference:
# attach_config = AksCompute.attach_configuration(resource_group = resource_group,
#                                                  cluster_name = cluster_name,
#                                                resource_id = resource_id,
#                                                 cluster_purpose = cluster_purpose
#                                                )

# Only this form (resource_id plus cluster_purpose) succeeded.
attach_config = AksCompute.attach_configuration(
                                               resource_id = resource_id,
                                                cluster_purpose = cluster_purpose
                                               )
# aks_target = ComputeTarget.attach(ws, cluster_name, attach_config)

In [None]:
from azureml.core.compute import AksCompute
# resource_id = 'designerdri-weu1d991123'
# Full Azure resource ID of the AKS cluster (the bare cluster name above is left commented out).
resource_id = '/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourcegroups/DesignerDRI/providers/Microsoft.ContainerService/managedClusters/designerdri-weu1d991123'
# Build the attach configuration from the resource ID alone.
attach_config = AksCompute.attach_configuration(resource_id = resource_id)
attach_config

In [None]:
# Print each field of the attach configuration, one per line.
for field_name in (
    'cluster_name',
    'cluster_purpose',
    'leaf_domain_label',
    'overwrite_existing_domain',
    'resource_group',
    'resource_id',
):
    print(getattr(attach_config, field_name))



In [None]:
# Attach the AKS cluster to workspace `ws` under the compute name 'myaks1'.
aks_target = ComputeTarget.attach(ws, 'myaks1', attach_config)
aks_target

In [None]:
from azureml.core.model import Model
# Using fundamental3's BestModel_tmp causes problems... the model must be in the current workspace
model_aks = Model(workspace=ws, name='BestModel_tmp', version='1')
model_aks

In [None]:
# Define inference configuration
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

# NOTE(review): the environment is fetched from `workspace`, while the AKS model
# and deployment use `ws` — confirm that this is intended.
myenv_aks = Environment.get(workspace=workspace, name='env_for_deployment', version='1')
# Pass the environment fetched above; a bare `myenv` is not defined in this notebook.
inference_config_aks = InferenceConfig(
    entry_script='scoring_for_deployment.py',
    source_directory='deployment',
    environment=myenv_aks,
)
inference_config_aks

In [None]:
# Deploy to AKS
from azureml.core.webservice import AksWebservice, Webservice
from azureml.core.model import Model

# Look up the attached AKS compute 'myaks1' in workspace `ws`
# (AksCompute is imported by an earlier cell).
aks_target = AksCompute(ws,"myaks1")
# If deploying to a cluster configured for dev/test, ensure that it was created with enough
# cores and memory to handle this deployment configuration. Note that memory is also used by
# things such as dependencies and AML components.
deployment_config = AksWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)
service = Model.deploy(ws, "myservice3", [model_aks], inference_config_aks, deployment_config, aks_target)
service.wait_for_deployment(show_output = True)
print(service.state)
print(service.get_logs())

In [None]:
from pprint import pprint
# Pretty-print the logs of the deployed service.
pprint(service.get_logs())

In [None]:
# Fetch the service keys. Only a masked form is printed so that the secrets do not
# end up in the saved notebook output; `primary` and `secondary` still hold the
# full values for use in later cells.
primary, secondary = service.get_keys()
for key_label, key_value in (('primary', primary), ('secondary', secondary)):
    masked_key = ('*' * 8 + key_value[-4:]) if key_value else key_value
    print(key_label, masked_key)

In [None]:
# Show the request headers, with the Authorization value redacted so that the
# credential is not written into the saved notebook output.
print({
    header_name: ('<redacted>' if header_name.lower() == 'authorization' else header_value)
    for header_name, header_value in headers.items()
})

In [None]:
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# The AKS service is called with a service key, not an interactive-login token
# (the token header does not work for key auth), so no interactive login is
# performed here.
primary, secondary = service.get_keys()
headers = {
    'Authorization': f'Bearer {primary}',
    'Content-Type': 'application/json',
}
# Print the headers with the key redacted so that the secret is not written into
# the saved notebook output.
print({
    header_name: ('<redacted>' if header_name == 'Authorization' else header_value)
    for header_name, header_value in headers.items()
})

# Sample payload. Alternative English sample:
# {'param':{'input_sentence': 'haha   haha i want to travel around the world'}}
standard_sample_input = {'param':{'input_sentence': '2020年受疫情影响, 今年很多学生不得不在家上课'}}
standard_sample_input = json.dumps(standard_sample_input)
# standard_sample_output = {'category': 'dream'}

# Always set a timeout: without one, requests waits indefinitely for a hung service.
response = requests.post(service.scoring_uri, data=standard_sample_input, headers=headers, timeout=60)
print(service.scoring_uri)
print(response.status_code)
print(response.elapsed)
print(response.json())