In [22]:
from azureml.core import Workspace, Experiment, Dataset, Datastore
from azure.ml.component import Pipeline, Component, dsl, Run

In [23]:
# Coordinates of the Azure ML workspace this notebook targets.
subscription_id = "20d3c9e4-625d-45e1-ac8a-def90d3c4a88"
workspace_name = "relevance2-aml"
resource_group = "RSTrainEastUS_32GB-2-aml"

# Workspace handle shared by every later cell (component load, datastore
# lookup, dataset registration).
itp_ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

# Name of the compute target the pipeline and its training step run on.
itp_compute_name = "v100-8x-eus"

In [24]:
# component
# Load the RankLM training component definition from its YAML spec.

# Raw string so every backslash in the Windows path is taken literally; a
# plain literal would contain "\c", which is an invalid escape sequence.
# NOTE(review): this is an absolute path on one developer machine - consider
# deriving it from a configurable repository root.
module_spec_file_training = r"D:\work\code\deeprank\deeprank\aml_pipeline\aml_modules\ranklm_training\ranklm_training.component.yaml"
ranklm_training_module = Component.from_yaml(itp_ws, module_spec_file_training)

In [25]:
import os, sys
import logging
import traceback

adls_datastore_name = 'adls_relevance09'

# Resolve the ADLS datastore registered in the workspace. Later cells use
# `adls_datastore_itp` directly, so a failed lookup must stop the notebook
# here instead of surfacing later as an unrelated NameError.
try:
    adls_datastore_itp = Datastore.get(itp_ws, adls_datastore_name)
except Exception:
    # `Exception` (not a bare except) so Ctrl-C / kernel interrupts are not
    # swallowed; print the actionable hint, then re-raise the real cause.
    print(f"please register {adls_datastore_name} datastores in your workspace")
    raise
else:
    print("found datastore with name: %s" % adls_datastore_name)


found datastore with name: adls_relevance09


In [17]:
# dataset
# Paths (relative to the ADLS datastore) of the training inputs.
ranklm_adls_input_train_path = "/projects/relatedpage/RankLM_data/train.tsv"
ranklm_adls_input_test_path = "/projects/relatedpage/RankLM_data/test.tsv"
ranklm_adls_input_model_path = "/projects/relatedpage/RankLM_model/bert_model.pth"

# Registered dataset name -> (datastore, path on datastore, description).
input_data_dict = {
    "ranklm_adls_train_data": (adls_datastore_itp, ranklm_adls_input_train_path, "ranklm data"),
    "ranklm_adls_test_data": (adls_datastore_itp, ranklm_adls_input_test_path, "ranklm data"),
    "ranklm_adls_model_path": (adls_datastore_itp, ranklm_adls_input_model_path, "ranklm model"),
}

# Read the workspace's registered datasets once instead of on every loop
# iteration; the names handled below are distinct dict keys, so registering
# one cannot change the answer for another.
registered_datasets = itp_ws.datasets

# Registered dataset name -> Dataset object, consumed by the pipeline cell.
input_datasets = {}
for name, (source_datastore, source_path, description_text) in input_data_dict.items():
    if name not in registered_datasets:
        print("Registering {} dataset for pipeline ...".format(name))
        dataset = Dataset.File.from_files((source_datastore, source_path))

        dataset.register(
            workspace=itp_ws,
            name=name,
            description=description_text,
            create_new_version=True,
        )
        print("{} is Registered".format(name))
    else:
        print("found {} dataset".format(name))
    # Always fetch by name so new and pre-existing datasets are handled the
    # same way.
    dataset = Dataset.get_by_name(itp_ws, name=name)
    input_datasets[name] = dataset

found ranklm_adls_train_data dataset
found ranklm_adls_test_data dataset
found ranklm_adls_model_path dataset


In [30]:
## Training Params
# Notebook-level settings forwarded as inputs to the training component in
# the pipeline definition cell.

config_file = "configs/relatedpage/ranklm_v2/train.json"
running_mode = "deepspeed"
base_model_name_or_path = "fixtures/ranklm/RankLM_V2"
base_model_output_size = 768
training = True
# Override expression passed through as a plain string.
override = "{entry:{batch_size_per_gpu:80}}"
# Note: a string flag, not a Python bool.
mlflow = "true"

In [31]:
# define a pipeline
# Single-step pipeline that runs the RankLM training component. The function
# takes no parameters; every input is read from notebook-level variables
# defined in earlier cells (training params, registered datasets, compute name).
@dsl.pipeline(
    name="RankLM Training on AML",
    description="RankLM training on AML ModuleSDK",
    default_compute_target=itp_compute_name,
)
def ranklm_pipeline():
    # Instantiate the training component with the notebook-level parameters
    # and the datasets collected in `input_datasets`.
    ranklm_training_step = ranklm_training_module(
            config_file=config_file,
            running_mode=running_mode,
            base_model_name_or_path=base_model_name_or_path,
            checkpoint_path=input_datasets["ranklm_adls_model_path"],
            base_model_output_size=base_model_output_size,
            training=training,
            train_data_path=input_datasets["ranklm_adls_train_data"],
            test_data_path=input_datasets["ranklm_adls_test_data"],
            # `override` is already a str as defined in this notebook, so
            # str() is a no-op here.
            override=str(override),
            mlflow=mlflow
            )
    # Pin the step to the same compute target used as the pipeline default.
    ranklm_training_step.runsettings.configure(
        target=itp_compute_name,
    )
    # Request 4 GPUs for this step.
    # NOTE(review): the compute name contains "8x" - confirm 4 is intended.
    ranklm_training_step.k8srunsettings.resource_configuration.gpu_count = 4

In [32]:
# create a pipeline and visualize the graph
# Calling the decorated function builds the pipeline object; `pipeline` is
# reused by the submission cell below.
pipeline = ranklm_pipeline()
# Last expression of the cell, so the validation result is shown as output
# (in the recorded run: {'result': 'validation passed', 'errors': []}).
pipeline.validate()

<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_ff2075aa-3f16-4825-8187-a6090fa55005_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [33]:
# Submit the pipeline under the 'ranklm-pipeline-ITP' experiment.
# NOTE(review): regenerate_outputs=False presumably allows reuse of previously
# computed step outputs - confirm against the SDK documentation.
run = pipeline.submit(experiment_name = 'ranklm-pipeline-ITP', regenerate_outputs=False)
# Bare expression so the notebook renders the run summary as the cell output.
run

Submitted PipelineRun 8d25e25f-cda8-42da-afaf-c20e3095b315
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ranklm-pipeline-ITP/runs/8d25e25f-cda8-42da-afaf-c20e3095b315?wsid=/subscriptions/20d3c9e4-625d-45e1-ac8a-def90d3c4a88/resourcegroups/RSTrainEastUS_32GB-2-aml/workspaces/relevance2-aml


Experiment,Id,Type,Status,Details Page,Docs Page
ranklm-pipeline-ITP,8d25e25f-cda8-42da-afaf-c20e3095b315,azureml.PipelineRun,Preparing,Link to Azure Machine Learning studio,Link to Documentation
