In [1]:
import warnings
warnings.filterwarnings('ignore')

## Authenticated ML Client session

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Workspace-scoped client, built from the local workspace config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

# Registry-scoped client; it reuses the workspace's subscription and
# resource group together with the same credential.
registry_name = 'azureml'
ml_client_registry = MLClient(
    registry_name=registry_name,
    resource_group_name=ml_client.resource_group_name,
    subscription_id=ml_client.subscription_id,
    credential=credential,
)

Found the config file in: ./config.json


In [3]:
# Look up the most recent registration of the model in the workspace.
model_name = 'hepatitis_c_model'
model = ml_client.models.get(model_name, label='latest')

## Identify numeric and categorical columns

In [4]:
import pandas as pd

target_column = 'Category'

def get_categorical_numerical_data(dataset):
    """Split the feature columns of ``dataset`` into categorical and numerical names.

    The module-level ``target_column`` is dropped before classification, so it
    never appears in either group. The input frame is not modified.

    Args:
        dataset: pandas DataFrame that contains ``target_column``.

    Returns:
        A ``(categorical, numerical)`` tuple. ``categorical`` is a list of the
        column names whose dtype is ``object`` or ``bool``, in column order.
        ``numerical`` is the pandas ``Index`` of all remaining feature columns,
        as returned by ``Index.difference``.
    """
    features = dataset.drop(columns=[target_column])
    # DataFrame.iteritems() was removed in pandas 2.0; walking the dtypes
    # Series yields the same (column, dtype) information on old and new pandas.
    categorical = [
        col
        for col, dtype in features.dtypes.items()
        if dtype == 'object' or dtype == 'bool'
    ]
    numerical = features.columns.difference(categorical)
    return categorical, numerical

# Read the training split, derive its column groups, then report both groups.
train_data = pd.read_parquet('data/train.parquet')
categorical, numerical = get_categorical_numerical_data(train_data)
for caption, columns in (
    ('categorical columns: ', categorical),
    ('numerical field: ', numerical),
):
    print(caption, columns)

categorical columns:  ['Sex']
numerical field:  Index(['ALB', 'ALP', 'ALT', 'AST', 'Age', 'BIL', 'CHE', 'CHOL', 'CREA', 'GGT',
       'PROT'],
      dtype='object')


## Define the dashboard components

In [5]:
# Resolve the constructor by label first; the version it reports is then
# used to fetch every other RAI component, so all of them stay in step.
label = 'latest'
rai_constructor_component = ml_client_registry.components.get(
    label=label,
    name='microsoft_azureml_rai_tabular_insight_constructor',
)
version = rai_constructor_component.version

# The lookups run in the order listed: explanation, error analysis, gather.
(
    rai_explanation_component,
    rai_erroranalysis_component,
    rai_gather_component,
) = (
    ml_client_registry.components.get(name=component_name, version=version)
    for component_name in (
        'microsoft_azureml_rai_tabular_explanation',
        'microsoft_azureml_rai_tabular_erroranalysis',
        'microsoft_azureml_rai_tabular_insight_gather',
    )
)

## Define the job to create the RAI dashboard insights

In [6]:
import json
from azure.ai.ml import dsl
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import Input

# Compute target that runs the pipeline.
compute_name = 'trainingcompute'
# NOTE(review): the name is spelled "hepatistis"; kept as-is because other
# cells may reference it.
rai_hepatistis_c_version_string = '1'
# "<name>:<version>" identifies the model; the "azureml:" prefix forms the asset path.
expected_model_id = '{}:{}'.format(model_name, model.version)
azureml_model_id = 'azureml:' + expected_model_id

@dsl.pipeline(
    compute=compute_name,
    description='RAI hepatitis c prediction data',
    # Must be an f-string that names the variable actually defined above;
    # otherwise the literal "{...}" placeholder ends up in the experiment name.
    experiment_name=f'RAI_hepatitisc_prediction_RAIInsights_Computation_{rai_hepatistis_c_version_string}',
)
def rai_pipeline(
    target_column_name,
    training_data,
    testing_data
):
    """Assemble the RAI insights pipeline: constructor, explanation, error analysis, gather.

    Args:
        target_column_name: name of the label column, forwarded to the constructor.
        training_data: pipeline input passed to the constructor as ``train_dataset``.
        testing_data: pipeline input passed to the constructor as ``test_dataset``.

    Returns:
        dict with the gather step's ``dashboard`` and ``ux_json`` outputs.
    """
    # Initiate the RAIInsights
    create_rai_job = rai_constructor_component(
        title='RAI Dashboard',
        task_type='classification',
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=training_data,
        test_dataset=testing_data,
        target_column_name=target_column_name,
        # The component receives the categorical column list as a JSON string.
        categorical_column_names=json.dumps(categorical),
    )
    create_rai_job.set_limits(timeout=2400)

    # Add an explanation
    explain_job = rai_explanation_component(
        comment='Explanation for hepatitis c prediction',
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    explain_job.set_limits(timeout=2400)

    # Add error analysis
    erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    erroranalysis_job.set_limits(timeout=2400)

    # Combine everything
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_1=explain_job.outputs.explanation,
        insight_4=erroranalysis_job.outputs.error_analysis,
    )
    rai_gather_job.set_limits(timeout=2400)

    rai_gather_job.outputs.dashboard.mode = 'upload'
    rai_gather_job.outputs.ux_json.mode = 'upload'

    return {
        'dashboard': rai_gather_job.outputs.dashboard,
        'ux_json': rai_gather_job.outputs.ux_json
    }

## Define Pipeline job submission and status check

In [7]:
from azure.ai.ml.entities import PipelineJob
import webbrowser
import time

def submit_and_wait(ml_client, pipeline_job, poll_interval=30) -> 'PipelineJob':
    """Submit ``pipeline_job`` and block until it reaches a terminal status.

    The job is polled every ``poll_interval`` seconds and each observed status
    is printed. Once the job stops, its Studio page is opened in a web browser
    when the job exposes a 'Studio' service endpoint.

    Args:
        ml_client: client whose ``jobs`` operations are used to submit and poll.
        pipeline_job: the pipeline job to submit.
        poll_interval: seconds to sleep between status checks (default 30).

    Returns:
        The job as last fetched. Its status may be any terminal status, not
        only 'Completed', so callers should inspect ``status``.
    """
    terminal_states = ('Completed', 'Failed', 'Canceled', 'NotResponding')

    created_job = ml_client.jobs.create_or_update(pipeline_job)
    assert created_job is not None

    while created_job.status not in terminal_states:
        time.sleep(poll_interval)
        created_job = ml_client.jobs.get(created_job.name)
        print('Latest status : {0}'.format(created_job.status))

    # Open the pipeline in a web browser. The lookup is guarded so a job
    # without a Studio service entry does not raise after a long wait.
    studio = (getattr(created_job, 'services', None) or {}).get('Studio')
    if studio is not None:
        webbrowser.open(studio.endpoint)

    # Deliberately no exception here: the job is returned either way, but a
    # non-successful outcome is stated explicitly rather than passing silently.
    if created_job.status != 'Completed':
        print('Job {0} ended with status {1}; check the job logs for details.'.format(
            created_job.name, created_job.status))

    return created_job

## Run job to create the RAI dashboard

In [9]:
import uuid
from azure.ai.ml import Output

# Local parquet splits, handed to the pipeline as downloadable file inputs.
hepatitis_c_train_parquet = Input(path='data/train.parquet', type='uri_file', mode='download')
hepatitis_c_test_parquet = Input(path='data/test.parquet', type='uri_file', mode='download')

# Pipeline to construct the RAI Insights
insights_pipeline_job = rai_pipeline(
    training_data=hepatitis_c_train_parquet,
    testing_data=hepatitis_c_test_parquet,
    target_column_name=target_column,
)

# Workaround to enable the download: point both pipeline outputs at a
# freshly generated folder on the workspace blob store.
rand_path = str(uuid.uuid4())
insights_pipeline_job.outputs.dashboard = Output(
    type='uri_folder',
    mode='upload',
    path=f'azureml://datastores/workspaceblobstore/paths/{rand_path}/dashboard/',
)
insights_pipeline_job.outputs.ux_json = Output(
    type='uri_folder',
    mode='upload',
    path=f'azureml://datastores/workspaceblobstore/paths/{rand_path}/ux_json/',
)

# Submit the pipeline and block until it reaches a terminal status.
insights_job = submit_and_wait(ml_client, insights_pipeline_job)

Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Failed
