In [1]:
%%time
# Import required Azure Packages
from azureml.core import Workspace, Dataset, Datastore, Environment, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import RunConfiguration, DockerConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter, TrainingOutput
from azureml.pipeline.steps import PythonScriptStep, AutoMLStep
from azureml.data import OutputFileDatasetConfig
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule

from azureml.pipeline.core import PipelineRun
from azureml.train.automl.run import AutoMLRun

CPU times: user 169 ms, sys: 39.6 ms, total: 208 ms
Wall time: 329 ms


In [14]:
import json
import pandas as pd

In [2]:
# User name - specify your user name (this name will be used for creating base directories,  dataAsset naming)
# Make sure to keep this unique & short
user_name = 'ab'

# Provide model_name to register, it will auto-increment the model version.
# Provide experiment_name and datastore_name for which the specific model iteration needs to be specified.
model_name = f'{user_name}_ChurnPrediction_Model'
experiment_name = f'{user_name}_model_training_pipeline_test'
datastore_name = "bidv_blob_datastore"

# Specify a pipeline_run_id and model_iteration_id to register that specific model to Azure
pipeline_run_id = 'cd4ed604-ae7a-4381-a12d-e9cb7e961aba'
user_selected_iteration_id = '819ad93c-20d6-4328-8d9c-bfe327094ed7_39'

# Specify the columns on which the model was trained, if not specified, NDP will fail
tbl_train_columns = 'AMOUNT_TRANSACTION_AUTO;NO_CREDIT_l6m_min;NO_FEE_TRANSACTION_l1m_mean;AMOUNT_TRANSACTION_AUTO_l6m_min;PRE_CLS_BAL_l3m_std;NO_CREDIT_l2m_min;EMAIL_FLAG;PROVINCE_REGION_CENTRAL;OCCUPATION_GROUP_Manual_Labor_and_Trades;NO_FEE_TRANSACTION_l5m_sum;AMOUNT_TRANSACTION;L3M_LOAN_FLAG;NO_DEPOSIT_l3m_std;PRE_CLS_BAL_l3m_max;AGE_GROUP_genX;OCCUPATION_GROUP_Service_and_Hospitality;NO_TRANSATION_l2m_std;PRE_CLS_BAL_l4m_std;AMOUNT_TRANSACTION_AUTO_l4m_std;NO_FUND_TRANSFER_l6m_min;NO_CREDIT;NO_FEE_TRANSACTION_l2m_std;L3M_FD_FLAG;NO_SMB_l2m_min;NO_DEBIT_l3m_min;NO_TRANSATION_AUTO_l2m_std;NO_DEPOSIT_l3m_sum;AMT_CREDIT;OCCUPATION_GROUP_Other;AGE_GROUP_millennials;NO_FEE_TRANSACTION_l2m_max;NO_TRANSATION_AUTO_l6m_min;GENDER_Female;num_of_PRODUCT_ID;NO_FEE_TRANSACTION_l4m_std;No_of_Accounts_l5m_max;NO_ATM_l3m_std;NO_DEPOSIT_l2m_std;NO_DEBIT_l6m_max;NO_SMB_l2m_max;NO_WITHDRAW_l2m_std;AMT_CREDIT_l6m_std;AMT_DEBIT;MARITAL_GROUP_SINGLE;NO_ATM_l6m_mean;AMT_CREDIT_l6m_min;NO_FEE_TRANSACTION_l3m_std;NO_DEBIT_l2m_std;NO_CREDIT_l2m_max;AGE_GROUP_boomers;NO_CREDIT_l3m_min;PRE_CLS_BAL_l2m_std;AMOUNT_TRANSACTION_AUTO_l4m_min;L6M_LOAN_FLAG;NO_FUND_TRANSFER_l3m_min;MARITAL_GROUP_OTHER;NO_ATM_l2m_max;NO_SMB_l6m_max;NO_TRANSATION_AUTO_l5m_std;churn_flag;NO_CREDIT_l5m_std;NO_TRANSATION_AUTO_l5m_min;NO_FEE_TRANSACTION_l3m_min;NO_DEBIT_l6m_min;AMT_CREDIT_l2m_min;NO_TRANSATION_AUTO_l3m_std;PRE_CLS_BAL_l2m_max;L6M_FD_FLAG;AMOUNT_TRANSACTION_AUTO_l5m_sum;AMOUNT_TRANSACTION_AUTO_l6m_std;NO_DEBIT_l2m_max;NO_FEE_TRANSACTION_l3m_mean;AMT_CREDIT_l5m_min;NO_FEE_TRANSACTION_l6m_std;NO_ATM_l6m_min;AMOUNT_TRANSACTION_AUTO_l5m_std;NO_ATM_l4m_std;PRE_CLS_BAL_l6m_sum;NO_FEE_TRANSACTION_l3m_max;NO_CREDIT_l2m_std;OCCUPATION_GROUP_Business_and_Sales;NO_SMB_l2m_std;PRE_CLS_BAL_l5m_std;PROVINCE_REGION_SOUTH;NO_ATM_l1m_mean;num_of_Closed;AGE_GROUP_genZ;PRE_CLS_BAL_l6m_min;NO_FUND_TRANSFER_l5m_min;AMT_DEBIT_l3m_min;NO_DEBIT_l5m_min;PRE_CLS_BAL_l5m_min;AMT_CREDIT_l3m_std;NO_TRANSATION_AUTO;NO_DEPOSIT_l6m_std;NO_DEBIT;PROVINCE_REGION_NORTH;PRE_CLS_BAL;NO_CREDIT_l5m_min;NO_CREDIT_l1m_mean;GENDER_Male;NO_ATM_l2m_min;NO_DEPOSIT_l6m_sum;NO_ATM_l6m_std;AMT_DEBIT_l1m_mean;AMOUNT_TRANSACTION_AUTO_l2m_min;MARITAL_GROUP_MARRIED;PRE_CLS_BAL_l4m_min;NO_ATM_l3m_min'

# Specify user_selected_metric
user_selected_metric = 'f1_score_macro'

# Specify other variables
observation_month_number = 7
observation_year = 2022
historical_months = 6

In [3]:
model_tags = {}
model_tags['pipeline_run_id'] = pipeline_run_id
model_tags['custom_model_iteration_selected'] = user_selected_iteration_id
model_tags['observation_year'] = observation_year
model_tags['observation_month_number'] = observation_month_number
model_tags['model_name'] = model_name
model_tags['historical_months'] = historical_months

In [6]:
# Loads workspace info from config.json file and get workspace details
ws = Workspace.from_config(path='../Configurations/', _file_name='workspace_config.json')
ws_details = ws.get_details()['id']

# Load the automl step run id as specified from the workspace
automl_step_run_id, user_selected_model_iteration_num = user_selected_iteration_id.split('_')

# Get current experiment
experiment = Experiment(ws, experiment_name)
print('Experiment Name:', experiment_name)

# Get the pipeline
pipeline_run = PipelineRun(experiment, pipeline_run_id)
print('Details of current_pipeline_run:', pipeline_run)

# Get model trainig step run
automl_step_run = pipeline_run.find_step_run('Model_Training_Testing')[0]
print('Details of automl_step_run:', automl_step_run)
# Convert normal run to AutoMLRun object
automl_step_run = AutoMLRun(experiment, automl_step_run.id)

Experiment Name: ab_model_training_pipeline_test
Details of current_pipeline_run: Run(Experiment: ab_model_training_pipeline_test,
Id: cd4ed604-ae7a-4381-a12d-e9cb7e961aba,
Type: azureml.PipelineRun,
Status: Completed)
Details of automl_step_run: Run(Experiment: ab_model_training_pipeline_test,
Id: 819ad93c-20d6-4328-8d9c-bfe327094ed7,
Type: azureml.StepRun,
Status: Completed)


In [7]:
def fun_get_model_metrics(current_pipeline_run, best_run):
    metrics_output_port = current_pipeline_run.get_pipeline_output('metrics_output')
    metrics_output_port.download('.', show_progress=True)

    # model_output_port = current_pipeline_run.get_pipeline_output('model_output')
    # model_output_port.download('.', show_progress=True)

    metrics_filename = metrics_output_port._path_on_datastore
    # metrics_filename = path to downloaded file
    with open(metrics_filename) as f:
        metrics_output_result = f.read()
    
    deserialized_metrics_output = json.loads(metrics_output_result)
    df = pd.DataFrame(deserialized_metrics_output)
    print(df.to_dict)

    df_best_run = df[[best_run.id]]
    dict_metrics = {}
    df_best_run['metrics'] = [i[0] for i in df_best_run[best_run.id]]
    df_best_run.drop(columns=best_run.id, inplace=True)
    dict_metrics = df_best_run.to_dict()
    
    return dict_metrics

In [8]:
model_properties = {
    'train_columns' : tbl_train_columns
}

In [18]:
# Register best model from this run based on user_selected_iteration_number
best_run, model = automl_step_run.get_output(iteration=user_selected_model_iteration_num)
model_path_on_datastore = best_run.properties['model_data_location'].split('artifact/')[1].split('/model.pkl')[0]
model_path = f'azureml:/{ws_details}/datastores/{datastore_name}/paths/{model_path_on_datastore}'
dict_metrics = fun_get_model_metrics(pipeline_run, best_run)
model_properties['best_model_as_per_user_selected_iteration_number'] = dict_metrics
model_properties['best_model_as_per_user_selected_iteration_number']['run_id'] = best_run.id
model_properties['best_model_as_per_user_selected_iteration_number']['model_path'] = model_path
print(f'Model path for Model selected based on user_selected_iteration_number {user_selected_model_iteration_num}:', model_path)

model = best_run.register_model(
    model_name=model_name+'_test',
    model_path='outputs/model.pkl',
    properties=model_properties,
    tags=model_tags,
)

print("Trained_Model_Name", model.name)
print("Trained_Model_Version", model.version)
print("Trained_Model_Columns", tbl_train_columns)
print("user_selected_metric", user_selected_metric)

print("Registered version {0} of model {1}".format(model.version, model.name))



<bound method DataFrame.to_dict of                                  819ad93c-20d6-4328-8d9c-bfe327094ed7_2  \
balanced_accuracy                                  [0.6403497687463984]   
f1_score_micro                                     [0.8418604651162791]   
recall_score_macro                                 [0.6403497687463984]   
log_loss                                          [0.31025587064795296]   
f1_score_weighted                                  [0.8222803588217082]   
recall_score_weighted                              [0.8418604651162791]   
AUC_micro                                          [0.9402514872904272]   
matthews_correlation                              [0.34859384025627194]   
average_precision_score_macro                      [0.7963494439960137]   
average_precision_score_weighted                   [0.9083282228675442]   
AUC_weighted                                       [0.8763048613712524]   
precision_score_macro                              [0.72436996953