Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# Debugging Score Script locally

To debug and analyze the output of the score script, we would normally need to deploy the model (either locally or to ACI) and then review the predictions returned by the ML service. This can be time-consuming. There is a way to debug the score script locally without deploying the model. To do this, we need to do the following:

1. Update the score script: add code to the score script so that it is executed when the score script is run as the main program.

2. Download the model and label map

Thanks to Sam Kemp for providing this very useful tip!



In [None]:
# Install the packages needed to run the scoring script locally.
# transformers is pinned to version 2.8.0; inference-schema provides the
# inference_schema decorators that the scoring script imports.
!pip install transformers==2.8.0
!pip install inference-schema

## 1.0 Connect to workspace

Initialize a Workspace object from the existing workspace you created in the Prerequisites step

In [None]:
from azureml.core import Workspace

# Load the workspace from the saved workspace configuration. Later cells use
# `ws`, so when loading fails the underlying cause is printed instead of being
# hidden behind a generic message (a bare `except:` would also report e.g.
# authentication errors as "Workspace not found").
try:
    ws = Workspace.from_config()
except Exception as exc:
    print('Workspace not found:', exc)
else:
    # Each property is printed once (the location used to be printed twice).
    print(ws.name, ws.location, ws.resource_group, sep='\t')
    print('Library configuration succeeded')

## 2.0 Update scoring script


Add code to the score script so that it is executed when the score script is run as the main program. This essentially means adding an `if __name__ == "__main__":` code block, as shown below.

## 3.0 Download the model

To debug the score script locally, download the model file as follows. This will create the folder structure azureml-models\bertkm_ner. The bertkm_ner folder will contain the latest version of the model file.

In [None]:
from azureml.core.model import Model

# Fetch the registered 'bertkm_ner' model through workspace `ws`; the returned
# local path is shown as the cell output.
Model.get_model_path(model_name='bertkm_ner', _workspace=ws)

In [None]:
# Load the label map from the local JSON file labelfile.txt (nothing is
# downloaded here). `labelmap` is substituted into the generated scoring
# script further down in this notebook.
import json

# JSON text is UTF-8; do not depend on the platform's default encoding.
with open('labelfile.txt', 'r', encoding='utf-8') as fp:
    labelmap = json.load(fp)

# Show the label map as the cell output.
labelmap

In [None]:
# Source code of score.py, generated as a string. The label map loaded earlier
# in this notebook is substituted for the single %s placeholder as a Python
# dict literal. Because the template goes through %-formatting, the embedded
# script must not contain any other percent sign; because the literal is not a
# raw string, it must not contain backslashes either.
score_text = '''

import json
import argparse
import os
import random
import sys
from tempfile import TemporaryDirectory
from azureml.core import Dataset, Run
import pandas as pd
import torch

# Inference schema for schema discovery
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType

from utils_nlp.common.pytorch_utils import dataloader_from_dataset
from utils_nlp.common.timer import Timer
from utils_nlp.dataset.ner_utils import preprocess_conll
from utils_nlp.models.transformers.named_entity_recognition import (
    TokenClassificationProcessor, TokenClassifier)

from azureml.core.model import Model

model_name = "bert-base-cased"
DO_LOWER_CASE = False
TRAILING_PIECE_TAG = "X"
DEVICE = "cuda"
test_fraction = 0.2
train_file = "ner_dataset2"
max_len = 256
CACHE_DIR = "./temp"
label_map=  %s
BATCH_SIZE = 5
device = "cpu"

# Maps the entity name inside a predicted tag (e.g. B-Product, I-Feature)
# to the key under which the entity is reported in the response.
ENTITY_KEYS = {"Product": "products", "Feature": "features"}


def init():
    """Load the pretrained model and apply the fine tuned weights to it."""
    global model

    # load the pretrained model
    model = TokenClassifier(model_name=model_name, num_labels=len(label_map), cache_dir=CACHE_DIR )
    # Load the fine tuned weights
    model_path = Model.get_model_path('bertkm_ner')
    # apply the fine tuned weights to pretrained model
    model.model.load_state_dict(torch.load(model_path, map_location=device))


def group_entities(tokens, token_tags):
    """Merge per-token B-/I- tags into whole entity strings.

    A B-<Entity> tag starts a new entity and an I-<Entity> tag extends the
    entity in progress when it has the same type. Any other tag closes the
    entity in progress; an I- tag with no matching open entity is ignored.
    Entities still open when the tokens run out are emitted as well, so an
    entity at the very end of the text is not lost, and two entities that
    directly follow each other are both reported.

    Args:
        tokens: the whitespace separated words of the input text.
        token_tags: the predicted tag for each word, in the same order.

    Returns:
        dict with the keys "products" and "features", each holding the list
        of entity strings found, in order of appearance.
    """
    tags = {"products": [], "features": []}
    open_key = None  # output key of the entity being built, if any
    words = []       # words collected so far for that entity

    for token, tag in zip(tokens, token_tags):
        prefix, _, entity = tag.partition("-")
        key = ENTITY_KEYS.get(entity)

        if prefix == "I" and key is not None and key == open_key:
            words.append(token)
            continue

        # Every other tag ends the entity in progress.
        if open_key is not None:
            tags[open_key].append(" ".join(words))
            open_key, words = None, []

        if prefix == "B" and key is not None:
            open_key, words = key, [token]

    # Flush an entity that runs up to the last token.
    if open_key is not None:
        tags[open_key].append(" ".join(words))

    return tags


# Inference schema for schema discovery
standard_sample_input = {'text': 'a sample input record containing some text' }
standard_sample_output = {'tags': {'products': ['Cognitive Search', 'Cosmos DB'], "features": ['indexer']}}

@input_schema('raw_data', StandardPythonParameterType(standard_sample_input))
def run(raw_data):
    """Tag the products and features mentioned in raw_data["text"].

    Args:
        raw_data: dict with a "text" entry holding the text to analyse.

    Returns:
        On success a dict of the form
        {"tags": {"products": [...], "features": [...]}}.
        On failure a JSON string of the form {"error": "<message>"}; the
        error shape is kept as a string for backward compatibility.
    """
    try:
        input_txt = raw_data["text"]
        processor = TokenClassificationProcessor(model_name=model_name, to_lower=DO_LOWER_CASE, cache_dir=CACHE_DIR)

        input_tokens = input_txt.split()

        sample_dataset = processor.preprocess_for_bert(
            text=[input_tokens],
            max_len=max_len,
            labels=None,
            label_map=label_map,
            trailing_piece_tag=TRAILING_PIECE_TAG,
        )
        sample_dataloader = dataloader_from_dataset(
            sample_dataset, batch_size=BATCH_SIZE, num_gpus=None, shuffle=False, distributed=False
        )
        #for AKS deployment remove the Verbose flag
        preds = model.predict(
                test_dataloader=sample_dataloader,
                num_gpus=None,
                verbose=False
            )
        tags_predicted = model.get_predicted_token_labels(
            predictions=preds,
            label_map=label_map,
            dataset=sample_dataset
        )

        # Only one text is scored, so only the first prediction row is used.
        token_tags = tags_predicted[0] if len(tags_predicted) > 0 else []
        # Apply the same max_len cap that was used for preprocessing.
        tags = group_entities(input_tokens[:max_len], token_tags)

        return {"tags": tags}
    except Exception as e:
        result = str(e)
        # return error message back to the client
        return json.dumps({"error": result})

if __name__ == '__main__':
    init()

    input_data = """{"raw_data": {"text": "If your Cosmos DB account is used by other Azure services like Azure Cognitive Search or the Bing Search API you will have good results."}}"""
    input_json = json.loads(input_data)['raw_data']
    resp=run(input_json )
    print(resp)

''' % str(labelmap)


In [None]:
# Write the generated scoring script to score.py in the current directory,
# replacing any existing file.
with open("score.py", "w") as script_file:
    script_file.write(score_text)

## 4.0 Run the score script

In [None]:
# Execute the generated score.py as the main program: its __main__ block calls
# init(), runs run() on a sample input and prints the response.
%run score.py