In [9]:
import sys
import os
import numpy as np

# Resolve the notebook's working directory explicitly. The previous form,
# dirname(abspath("...")), produced this same value only because dirname()
# stripped the "..." placeholder component again.
SCRIPT_DIR = os.path.abspath(os.curdir)
# Make the parent directory importable so the `training` package imports below
# resolve. Guarded so re-running this cell does not keep appending duplicates.
if os.path.dirname(SCRIPT_DIR) not in sys.path:
    sys.path.append(os.path.dirname(SCRIPT_DIR))

from training.generate import (generate_response, load_model_tokenizer_for_generate, 
                               get_special_token_id, get_special_token_id)
from training.consts import END_KEY, PROMPT_FORMAT, RESPONSE_KEY_NL
from training.trainer import PROMPT_FORMAT, create_data_set_from_json_list
from datasets import load_from_disk
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)

In [2]:

# Load the dataset saved on disk and keep only its "test" split.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
test_data = load_from_disk("/opt/home/bo_ling/dataset/michelangelo_so_long.hf")["test"]
# Bare expression so the notebook displays the split's summary (features / row count).
test_data

Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 44
})

In [3]:
# Directory handed to the project's loader below.
# NOTE(review): looks like a fine-tuning checkpoint directory (absolute,
# user-specific path) — confirm it is the intended checkpoint.
local_output_dir="/opt/home/bo_ling/dolly_training/ma_helpdesk/checkpoint-400"
# The loader's result is unpacked into the `model` and `tokenizer` globals that
# the later cells pass to generate_helpdesk_response.
model, tokenizer = load_model_tokenizer_for_generate(local_output_dir)

In [10]:
def generate_helpdesk_response(
    instruction: str,
    input_text: str,
    model: PreTrainedModel,
    tokenizer: PreTrainedTokenizer,
    do_sample: bool = True,
    max_new_tokens: int = 256,
    top_p: float = 0.92,
    top_k: int = 0,
    **kwargs,
) -> str:
    """Generate a helpdesk answer for one instruction/question pair.

    The prompt is built from ``PROMPT_FORMAT`` with an empty output section,
    tokenized, and passed to ``model.generate``. The returned text is whatever
    the model produced between the response-key token and the end-key token.

    Args:
        instruction: Task instruction placed in the prompt.
        input_text: The question / input placed in the prompt.
        model: Model used for generation.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        do_sample: Forwarded to ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        **kwargs: Extra keyword arguments forwarded to ``model.generate``.

    Returns:
        The decoded, whitespace-stripped response text, or ``None`` when the
        response-key token cannot be found in the generated sequence.
    """
    # Local import: the notebook's import cell never defines a logger, so the
    # original `logger.warn(...)` call raised NameError on the not-found path.
    import logging

    prompt = PROMPT_FORMAT.format(instruction=instruction, input_text=input_text, output_text="")

    # Put the prompt on the same device as the model rather than assuming CUDA;
    # fall back to "cuda" (the previous behavior) if the model exposes no device.
    device = getattr(model, "device", "cuda")
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    response_key_token_id = get_special_token_id(tokenizer, RESPONSE_KEY_NL)
    end_key_token_id = get_special_token_id(tokenizer, END_KEY)

    gen_tokens = model.generate(
        input_ids,
        pad_token_id=tokenizer.pad_token_id,
        # Ensure generation stops once it generates "### End"
        eos_token_id=end_key_token_id,
        do_sample=do_sample,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        **kwargs,
    )[0].cpu()

    # Find where "### Response:" is first found in the generated tokens. Since it is part of the prompt we
    # expect to find it; the response is everything after that token.
    response_positions = np.where(gen_tokens == response_key_token_id)[0]
    if len(response_positions) == 0:
        logging.getLogger(__name__).warning(
            f"Could not find response key {response_key_token_id} in: {gen_tokens}"
        )
        return None
    # Compare against "not found" explicitly above: position 0 is a valid index,
    # which a plain truthiness test would wrongly treat as missing.
    response_pos = response_positions[0]

    # Next find where "### End" is located. The model has been trained to end its responses with this sequence
    # (or actually, the token ID it maps to, since it is a special token). We may not find it, as the response
    # could be truncated; in that case return everything to the end. Note that even though we set
    # eos_token_id, we still see this token at the end. Only end tokens after the response marker count.
    end_positions = np.where(gen_tokens == end_key_token_id)[0]
    end_positions = end_positions[end_positions > response_pos]
    end_pos = end_positions[0] if len(end_positions) > 0 else None

    return tokenizer.decode(gen_tokens[response_pos + 1 : end_pos]).strip()

In [12]:
# Print the record, the question, the generated answer and the expected answer
# for the first six test examples.
count = 0  # stays 0 when the split is empty
for count, d in enumerate(test_data, start=1):
    instruction = d["instruction"]
    input_text = d["input"]
    generated = generate_helpdesk_response(instruction, input_text, model, tokenizer)
    expected = d['output']

    # Two separator rules, the raw record, then the three labelled sections.
    print("="*100, "="*100, d, "*"*100, sep="\n")
    print("QUESTIONS:", input_text, sep="\n")
    print("\nGENERATED:", generated, sep="\n")
    print("\nEXPECTED:", expected, sep="\n")

    # Stop once the sixth example has been printed.
    if count > 5:
        break

{'instruction': 'Please answer the following MA helpdesk questions:', 'input': 'Adding published fields for Michelangelo requires a retrain', 'output': '\nI see you are adding new palette features into published fields that are not in the DSL, which cause the model validation to fail.For this kind of case, you need to update the template and train the model again.You can see the exception stack trace documented in this tickethttps:  t3.uberinternal.com browse MA-4578', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease answer the following MA helpdesk questions:\nAdding published fields for Michelangelo requires a retrain\n\n### Response:\nI see you are adding new palette features into published fields that are not in the DSL, which cause the model validation to fail.For this kind of case, you need to update the template and train the model again.You can see the exception stack trace documented 

{'instruction': 'Please answer the following MA helpdesk questions:', 'input': 'yab Michelangelo P2P predication service locally without remote server or cerberus', 'output': "\nThanks for the question Jiamin. We're working on a long term solution for cerberus. In the meantime, you can use our tool to start a proxy locally.Upgrade to the latest opsctl versionbrew update  brew install opsctlbrew update  brew upgrade opsctlCreate tunnelopsctl service debug tunnel -p <endpoint-name>:<local-port>This will cause a listener on localhost:<local-port> that will forward to a random remote endpoint instance. Then, your yab request should use --peer localhost:<local-port> instead ", 'text': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease answer the following MA helpdesk questions:\nyab Michelangelo P2P predication service locally without remote server or cerberus\n\n### Response:\nThanks for the question Jiamin

In [14]:
# Ad-hoc check: ask the model a single hand-written helpdesk question.
instruction = "Please answer the following MA helpdesk questions:"
input_text = """Our ml-code devpod's drogon-cli is too old and running drogon command from it will produce error:
got Unexpected response from uSSO - {"version":"1.0.0","type":"server side error","message":"invalid response from Duo statuscode: 400","error":true}"""
output_text = ""  # not referenced in this cell
# Echo the question, then the label for the generated answer.
print("QUESTIONS: \n", input_text, "\nGENERATED:", sep="\n")
# Bare expression so the notebook displays the generated answer as the cell output.
generate_helpdesk_response(
    instruction=instruction,
    input_text=input_text,
    model=model,
    tokenizer=tokenizer,
)

QUESTIONS: 

Our ml-code devpod's drogon-cli is too old and running drogon command from it will produce error:
got Unexpected response from uSSO - {"version":"1.0.0","type":"server side error","message":"invalid response from Duo statuscode: 400","error":true}

GENERATED:


"Golang doesn't have a nice support for PyML's Arrow encoding yet. The simplest way to support unicode would be just encoding unicode strings with base64 and then decoding them at the model level upon receiving.We can look into golang client support of arrow encoding to make this even more simpler  the same way it works for python."