# Example of how to use Azure OpenAI to compare two responses

In [1]:
import openai
import os

# Azure OpenAI settings come from the environment; an unset variable yields None.
AZURE_OPENAI_RESOURCE = os.getenv("AZURE_OPENAI_RESOURCE")
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_TEMPERATURE = os.getenv("AZURE_OPENAI_TEMPERATURE")
AZURE_OPENAI_MAX_TOKENS = os.getenv("AZURE_OPENAI_MAX_TOKENS")
AZURE_OPENAI_CHAT_COMPLETION_MODEL = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_MODEL")
AZURE_OPENAI_CHAT_COMPLETION_ENGINE = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_ENGINE")

# Module-level configuration of the openai package for the Azure endpoint.
openai.api_type = "azure"
openai.api_version = "2023-08-01-preview"
openai.api_base = f"https://{AZURE_OPENAI_RESOURCE}.openai.azure.com/"
openai.api_key = AZURE_OPENAI_KEY


def get_completion(input_prompt, printResult=True, returnWholeObject=False):
    """Request a completion for ``input_prompt`` via ``openai.Completion.create``.

    Model, engine, temperature and max_tokens are taken from the
    ``AZURE_OPENAI_*`` notebook constants; temperature is converted with
    ``float`` and max_tokens with ``int``. Generation stops at the
    ``<|im_end|>`` / ``<|im_start|>`` markers.

    Args:
        input_prompt: Prompt text sent as the ``prompt`` argument.
        printResult: When true, print the whole response object.
        returnWholeObject: When true, return the whole response object
            instead of only the text of the first choice.

    Returns:
        The response object, or ``response["choices"][0]["text"]``.
    """
    request_options = {
        "prompt": input_prompt,
        "model": AZURE_OPENAI_CHAT_COMPLETION_MODEL,
        "engine": AZURE_OPENAI_CHAT_COMPLETION_ENGINE,
        "temperature": float(AZURE_OPENAI_TEMPERATURE),
        "max_tokens": int(AZURE_OPENAI_MAX_TOKENS),
        "stop": ["<|im_end|>", "<|im_start|>"],
    }
    response = openai.Completion.create(**request_options)

    if printResult:
        print(response)

    return response if returnWholeObject else response["choices"][0]["text"]


In [42]:
# User turn of the prompt, wrapped in <|im_start|>user ... <|im_end|> markers.
# {robot_response} and {human_response} are placeholders filled in by
# str.format() below; the requested answer format ("Question-N--<test>--YES or
# NO--<reason>") is what the parsing cells further down split on.
qa_guidelines = """<|im_start|>user
Compare the robot resposne versus human response. The robot response is delimited by ###ROBOT### and 
the human response is delimited by ###HUMAN###.
Answer the next questions:
Question-1 : Is the robot's response correct?
Question-2 : Is the robot's response similar to the human answer?
For your responses follow the next format:
Question-1--Correctness--YES or NO--Summarize your arguments in 10 words
Question-2--Similarity--YES or NO--Summarize your arguments in 10 words

###ROBOT###
{robot_response}
###ROBOT###

###HUMAN###
{human_response}
###HUMAN###
<|im_end|>"""
# Appended after the user turn so the prompt ends with an opened assistant turn.
assistant_prompt = """<|im_start|>assistant
"""

# Sample answer to evaluate (Spanish text).
# NOTE(review): this sample contains what looks like a person's name — confirm
# it is synthetic or anonymize it before sharing the notebook.
robot_answer = """Estimado Silvina Pliego, según la documentación consultada, 
es posible cambiar el rango de horario una vez que se ha optado por uno en el programa ANTI TRÁFICO (LATAM) 1 . 
Sin embargo, se espera que una vez que se haya optado por un rango, se mantenga estable por al menos un mes 
para una fluida organización. Por favor, si necesitas más información o si esta respuesta no responde 
completamente a tu pregunta, no dudes en proporcionar más detalles para que pueda ayudarte mejor."""

# Reference answer the robot answer is compared against.
human_answer = """Sí, pero se requiere que el rango de horario laboral se mantenga por un mes"""

# Fill both placeholders and append the assistant-turn opener.
prompt = qa_guidelines.format(robot_response=robot_answer, human_response=human_answer) + assistant_prompt

print(prompt)

<|im_start|>user
Compare the robot resposne versus human response. The robot response is delimited by ###ROBOT### and 
the human response is delimited by ###HUMAN###.
Answer the next questions:
Question-1 : Is the robot's response correct?
Question-2 : Is the robot's response similar to the human answer?
For your responses follow the next format:
Question-1--Correctness--YES or NO--Summarize your arguments in 10 words
Question-2--Similarity--YES or NO--Summarize your arguments in 10 words

###ROBOT###
Estimado Silvina Pliego, según la documentación consultada, 
es posible cambiar el rango de horario una vez que se ha optado por uno en el programa ANTI TRÁFICO (LATAM) 1 . 
Sin embargo, se espera que una vez que se haya optado por un rango, se mantenga estable por al menos un mes 
para una fluida organización. Por favor, si necesitas más información o si esta respuesta no responde 
completamente a tu pregunta, no dudes en proporcionar más detalles para que pueda ayudarte mejor.
###ROBOT#

In [56]:
# Ask the model for the comparison; only the text of the first choice is returned.
result = get_completion(prompt, printResult=False)

print(result)

Question-1--Correctness--YES--The robot's response provides accurate information and guidelines.
Question-2--Similarity--YES--The robot's response is very similar to the human answer.


In [57]:
# Bare expression: displays the repr of `result`, which makes the embedded "\n" visible.
result

"Question-1--Correctness--YES--The robot's response provides accurate information and guidelines.\nQuestion-2--Similarity--YES--The robot's response is very similar to the human answer."

In [58]:
# Delete CR/LF characters, then cut the answer at every "Question-" marker.
parts = result.translate({ord("\n"): None, ord("\r"): None}).split("Question-")

In [59]:
# Exploratory parse of the model answer: one dict per "Question-..." segment.
parts = result.replace("\n", "").replace("\r", "").split("Question-")
data = []
for i, part in enumerate(parts, start=1):
    # The text before the first "Question-" marker is an empty string; skip it.
    if part:
        info = part.split("--")
        print(f"i: {i} part: {part} info: {info}")
        test_id = info[0]
        test = info[1]
        # Keep the flag in its own name: rebinding `result` here would replace
        # the model's answer string with an int, so re-running this cell (or any
        # cell that calls result.replace) would fail with AttributeError.
        result_flag = 1 if info[2] == "YES" else 0
        reason = info[3]
        item = {
            "test_id": test_id,
            "test": test,
            "result": result_flag,
            "reason": reason
        }
        print(f"Item to add : {item}")
        data.append(item)
    


i: 2 part: 1--Correctness--YES--The robot's response provides accurate information and guidelines. info: ['1', 'Correctness', 'YES', "The robot's response provides accurate information and guidelines."]
Item to add : {'test_id': '1', 'test': 'Correctness', 'result': 1, 'reason': "The robot's response provides accurate information and guidelines."}
i: 3 part: 2--Similarity--YES--The robot's response is very similar to the human answer. info: ['2', 'Similarity', 'YES', "The robot's response is very similar to the human answer."]
Item to add : {'test_id': '2', 'test': 'Similarity', 'result': 1, 'reason': "The robot's response is very similar to the human answer."}


In [63]:
import json

def transform_open_ai_compare_to_json(input_string, debug=False):
    """Convert the model's comparison answer into a JSON array string.

    The answer is expected to contain one record per question, shaped as
    ``Question-<id>--<test name>--YES or NO--<reason>``. Line breaks are
    removed before the text is cut at each ``Question-`` marker.

    Args:
        input_string: Raw answer text to parse.
        debug: When true, print the intermediate parsing state.

    Returns:
        A JSON string (``indent=2``) holding a list of objects with the keys
        ``test_id``, ``test``, ``result`` (1 for YES, 0 otherwise) and
        ``reason``. Segments that lack any of the four fields, such as text
        preceding the first ``Question-`` marker, are skipped.
    """
    data = []
    if debug: print(f" input string to process {input_string}")
    parts = input_string.replace("\n", "").replace("\r", "").split("Question-")
    if debug: print(f"parts : {parts}")
    for i, part in enumerate(parts, start=1):
        if not part:
            continue
        # maxsplit=3 keeps any "--" inside the free-text reason intact.
        info = part.split("--", 3)
        if debug: print(f"i: {i} part: {part} info: {info}")
        if len(info) < 4:
            # Preamble or truncated record: skip it instead of raising IndexError.
            if debug: print(f"Skipping malformed part : {part}")
            continue
        test_id, test, verdict, reason = info
        # Compare the verdict ignoring case and surrounding whitespace.
        result = 1 if verdict.strip().upper() == "YES" else 0
        item = {
            "test_id": test_id,
            "test": test,
            "result": result,
            "reason": reason
        }
        if debug: print(f"Item to add : {item}")
        data.append(item)
    if debug: print(f"data to json : {data}")
    return json.dumps(data, indent=2)

In [65]:
# Query the model again and convert its answer to JSON with parser tracing enabled.
result_to_test = get_completion(prompt, printResult=False)

print(result_to_test)
output_json = transform_open_ai_compare_to_json(input_string=result_to_test, debug=True)
print(output_json)

Question-1--Correctness--YES--The robot's response accurately conveys the information.
Question-2--Similarity--YES--The robot's response is very similar to the human answer.
 input string to process Question-1--Correctness--YES--The robot's response accurately conveys the information.
Question-2--Similarity--YES--The robot's response is very similar to the human answer.
parts : ['', "1--Correctness--YES--The robot's response accurately conveys the information.", "2--Similarity--YES--The robot's response is very similar to the human answer."]
i: 2 part: 1--Correctness--YES--The robot's response accurately conveys the information. info: ['1', 'Correctness', 'YES', "The robot's response accurately conveys the information."]
Item to add : {'test_id': '1', 'test': 'Correctness', 'result': 1, 'reason': "The robot's response accurately conveys the information."}
i: 3 part: 2--Similarity--YES--The robot's response is very similar to the human answer. info: ['2', 'Similarity', 'YES', "The robot