### Parse gpt responses for multiturn chat

In [10]:
import json
from tqdm import tqdm

In [11]:
# Function to append a new turn (role + text) to the running chat history

def build_chat_history(response, current_history, gpt_simpulated):
    """
    Add one more turn to the end of a conversation.

    The turn is stored as {"role": gpt_simpulated, "value": response}.
    `current_history` is modified in place and the very same list object is
    returned, so a caller may use either its own reference or the return value.
    """
    new_turn = dict(role=gpt_simpulated, value=response)
    current_history.append(new_turn)
    return current_history

# Function to parse the string and create a JSON object
def parse_gpt_response(response, id, current_history, gpt_simulated):
    """
    Decode one GPT response string and append it to its conversation.

    Args:
        response: Response text as a str.
        id: Identifier stored under the "id" key of the returned dict.
        current_history: List of earlier turns. It is extended in place
            (through build_chat_history) when decoding succeeds.
        gpt_simulated: Role name recorded for the new turn.

    Returns:
        {"id": id, "conversation": <history including the new turn>} on
        success, or None when the response cannot be decoded. In the failure
        case current_history is left untouched.
    """
    try:
        # Turn literal escape sequences in the text (a backslash followed by
        # "n", "t", "xNN", ...) into the characters they denote. The
        # latin1 -> utf-8 round trip re-assembles multi-byte UTF-8 characters,
        # which 'unicode_escape' would otherwise leave split into single bytes.
        decoded_response = (response.encode().decode('unicode_escape')).encode('latin1').decode('utf-8')
    except UnicodeError:
        # Nothing in this function parses JSON, so json.JSONDecodeError can
        # never occur here. What does fail is the round trip above:
        # UnicodeDecodeError (dangling backslash, escaped byte that is not
        # valid UTF-8) or UnicodeEncodeError (escape above U+00FF). Both are
        # UnicodeError subclasses; report them as "could not parse".
        return None

    chat_history = build_chat_history(decoded_response, current_history, gpt_simulated)
    return {
        "id": id,
        "conversation": chat_history
    }

In [15]:
# Desired output format (.jsonl) -> id will be removed later
# {"id": "1", "conversation": [{"role": "user", "value": "Hello!"}, {"role": "assistant", "value": "Hi, how can I help you today?"}, {"role": "user", "value": "I need assistance with my account."}]}

situations_name = "specialty_x_domain"
path_to_results = "../results/gpt_results.jsonl"
gpt_simulated = "chatbot"  ### Always adjust that: role name stored for every turn parsed in this run
multiturn_path = "../results/multiturn_" + situations_name + ".jsonl"

# Read in the conversations we have so far and store them in a dict: id -> list of turns
multiturn_conv = {}
with open(multiturn_path, 'r', encoding='utf-8') as file:
    for line in file:
        if not line.strip():
            continue  # tolerate blank lines, e.g. a trailing newline at the end of the file
        line_as_json = json.loads(line)
        multiturn_conv[line_as_json["id"]] = line_as_json["conversation"]

# Extract content from json
# NOTE: the updated conversations are written back to multiturn_path below, so
# running this cell twice on the same results file appends every response twice.
print("Parsing GPT responses...")
questions_failed_to_parse = []
parsed_response = None   # defined up front so it exists even if the results file is empty
example_response = None  # last successfully parsed entry, shown for verification below
with open(path_to_results, 'r', encoding='utf-8') as file:
    for line_number, line in enumerate(tqdm(file), start=1):
        if not line.strip():
            continue

        # Reset per line so a failure is never attributed to the previous line's id.
        conv_id = None
        response_content = None
        try:
            data = json.loads(line)  # Parse each line as JSON
            conv_id = data.get("custom_id")  # format "0-0" "task_id - subtopic_id"
            # `or {}` / `or []` also cover keys that are present but null.
            choices = ((data.get("response") or {}).get("body") or {}).get("choices") or []
            if choices:
                response_content = (choices[0].get("message") or {}).get("content")
        except (json.JSONDecodeError, AttributeError, TypeError, LookupError):
            # Invalid JSON or an unexpected record shape: response_content stays
            # None, so the line is recorded as a failure below.
            pass

        parsed_response = None
        if response_content and conv_id:
            parsed_response = parse_gpt_response(response=response_content, id=conv_id, current_history=multiturn_conv.get(conv_id, []), gpt_simulated=gpt_simulated)

        if parsed_response:
            multiturn_conv[conv_id] = parsed_response["conversation"]
            example_response = parsed_response
        else:
            # Fall back to the line number when the record carries no usable id.
            questions_failed_to_parse.append(conv_id if conv_id else f"line {line_number}")
print("Parsing completed")

# Save to JSON file
with open(multiturn_path, 'w', encoding='utf-8') as jsonl_file:
    for conv_id, conv in multiturn_conv.items():
        jsonl_file.write(json.dumps({"id": conv_id, "conversation": conv}) + '\n')

# Output the parsed data (for verification)
print(f"Medical prompts have been saved to {multiturn_path}")
print("See an example below:")
if example_response is not None:
    print(json.dumps(example_response, indent=4))
else:
    print("(no response was parsed successfully)")
print(f"Failed to parse {len(questions_failed_to_parse)} questions:")
print(questions_failed_to_parse)

Parsing GPT responses...


29it [00:00, 2779.14it/s]

Parsing completed
Medical prompts have been saved to ../results/multiturn_specialty_x_domain.jsonl
See an example below:
{
    "id": "20-99",
    "conversation": [
        {
            "role": "user",
            "value": "\"What are the latest advancements in immunotherapy for treating metastatic melanoma, and how might individual variations in immune response affect treatment efficacy?\""
        },
        {
            "role": "chatbot",
            "value": "Recent advancements in immunotherapy for metastatic melanoma include the development of combination therapies that use checkpoint inhibitors like nivolumab and ipilimumab to enhance immune responses against melanoma cells. Additionally, personalized treatment approaches, such as tumor-infiltrating lymphocyte (TIL) therapy and cancer vaccines, are being explored to tailor treatments based on individual patient's tumor characteristics.\n\nIndividual variations in immune response can significantly affect treatment efficacy. Fact


