### Test script for multiturn conversation dataset generation

* Cell 1: load packages
* Cell 2: Generate 100000 system prompts to check for potential bugs in the code
* Cell 3 and further: Generate a conversation example

In [1]:
import json
from tqdm import tqdm
from openai import OpenAI
import random
import pandas as pd
from multiturn_modul import MultiturnStyle, get_multiturn_style_additional

# Location of the plain-text file holding the OpenAI API key (kept outside the notebook).
path_to_api_key: str = "../API_KEY.txt"
# Read the key inside a context manager so the file handle is closed right away,
# and strip surrounding whitespace: a trailing newline in the file would
# otherwise be passed on as part of the key.
with open(path_to_api_key, 'r') as key_file:
    my_api_key = key_file.read().strip()
client = OpenAI(api_key=my_api_key)
# Model name used for every chat-completion request issued in this notebook.
gpt_model: str = "gpt-4o"

In [3]:
# Test get_multiturn_style: build the chatbot and user system prompts 100000
# times so that any exception raised while rendering them surfaces here.
# NOTE(review): get_multiturn_style is not among the names imported from
# multiturn_modul in the first cell (only MultiturnStyle and
# get_multiturn_style_additional are), so this cell raises NameError as written.
countries = pd.read_csv("../resources/countries_by_income_category.csv")
# Picks one value from the first CSV column (presumably country names -- confirm).
sampled_country = random.choice(countries.iloc[:, 0].tolist())
# NOTE(review): sampled_country and id are not used by the loop below, which
# hard-codes "Switzerland" and builds its own ids; `id` also shadows the builtin.
id = "0-122"

# Cycle the id prefix through 0..11 while keeping the "-112" suffix fixed.
for i in range(100000):
    test = get_multiturn_style(str(i%12)+"-112", "Switzerland")
    test.system_prompt_chatbot()
    test.system_prompt_user()

NameError: name 'get_multiturn_style' is not defined

In [3]:
# Test get_multiturn_style_additional

import json
import random

def sample_json_entry(json_file_path):
    """
    Randomly samples an entry from a JSON file and returns the id and context.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so non-ASCII text is read the same way on every machine.

    Parameters:
        json_file_path (str): Path to the JSON file. Its top-level value must
            be a non-empty list of objects that each carry 'id' and 'context'.

    Returns:
        tuple: The 'id' and 'context' values of the sampled entry, or
            (None, None) if the file could not be read or the data does not
            have the expected shape. In that case a message starting with
            "Error: " is printed instead of an exception being raised.
    """
    try:
        # Load the JSON data with an explicit encoding
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Ensure the data is a list of entries
        if not isinstance(data, list):
            raise ValueError("The JSON file must contain a list of entries.")

        # random.choice fails with an unhelpful IndexError on an empty list
        if not data:
            raise ValueError("The JSON file contains no entries to sample from.")

        # Randomly select an entry
        sampled_entry = random.choice(data)

        # Only JSON objects (dicts) can carry the required fields
        if not isinstance(sampled_entry, dict):
            raise ValueError("Each entry must be a JSON object with 'id' and 'context' fields.")

        # Extract 'id' and 'context'
        entry_id = sampled_entry.get('id')
        context = sampled_entry.get('context')

        # Validate required fields
        if entry_id is None or context is None:
            raise ValueError("Each entry must contain 'id' and 'context' fields.")

        return entry_id, context

    except Exception as e:
        # Deliberately broad: callers rely on the (None, None) sentinel for
        # every failure mode (missing file, bad JSON, wrong shape).
        print(f"Error: {e}")
        return None, None

# Smoke test: sample 1000 random entries and render both system prompts for
# each, so that any exception raised for a particular entry surfaces here.
# `id` and `test` stay bound to the last sampled entry after the loop and are
# read again by later cells. NOTE(review): `id` shadows the builtin.
id = ""
for i in tqdm(range(1000)):
    # NOTE(review): sample_json_entry returns (None, None) on any failure and
    # that pair is passed on unchecked -- confirm the helper tolerates it.
    id, context = sample_json_entry("../results/parsed_prompts_task_x_specialties_x_demographic_x_answerstyle.json")
    test = get_multiturn_style_additional(id, context)
    test.system_prompt_chatbot()
    test.system_prompt_user()
# Show the prompts and id of the last sampled entry for manual inspection.
print(test.system_prompt_chatbot())
print(test.system_prompt_user())
print(id)

100%|██████████| 1000/1000 [02:39<00:00,  6.26it/s]

You are a medical AI chatbot designed to assist health care workers working in Lithuania by answering their questions. 
The style of your answers must follow these rules: 
- Respectful, polite interaction: You must always engage in a respectful, polite, and courteous manner, maintaining professionalism in all interactions.
- Honest, evidence-based information: All responses have to be based on the latest medical evidence and guidelines
- Ethical and safe content: Under no circumstances should you provide fake, harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. If a question is unclear or factually incorrect, you should explain why rather than attempting to answer it inaccurately.
- Adhere meticulously to any specific formatting instructions provided by the user in the prompt, ensuring the generated output aligns precisely with their requirements.
- Tailor responses to the geographical context, resource setting, level of care, seasonality/epidemiology, and medical




In [37]:
# Test mode
# Generate multiturn conversations between Meditron (simulated by gpt-4o) and a Physician (simulated by gpt-4o)

def get_prompt_by_id(id_value, json_file_path):
    """
    Retrieves the 'prompt' field of the JSON object with the specified 'id'.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so prompts containing non-ASCII text are read the same way on
    every machine.

    Args:
        id_value (str): The id to search for in the JSON file.
        json_file_path (str): Path to the .json file containing the data.

    Returns:
        str: The content of the 'prompt' field of the first entry whose 'id'
            equals id_value.
        None: If the id is not found, the file does not exist, or the file is
            not valid JSON. A message is printed in each of these cases.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"File not found: {json_file_path}")
        return None
    except json.JSONDecodeError:
        print("Error decoding JSON. Please check the file format.")
        return None

    # The file is already closed here; the search only needs the parsed data.
    for entry in data:
        if entry.get("id") == id_value:
            return entry.get("prompt")

    # If id is not found
    print(f"ID '{id_value}' not found in the file.")
    return None


# Function that generates the next step of a conversation

def next_conversation_step(system_prompt, chat_histroy, verbose = False):
    """
    Ask the model for the next message and return the extended transcript.

    Despite its name, `system_prompt` is a speaker label, not prompt text:
    "user" selects the module-level `system_prompt_user`, any other value
    selects the module-level `system_prompt_chatbot`. The request is sent
    through the module-level `client` using `gpt_model`, so all four names
    must be defined before this function is called.

    Args:
        system_prompt (str): Label of the speaker whose turn is generated;
            it is also written in parentheses in front of the reply.
        chat_histroy (str): Transcript so far, sent as the single user message.
        verbose (bool): When True, print the outgoing transcript and the reply.

    Returns:
        str: `chat_histroy`, a newline, "(<label>) " and the model's reply.
    """
    # Only the selected global is evaluated, as in an if/else statement.
    instructions = system_prompt_user if system_prompt == "user" else system_prompt_chatbot

    if verbose:
        print("Send prompt to GPT:", "#########", chat_histroy, "########", sep="\n")

    request_messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": f"{chat_histroy}"},
    ]
    completion = client.chat.completions.create(model=gpt_model, messages=request_messages)
    reply = completion.choices[0].message.content

    if verbose:
        print("Receiving responses from GPT...")
        print(reply)

    speaker_tag = f"({system_prompt}) "
    return chat_histroy + "\n" + speaker_tag + reply

# System prompts
# Both are rendered from `test`, i.e. the last entry sampled by the smoke-test
# loop in the earlier cell; next_conversation_step reads them as globals.
system_prompt_chatbot = test.system_prompt_chatbot()
system_prompt_user = test.system_prompt_user()  
print(id)
# Look up the opening question belonging to the same entry id.
# NOTE(review): this reads the "..._answerstyle_2.json" file, whereas the id was
# sampled from "..._answerstyle.json" -- confirm both files share the same ids.
# get_prompt_by_id returns None when the id or the file is missing.
initial_prompt = get_prompt_by_id(id, "../results/parsed_prompts_task_x_specialties_x_demographic_x_answerstyle_2.json")
print(initial_prompt)


B-1124-4C-Mexico
How can I effectively explain the causes and symptoms of acne to a 15-year-old male patient in Mexico, and what treatment options might be suitable for him at this stage, including any potential side effects he should be aware of?
Answer in French.


In [38]:
# Check if everything is fine: show the values the conversation loop will use.
# N is read from the sampled style object and sizes the loop in the next cell
# (presumably the number of conversation turns -- confirm in multiturn_modul).
N = test.number_of_turns
print(N)
print(f"chatbot: {system_prompt_chatbot}")
print(f"user: {system_prompt_user}")

1
chatbot: You are a medical AI chatbot designed to assist health care workers working in Mexico by answering their questions. 
The style of your answers must follow these rules: 
- Respectful, polite interaction: You must always engage in a respectful, polite, and courteous manner, maintaining professionalism in all interactions.
- Honest, evidence-based information: All responses have to be based on the latest medical evidence and guidelines
- Ethical and safe content: Under no circumstances should you provide fake, harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. If a question is unclear or factually incorrect, you should explain why rather than attempting to answer it inaccurately.
- Adhere meticulously to any specific formatting instructions provided by the user in the prompt, ensuring the generated output aligns precisely with their requirements.
- Tailor responses to the geographical context, resource setting, level of care, seasonality/epidemiology, and

In [39]:
# Build the conversation turn by turn. Every list entry holds the complete
# transcript up to that point, so each request sees the whole history.
conversation_history = []
conversation_history.append("(user) " + initial_prompt)
# WARNING: every iteration is a paid API request -- check N before running.
# 2*N - 1 iterations give N chatbot replies and N - 1 user follow-ups, because
# the first user message is the initial prompt added above.
for i in tqdm(range(2*N - 1)):
    # Even steps are answered by the chatbot, odd steps by the simulated user.
    speaker = "chatbot" if i % 2 == 0 else "user"
    conversation_history.append(next_conversation_step(speaker, conversation_history[-1]))


100%|██████████| 1/1 [00:07<00:00,  7.38s/it]


In [40]:
# Each history entry contains the whole transcript so far, so the last entry
# is the full conversation.
print(conversation_history[-1])

(user) How can I effectively explain the causes and symptoms of acne to a 15-year-old male patient in Mexico, and what treatment options might be suitable for him at this stage, including any potential side effects he should be aware of?
Answer in French.
(chatbot) Bonjour,

Pour expliquer les causes et symptômes de l'acné à un adolescent de 15 ans, voici une approche simple et compréhensible :

**Causes de l'acné :**

1. **Production de sébum :** À l'adolescence, les hormones augmentent et stimulent les glandes sébacées à produire plus de sébum (huile), ce qui peut boucher les pores de la peau.
2. **Cellules mortes de la peau :** Ces cellules peuvent rester coincées avec le sébum et bloquer les pores.
3. **Bactéries :** La présence de bactéries dans les pores peut provoquer une inflammation.
4. **Facteurs hormonaux :** Les fluctuations hormonales peuvent augmenter la probabilité de développer de l'acné.

**Symptômes de l'acné :**

- Boutons et points noirs
- Papules (petits boutons ro