### Part 3 -> Multiturn conversation dataset generation

In [28]:
import json
from tqdm import tqdm
from openai import OpenAI
import random
import pandas as pd

# Location of the plain-text file holding the OpenAI API key.
path_to_api_key: str = "../API_KEY.txt"
# Read the key through a context manager so the file handle is closed right
# away, and strip surrounding whitespace: key files usually end with a newline,
# which must not become part of the credential sent to the API.
with open(path_to_api_key, 'r') as key_file:
    my_api_key = key_file.read().strip()
client = OpenAI(api_key=my_api_key)
# Chat-completions model used for every request made through `client`.
gpt_model: str = "gpt-4o"

In [14]:
# Generate multiturn conversations between Meditron (simulated by gpt-4o) and a Physician (simulated by gpt-4o)

# System prompts
content_system = "You are a medical AI chatbot that answers questions from physicians."
content_user = "You are a physician that interacts with a medical AI chatbot. You see the chat history between you (user) and the medical AI chatbot (chatbot). Ask a follow up question or write 'I AM ALL GOOD THANKS', if there is no follow up question to ask."

# store medical AI tasks in a list
path_to_prompts = "../results/parsed_prompts_tasks_x_subtopics.json"
# JSON text is UTF-8; naming the encoding keeps the load independent of the
# platform's locale default (which is not UTF-8 everywhere).
with open(path_to_prompts, 'r', encoding='utf-8') as file:
    task_x_subtopics_prompts = json.load(file)  # Load JSON data from file
# The "prompt" field of the first entry is used as the physician's opening message.
initial_physician_prompt = task_x_subtopics_prompts[0]["prompt"]
print(initial_physician_prompt)

Patient is a 45-year-old male with a history of type 2 diabetes and hypertension. He presents with fatigue, unintentional weight loss, and prolonged healing of minor cuts over the past few months. Recent lab tests showed elevated blood glucose levels and low hemoglobin. Given his medical history and symptoms, could these be indicative of an underlying issue related to cellular metabolism or regenerative processes, and what further diagnostic tests would you recommend to explore potential cellular or hematologic disorders?


In [22]:
# Function that generates the next step of a conversation

def next_conversation_step(system_prompt, chat_histroy, verbose = False):
    """Generate the next turn of a conversation and return the extended transcript.

    Args:
        system_prompt: Label of the speaker whose turn is generated. The value
            "user" selects the physician persona; any other value selects the
            medical-chatbot persona. The label is also written, in parentheses,
            in front of the generated turn.
        chat_histroy: Transcript so far. It is sent to the model as one single
            user message.
        verbose: When true, print the outgoing transcript and the model reply.

    Returns:
        ``chat_histroy`` followed by a newline, ``(<system_prompt>) `` and the
        text of the model's reply.
    """
    physician_persona = "You are a physician that interacts with a medical AI chatbot. You see the chat history between you (user) and the medical AI chatbot (chatbot). Ask a follow up question or write 'I AM ALL GOOD THANKS', if there is no follow up question to ask."
    chatbot_persona = "You are a medical AI chatbot that answers questions from physicians."
    persona = physician_persona if system_prompt == "user" else chatbot_persona

    if verbose:
        print("Send prompt to GPT:")
        print("#########")
        print(chat_histroy)
        print("########")

    # The persona goes in the system message; the whole transcript is the user message.
    request_messages = [
        {"role": "system", "content": persona},
        {"role": "user", "content": f"{chat_histroy}"},
    ]
    completion = client.chat.completions.create(
        model=gpt_model,
        messages=request_messages,
    )
    reply = completion.choices[0].message.content

    if verbose:
        print("Receiving responses from GPT...")
        print(reply)

    turn_prefix = "\n" + f"({system_prompt}) "
    return chat_histroy + turn_prefix + reply

In [None]:
# Phrase the physician persona is instructed to write when it has no follow-up question.
stop_phrase = "I AM ALL GOOD THANKS"
# Number of turns to generate after the physician's opening prompt.
max_generated_turns = 3

# Every entry is the cumulative transcript up to and including that turn,
# because next_conversation_step returns the history it was given plus the new turn.
conversation_history = ["(user) " + initial_physician_prompt]
for turn_index in tqdm(range(max_generated_turns)):
    # Turns alternate, starting with the chatbot answering the opening prompt.
    speaker = "chatbot" if turn_index % 2 == 0 else "user"
    previous_transcript = conversation_history[-1]
    extended_transcript = next_conversation_step(speaker, previous_transcript)
    conversation_history.append(extended_transcript)

    # Only the newly generated text is inspected: comparing the whole cumulative
    # transcript against the stop phrase can never match.
    latest_turn = extended_transcript[len(previous_transcript):]
    if speaker == "user" and stop_phrase in latest_turn:
        break

100%|██████████| 3/3 [00:12<00:00,  4.19s/it]


In [26]:
conversation_history

['Patient is a 45-year-old male with a history of type 2 diabetes and hypertension. He presents with fatigue, unintentional weight loss, and prolonged healing of minor cuts over the past few months. Recent lab tests showed elevated blood glucose levels and low hemoglobin. Given his medical history and symptoms, could these be indicative of an underlying issue related to cellular metabolism or regenerative processes, and what further diagnostic tests would you recommend to explore potential cellular or hematologic disorders?',
 "Patient is a 45-year-old male with a history of type 2 diabetes and hypertension. He presents with fatigue, unintentional weight loss, and prolonged healing of minor cuts over the past few months. Recent lab tests showed elevated blood glucose levels and low hemoglobin. Given his medical history and symptoms, could these be indicative of an underlying issue related to cellular metabolism or regenerative processes, and what further diagnostic tests would you reco

In [None]:
class multiturn_style:
    """Style configuration for one simulated physician/chatbot conversation.

    An instance stores the options chosen for a conversation and turns them
    into two system prompts: one for the model playing the medical chatbot
    (``system_prompt_chatbot``) and one for the model playing the physician
    (``system_prompt_user``).

    Accepted option values:
        cb_length: "chat", "short", "long", "exactly_2_sentences",
            "exactly_4_sentences".
        cb_style: "chat", "flat text", "bullets", "step_by_step".
        cb_scientific: "popularization", "standard", "scientific".
        u_length: "short", "long", "exactly_1_sentence", "exactly_3_sentences".

    Values are validated when a prompt is built, not in ``__init__``; an
    unknown value raises ``ValueError`` at that point. In emergency mode the
    length/style/scientific options are neither used nor validated.
    """

    # --- Chatbot prompt fragments (text kept verbatim, including its spelling
    # and the indentation embedded after the first line break) ---------------
    _CHATBOT_BASE_PROMPT = (
        "You are a medical AI chatbot designed to assist physicians by answering their questions.\n"
        "        The style of your answers must follow these rules:"
    )
    _CHATBOT_EMERGENCY_RULE = "The user is using you (the medical AI chatbot) during a medical emergency. Provide short, concises and well structured answers. Your answers have to be clear and very easy to understand quickly in a high presure and stres environment \n"
    _CHATBOT_LENGTH_RULES = {
        "chat": "",  # necessary information is added in the style section
        "short": "Provide short and concises answers. \n",
        "long": "Provide long and detailed answers. \n",
        "exactly_2_sentences": "All your answers should be exactly 2 sentences long. \n",
        "exactly_4_sentences": "All your answers should be exactly 4 sentences long. \n",
    }
    _CHATBOT_STYLE_RULES = {
        "chat": "The user is using you (the medical AI chatbot) in chat mode. Provide short and concise answers that make it possible for the user to chat with you. Ask the user follow up questions about his question if his question is not clear enough. \n",
        "flat text": "Provide your answers in flat text. \n",
        "bullets": "Provide your answers using bullet points. \n",
        "step_by_step": "Provide your answers by giving the physician step by step advice on how to solve the question they asked. \n",
    }
    _CHATBOT_SCIENTIFIC_RULES = {
        "popularization": "Provide your answers using easy to understand terms and avoid using scientific terms that are difficult to understand. \n",
        "standard": "",  # Don't add anything if standard
        "scientific": "Provide your answers using a lot of very specific scientific terms. \n",
    }

    # --- Physician prompt fragments -----------------------------------------
    _USER_BASE_PROMPT = "You are a physician using a medical AI chatbot to help you take care of your patients. \n"
    _USER_EMERGENCY_RULE = "You are using the medical AI chatbot during a medical emergency. Ask short, concise and clearly structured questions. \n"
    _USER_LENGTH_RULES = {
        "short": "Ask short and concise questions. \n",
        "long": "Ask long and detailed questions. \n",
        "exactly_1_sentence": "All your questions should be exactly 1 sentence long. \n",
        "exactly_3_sentences": "All your questions should be exactly 3 sentences long. \n",
    }
    _USER_SCIENTIFIC_RULE = "Use a lot of very specific scientific terms in your questions. \n"

    def __init__(self, country, setting, id, cb_length = "short", cb_style = "flat text", emergency = False, u_length = "short",
                cb_scientific = "standard", u_scientific = False, u_bad_grammar = False, u_knows_diagnosis = False):

        # Initialize attributes
        self.country = country # country the user is working in
        self.setting = setting  # Can be 'high', 'low', or 'war' -> not used so far
        self.id = id # based on prompt origin and prompt number
        self.chatbot_length = cb_length # Length of chatbots answer -> possibilities: chat, short, long, exactly_2_sentences, exactly_4_sentences
        # NOTE: the attribute name is misspelled; it is kept for backward
        # compatibility and mirrored by the `chatbot_style` property below.
        self.chatbot_sytle = cb_style # Style of chatbots answer -> possibilities: chat, flat text, bullets, step_by_step
        self.emergency = emergency # User uses chatbot in emergency mode
        self.user_length = u_length # Length of user questions -> possibilities: short, long, exactly_1_sentence, exactly_3_sentences
        self.chatbot_scientific = cb_scientific # If chatbot uses a lot of scientific term -> possibilities: popularization, standard, scientific
        self.user_scientific = u_scientific # User uses a lot of very scientific terms
        self.bad_grammar = u_bad_grammar # user has bad grammar / does a lot of spelling mistakes -> not used so far
        self.knows_diagnosis = u_knows_diagnosis # User has an idea about the diagnosis -> not used by the prompts below
        self.presentation = ""  # Placeholder for additional info
        self.infs = 0  # Count of questions asked
        self.MAX_INFS = 10  # Maximum number of questions allowed

    @property
    def chatbot_style(self):
        """Correctly spelled alias for the legacy ``chatbot_sytle`` attribute."""
        return self.chatbot_sytle

    @chatbot_style.setter
    def chatbot_style(self, value):
        self.chatbot_sytle = value

    @staticmethod
    def _pick(options, value, option_name) -> str:
        """Return the prompt fragment for ``value`` or raise a ``ValueError`` naming the option."""
        try:
            return options[value]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are just as invalid.
            raise ValueError(f"Invalid {option_name}: '{value}'") from None

    # methods
    def system_prompt_chatbot(self) -> str:
        """Build the system prompt for the model playing the medical chatbot.

        The result is also stored in ``self.base_system_prompt_chatbot``.

        Raises:
            ValueError: outside emergency mode, if ``chatbot_length``,
                ``chatbot_sytle`` or ``chatbot_scientific`` holds an unknown
                value (checked in that order).
        """
        prompt = self._CHATBOT_BASE_PROMPT

        # Chatbot can be used in emergency or normal mode
        if self.emergency:
            prompt += self._CHATBOT_EMERGENCY_RULE
        else:
            prompt += self._pick(self._CHATBOT_LENGTH_RULES, self.chatbot_length, "chatbot_length")
            prompt += self._pick(self._CHATBOT_STYLE_RULES, self.chatbot_sytle, "chatbot_style")
            prompt += self._pick(self._CHATBOT_SCIENTIFIC_RULES, self.chatbot_scientific, "chatbot_scientific")

        self.base_system_prompt_chatbot = prompt
        return prompt

    def system_prompt_user(self) -> str:
        """Build the system prompt for the model playing the physician.

        The prompt addresses the physician persona and is derived from
        ``emergency``, ``user_length`` and ``user_scientific``. The result is
        stored in ``self.base_system_prompt_user``; the chatbot prompt
        attribute is left untouched.

        Raises:
            ValueError: outside emergency mode, if ``user_length`` holds an
                unknown value.
        """
        prompt = self._USER_BASE_PROMPT

        # User can be in emergency or normal mode
        if self.emergency:
            prompt += self._USER_EMERGENCY_RULE
        else:
            prompt += self._pick(self._USER_LENGTH_RULES, self.user_length, "user_length")
            if self.user_scientific:
                prompt += self._USER_SCIENTIFIC_RULE

        self.base_system_prompt_user = prompt
        return prompt


In [None]:
# Draw one value at random from the first column of the countries CSV
# (presumably the country names; confirm against the file's header).
countries = pd.read_csv("../resources/countries_by_income_category.csv")
first_column_values = countries.iloc[:, 0].tolist()
sampled_country = random.choice(first_column_values)


Gabon


In [None]:
def system_prompt(self, final=False) -> str:
    """Assemble a randomized system prompt for a diagnosing doctor.

    Each part of the prompt is drawn with ``random.choice`` from a list of
    paraphrases, in a fixed order (opening sentence, setting description,
    optional test request, question limit, diagnosis instruction, optional
    image request, optional suspected diagnosis, presentation).

    NOTE(review): this is written as a method but defined at notebook top
    level; it appears to belong to a class not shown in this notebook, since
    it reads attributes such as ``brief``, ``bullet``, ``img_request`` and
    ``correct_diagnosis``.

    Args:
        self: Object providing ``country``, ``setting`` ("high", "low" or
            "war"), ``brief``, ``bullet``, ``infs``, ``MAX_INFS``,
            ``img_request``, ``knows_diagnosis``, ``correct_diagnosis`` (read
            only when ``knows_diagnosis`` is true and ``final`` is false) and
            ``presentation``.
        final: When true, the question-limit sentence omits the number of
            questions asked so far, the suspected-diagnosis hint is dropped,
            and the "keep asking questions" reminder is not appended.

    Returns:
        The instruction text followed by the presentation paragraph. The
        instruction text alone is also stored in ``self.base``.

    Raises:
        KeyError: if ``self.setting`` is not "high", "low" or "war".
    """
    setting_doctor = {
        "high": [
            "high resource setting, where you have access to all necessary equipment and patient history, allowing a thorough diagnosis.",
            "well-equipped medical facility with full access to diagnostic tools and comprehensive patient history for detailed examination.",
            "advanced healthcare environment with modern equipment and complete patient records, facilitating thorough evaluations.",
            "fully resourced clinical setting with extensive diagnostic capabilities and patient information for accurate diagnosis.",
            "top-tier hospital where all medical equipment and patient data are readily available for comprehensive procedures.",
            "high-tech medical center with state-of-the-art equipment and full patient histories for precise diagnoses.",
            "affluent medical practice with unlimited access to diagnostic tests and records, ensuring thorough examinations.",
            "well-funded healthcare institution with complete equipment and patient information for meticulous diagnostic work.",
            "modern hospital environment equipped with all necessary tools and histories for detailed diagnoses.",
            "comprehensive medical setting with abundant resources and data, allowing exhaustive evaluations.",
            "optimal healthcare setting with full access to equipment and records, enabling thorough diagnostics.",
        ],
        "low": [
            "low resource setting, where you should avoid costly or unavailable tests and work with limited patient history to make a diagnosis.",
            "under-equipped facility with scarce resources, requiring you to rely on basic tools and minimal patient information.",
            "minimalistic healthcare environment where advanced diagnostics are unavailable, necessitating careful questioning.",
            "setting with limited medical supplies and equipment, forcing you to prioritize essential diagnostic methods.",
            "resource-constrained clinic where you must make do without extensive tests or detailed patient histories.",
            "environment with restricted access to medical technology, relying on fundamental examination techniques.",
            "basic healthcare setting with few resources, emphasizing the need for astute observation and questioning.",
            "facility lacking advanced equipment and comprehensive records, requiring ingenuity in diagnosis.",
            "sparse medical environment where you depend on limited tools and patient interaction for diagnosis.",
            "setting with minimal resources and no access to advanced tests, relying heavily on clinical skills.",
            "deprived healthcare scenario where you avoid costly tests and use limited history to diagnose.",
        ],
        "war": [
            "war zone setting, where resources are scarce, the environment is unstable, and patients may be traumatized or injured, requiring a fast, empathetic diagnosis. You do not have access to the patient's history.",
            "conflict area with critical resource shortages, unstable conditions, and wounded patients needing immediate attention without prior history.",
            "active combat zone where medical supplies are limited, and you must provide quick diagnoses under pressure without patient records.",
            "battlefield medical setting with scarce equipment, focusing on rapid, compassionate care for injured individuals with no history available.",
            "war-torn environment where you face resource scarcity and must deliver swift diagnoses to traumatized patients without prior information.",
            "emergency field hospital in a conflict zone, dealing with instability and limited resources, requiring prompt diagnosis without histories.",
            "crisis area with minimal medical support, where you offer urgent care to injured patients without access to their medical backgrounds.",
            "military conflict setting with depleted resources, demanding immediate, empathetic diagnosis for patients lacking history.",
            "unstable war environment with scarce supplies, where you must quickly assess and diagnose without patient records.",
            "front-line medical post during war, facing resource limitations and no patient histories, necessitating rapid, compassionate diagnoses.",
            "high-risk conflict zone with limited medical resources, providing fast diagnoses to injured patients without access to their histories.",
        ],
    }

    # Opening sentence: the paraphrase set depends on the requested answer
    # style (brief takes precedence over bullet). Each template takes the
    # country and a setting description.
    if self.brief:
        first_sentence_options = [
            "As a doctor in {}, you are in {} Your task is to diagnose a patient by asking concise questions and providing brief reasoning. Keep your explanations short and to the point, and ask pertinent questions.",
            "You are practicing medicine in {}. {} Your mission is to determine the patient's diagnosis through concise inquiry, being efficient in your thought process. Make your thinking clear but succinct, then pose relevant questions.",
            "While working as a physician in {}, {} Your role is to identify the patient's ailment by asking concise questions and efficiently analyzing your thoughts. Be explicit yet brief in your reasoning, and ask appropriate questions.",
            "In {}, you find yourself in {} Your task is to diagnose a patient by asking concise questions and providing brief reasoning. Keep your explanations short and to the point, and ask pertinent questions.",
            "Serving as a doctor in {}, {} Your mission is to determine the patient's diagnosis through concise inquiry, being efficient in your thought process. Make your thinking clear but succinct, then pose relevant questions.",
            "As a physician in {}, you are confronted with {} Your role is to identify the patient's ailment by asking concise questions and efficiently analyzing your thoughts. Be explicit yet brief in your reasoning, and ask appropriate questions.",
        ]
    elif self.bullet:
        first_sentence_options = [
            "As a doctor in {}, you are in {} Your task is to diagnose a patient by asking concise questions and providing brief reasoning. Be explicit and to the point, present your reasoning and questions in bullet points.",
            "You are practicing medicine in {}. {} Your mission is to determine the patient's diagnosis through concise inquiry, being efficient in your thought process. Make your thinking clear and format your reasoning and questions as bullet points.",
            "While working as a physician in {}, {} Your role is to identify the patient's ailment by asking concise questions and efficiently analyzing your thoughts. Be explicit and present your questions and reasoning in bullet points.",
            "In {}, you find yourself in {} Your task is to diagnose a patient by asking concise questions and providing brief reasoning. Make your thinking clear and use bullet points to present your reasoning and questions.",
            "Serving as a doctor in {}, {} Your mission is to determine the patient's diagnosis through concise inquiry, being efficient in your thought process. Make your thinking clear and present your reasoning and questions in bullet points.",
            "As a physician in {}, you are confronted with {} Your role is to identify the patient's ailment by asking concise questions and efficiently analyzing your thoughts. Be explicit and format your reasoning and questions as bullet points.",
        ]
    else:
        first_sentence_options = [
            "As a doctor in {}, you are in {} Your task is to diagnose a patient by asking questions and carefully considering your thoughts. Explain your reasoning and ask pertinent questions.",
            "You are practicing medicine in {}. {} Your mission is to determine the patient's diagnosis through inquiry, being thorough in your thought process. Make your thinking explicit, then pose relevant questions.",
            "While working as a physician in {}, {} Your role is to identify the patient's ailment by asking questions and thoroughly analyzing your thoughts. Be explicit about your reasoning, and ask appropriate questions.",
            "In {}, you find yourself in {} Your task is to diagnose a patient by asking questions and carefully considering your thoughts. Explain your reasoning and ask pertinent questions.",
            "Serving as a doctor in {}, {} Your mission is to determine the patient's diagnosis through inquiry, being thorough in your thought process. Make your thinking explicit, then pose relevant questions.",
            "As a physician in {}, you are confronted with {} Your role is to identify the patient's ailment by asking questions and thoroughly analyzing your thoughts. Be explicit about your reasoning, and ask appropriate questions.",
        ]

    # Draw the template first and the setting description second, so a seeded
    # random generator yields the same picks as before.
    first_sentence_template = random.choice(first_sentence_options)
    setting_description = random.choice(setting_doctor[self.setting])
    first_sentence = first_sentence_template.format(self.country, setting_description)

    # Test request sentence for high resource setting
    if self.setting == "high" and self.infs < self.MAX_INFS:
        test_request_options = [
            "You may also request tests if they are available and can assist in your diagnosis, using the format \"REQUEST TEST: [test]\". For instance, \"REQUEST TEST: Chest_X-Ray\".",
            "If helpful and accessible, you should request tests to aid your diagnosis using the format \"REQUEST TEST: [test]\". Example: \"REQUEST TEST: Chest_X-Ray\".",
            "Feel free to request any available tests that might help in diagnosing, using \"REQUEST TEST: [test]\". For example, \"REQUEST TEST: Chest_X-Ray\".",
        ]
        test_request_sentence = random.choice(test_request_options)
    else:
        test_request_sentence = ""

    # Question limit sentence options
    if final:
        question_limit_options = [
            "You are only allowed to ask {} questions in total before you must make a decision.",
            "You have a limit of {} questions to ask before making a diagnosis.",
            "You can ask up to {} questions before you need to decide.",
            "You may ask a maximum of {} questions before diagnosing.",
            "A total of {} questions are allowed before you must diagnose.",
            "You have {} questions in total to reach your diagnosis.",
            "Only {} questions can be asked before making your decision.",
            "You are permitted to ask up to {} questions before you must decide.",
        ]
        question_limit_sentence = random.choice(question_limit_options).format(self.MAX_INFS)
    else:
        question_limit_options = [
            "You are only allowed to ask {} questions in total before you must make a decision. You have asked {} questions so far.",
            "You have a limit of {} questions to ask before making a diagnosis. So far, you've asked {} questions.",
            "You can ask up to {} questions before you need to decide. Currently, you have asked {} questions.",
            "You may ask a maximum of {} questions before diagnosing. You have already asked {} questions.",
            "A total of {} questions are allowed before you must diagnose. Up to now, you've asked {} questions.",
            "You have {} questions in total to reach your diagnosis. So far, you've used {} questions.",
            "Only {} questions can be asked before making your decision. You have asked {} questions till now.",
            "You are permitted to ask up to {} questions before you must decide. At present, you have asked {} questions.",
        ]
        question_limit_sentence = random.choice(question_limit_options).format(self.MAX_INFS, self.infs)

    # Diagnosis instruction options
    diagnosis_instruction_options = [
        "Once you have decided to make a diagnosis, please type \"DIAGNOSIS READY: [diagnosis here]\".",
        "When ready to diagnose, enter \"DIAGNOSIS READY: [diagnosis here]\".",
        "After concluding your diagnosis, submit it using \"DIAGNOSIS READY: [diagnosis here]\".",
        "Upon reaching a diagnosis, please type \"DIAGNOSIS READY: [diagnosis here]\".",
        "Once you are confident in your diagnosis, enter \"DIAGNOSIS READY: [diagnosis here]\".",
        "When you have determined the diagnosis, submit it using \"DIAGNOSIS READY: [diagnosis here]\".",
        "After you have made a diagnosis, indicate it by typing \"DIAGNOSIS READY: [diagnosis here]\".",
        "When you are prepared to diagnose, please provide it using \"DIAGNOSIS READY: [diagnosis here]\".",
    ]
    diagnosis_instruction_sentence = random.choice(diagnosis_instruction_options)

    # Image request sentence (each option carries its own leading space)
    if self.img_request:
        image_request_options = [
            " You may also request medical images related to the disease to be returned with \"REQUEST IMAGES\".",
            " Additionally, you can ask for medical images by typing \"REQUEST IMAGES\".",
            " If needed, request relevant medical images using \"REQUEST IMAGES\".",
        ]
        image_request_sentence = random.choice(image_request_options)
    else:
        image_request_sentence = ""

    # Knows diagnosis sentence (each option carries its own leading space)
    if self.knows_diagnosis and not final:
        knows_diagnosis_options = [
            " You suspect that the patient suffers from {}. This affects the questions you ask the patient.",
            " You have a hunch that the patient may have {}. This should influence your questioning.",
            " You believe the patient might be suffering from {}. Let this guide your questions.",
        ]
        knows_diagnosis_sentence = random.choice(knows_diagnosis_options).format(self.correct_diagnosis)
    else:
        knows_diagnosis_sentence = ""

    # Combine all parts. Only non-empty sentences are joined, so a missing
    # test-request sentence no longer leaves a double space behind.
    leading_sentences = [
        first_sentence,
        test_request_sentence,
        question_limit_sentence,
        diagnosis_instruction_sentence,
    ]
    self.base = (
        " ".join(sentence for sentence in leading_sentences if sentence)
        + image_request_sentence
        + knows_diagnosis_sentence
    )
    if not final:
        # Leading space keeps this reminder from running into the previous sentence.
        self.base = self.base + " If this isn't your final question, you shouldn't provide a diagnosis yet. Continue asking questions until you're ready to diagnose."

    # Presentation options
    presentation_options = [
        "\n\nBelow is all of the information you have: {}.\n\nRemember, you must discover their disease by asking them questions.",
        "\n\nHere is all the information available to you: {}.\n\nKeep in mind, you need to find out their disease by asking questions.",
        "\n\nThe following is all the information you possess: {}.\n\nRemember to uncover their disease through your questions.",
        "\n\nYou have the following information at your disposal: {}.\n\nYour task is to diagnose their disease by asking questions.",
        "\n\nAll the information you currently have is: {}.\n\nRemember, you need to determine their illness through your inquiries.",
        "\n\nHere's what you know so far: {}.\n\nDon't forget, you must identify their disease by questioning them.",
        "\n\nProvided below is all the information you have: {}.\n\nKeep in mind, you must discover their illness by asking them questions.",
        "\n\nThe information available to you is as follows: {}.\n\nRemember to find out their disease by asking appropriate questions.",
    ]
    presentation = random.choice(presentation_options).format(self.presentation)
    # Remind about tests only while at least two questions remain in a high-resource setting.
    if (self.infs <= self.MAX_INFS - 2) and (self.setting == "high"):
        presentation = presentation + " You can also request tests if they can help in your diagnosis."
    return self.base + presentation
