This is a sequential implementation provided for readability. It is also possible to run this in parallel (**recommended** for processing a large number of case vignettes). The corresponding parallel scripts can be found in `run_conversation_withPE.py` and `run_conversation_withoutPE.py`. See `README.md` for instructions on how to run the scripts.

In [None]:
import json
import pandas as pd
import os

from tqdm.auto import tqdm

import openai

from src.utils import *
from src.craft_md import get_doctor_prompt, get_patient_prompt, get_physical_exam_prompt, get_mcq_prompt, get_choices, get_all_choices, get_diagnosis_after_physical_exam_prompt

In [None]:
# Set up OpenAI API credentials
# Replace this information with your OpenAI key and organization ID.
# Each file is read for its first line only; surrounding whitespace is stripped.
# The files are opened in `with` blocks so the handles are closed right away.
with open("../keys/openai_key.txt", "r") as openai_key:
    openai.api_key = openai_key.readlines()[0].strip()

with open("../keys/rajpurkarlab_org_id.txt", "r") as organization_id:
    openai.organization = organization_id.readlines()[0].strip()

In [None]:
# Load the case table and the list of all answer choices from disk.
dataset = pd.read_csv("./data/dataset_final.tsv", sep="\t")
all_choices = get_all_choices(path='./data/all_choices.txt')

# Build one tuple per dataset row:
# (case_id, case_desc, physical_exam, choices for this row, all choices).
cases = []
for idx in range(dataset.shape[0]):
    row_fields = tuple(
        dataset.loc[idx, column]
        for column in ("case_id", "case_desc", "physical_exam")
    )
    cases.append((*row_fields, get_choices(dataset, idx), all_choices))

In [None]:
def process_case_withPE(case, save_path, gpt_model, num_runs = 10):
    """Run doctor-patient conversations for one case, then query diagnoses with the physical exam.

    For each trial, a patient model and a doctor model alternate turns until
    the doctor's reply contains no "?" or mentions "final diagnosis". The
    doctor is then asked three separate follow-ups, each prefixed with the
    physical exam prompt: a free-response diagnosis, a 4-choice MCQ and a
    many-choice MCQ. Results are stored under ``stats[case_id]`` and the whole
    ``stats`` dict is written to ``save_path`` as JSON after every trial.

    Trials already stored for this case in ``save_path`` are skipped, so an
    interrupted run can be resumed. Resume detection uses this function's own
    ``*_with_exam`` keys, so results saved in the same file under other keys
    (e.g. ``trial_{j}_doctor_responses``) do not count as completed trials.

    Args:
        case: Tuple ``(case_id, case_desc, exam, mcq_choices, mcq_many_choices)``.
        save_path: Path of the JSON results file; loaded if it exists and
            rewritten after each trial.
        gpt_model: ``"gpt-3.5"`` or ``"gpt-4"``; selects the API wrapper to call.
        num_runs: Total number of trials wanted for this case.

    Raises:
        KeyError: If ``gpt_model`` is not one of the supported model names.
    """
    mapping = {"gpt-3.5": call_gpt3_api, "gpt-4": call_gpt4_api}
    call_model = mapping[gpt_model]

    case_id, case_desc, exam, mcq_choices, mcq_many_choices = case

    doctor_prompt = get_doctor_prompt()
    patient_prompt = get_patient_prompt(case_desc)
    exam_prompt = get_physical_exam_prompt(exam)

    mcq_prompt = get_mcq_prompt(mcq_choices)
    mcq_all_prompt = get_mcq_prompt(mcq_many_choices)

    stats = {case_id: {}}
    j = 0

    if os.path.exists(save_path):
        with open(save_path, 'r') as results_file:
            stats = json.load(results_file)

        case_stats = stats.get(case_id)
        if case_stats is not None:
            # Skip trials that were already completed. The many-choice MCQ is
            # the last result written for a trial, so its presence marks the
            # trial as finished.
            while f'trial_{j}_mcq_many_with_exam' in case_stats:
                j += 1
        else:
            stats[case_id] = {}

    while j < num_runs:
        conversation_history_doctor = [{"role": "system", "content": doctor_prompt}]
        conversation_history_patient = [{"role": "system", "content": patient_prompt}]

        # Multi-turn conversation without Physical Exam
        while True:
            # Patient talks; the reply is the patient's own ("assistant") turn
            # and the doctor's incoming ("user") turn.
            response_patient = call_model(conversation_history_patient, n_responses=1)

            conversation_history_doctor.append({"role": "user",
                                                "content": response_patient})
            conversation_history_patient.append({"role": "assistant",
                                                 "content": response_patient})

            # Doctor talks; roles are mirrored relative to the patient's turn.
            response_doctor = call_model(conversation_history_doctor, n_responses=1)

            conversation_history_doctor.append({"role": "assistant",
                                                "content": response_doctor})
            conversation_history_patient.append({"role": "user",
                                                 "content": response_doctor})

            # Doctor arrives at a differential diagnosis: no further question
            # asked, or the reply explicitly mentions a final diagnosis.
            if ("?" not in response_doctor) or ('final diagnosis' in response_doctor.lower()):
                break

        # Run multi-turn conversation (with Physical Exam) + FRQ
        prompt = exam_prompt + get_diagnosis_after_physical_exam_prompt()
        conversation_history_doctor.append({"role": "system", "content": prompt})
        response_doctor = call_model(conversation_history_doctor, n_responses=1)
        conversation_history_doctor.append({"role": "assistant", "content": response_doctor})
        stats[case_id][f"trial_{j}_doctor_responses_with_exam"] = conversation_history_doctor
        # Slicing makes a new list, so the stored transcript above keeps the
        # exam prompt and its answer while the working copy drops them.
        conversation_history_doctor = conversation_history_doctor[:-2]

        # Run multi-turn conversation (with Physical Exam) + 4-choice MCQ
        prompt = exam_prompt + mcq_prompt
        conversation_history_doctor.append({"role": "system", "content": prompt})
        response_doctor = call_model(conversation_history_doctor, n_responses=1)
        stats[case_id][f"trial_{j}_mcq_with_exam"] = response_doctor
        conversation_history_doctor.pop()

        # Run multi-turn conversation (with Physical Exam) + many-choice MCQ
        prompt = exam_prompt + mcq_all_prompt
        conversation_history_doctor.append({"role": "system", "content": prompt})
        response_doctor = call_model(conversation_history_doctor, n_responses=1)
        stats[case_id][f"trial_{j}_mcq_many_with_exam"] = response_doctor
        conversation_history_doctor.pop()
        j += 1

        # Persist after every trial so progress survives an interruption.
        with open(save_path, "w") as results_file:
            json.dump(stats, results_file)

In [None]:
def process_case_withoutPE(case, save_path, gpt_model, num_runs = 10):
    """Run doctor-patient conversations for one case and query MCQ diagnoses without the physical exam.

    For each trial, a patient model and a doctor model alternate turns until
    the doctor's reply contains no "?" or mentions "final diagnosis". The
    doctor's closing reply is then replaced, in a copy of the conversation, by
    the 4-choice MCQ prompt and separately by the many-choice MCQ prompt. The
    full conversation and both MCQ answers are stored under ``stats[case_id]``
    and the whole ``stats`` dict is written to ``save_path`` as JSON after
    every trial.

    Trials already stored for this case in ``save_path`` are skipped rather
    than rerun, so an interrupted run can be resumed.

    Args:
        case: Tuple ``(case_id, case_desc, exam, mcq_choices, mcq_many_choices)``;
            ``exam`` is unpacked but not used here.
        save_path: Path of the JSON results file; loaded if it exists and
            rewritten after each trial.
        gpt_model: ``"gpt-3.5"`` or ``"gpt-4"``; selects the API wrapper to call.
        num_runs: Total number of trials wanted for this case.

    Raises:
        KeyError: If ``gpt_model`` is not one of the supported model names.
    """
    mapping = {"gpt-3.5": call_gpt3_api, "gpt-4": call_gpt4_api}
    call_model = mapping[gpt_model]

    case_id, case_desc, exam, mcq_choices, mcq_many_choices = case

    doctor_prompt = get_doctor_prompt()
    patient_prompt = get_patient_prompt(case_desc)

    mcq_prompt = get_mcq_prompt(mcq_choices)
    mcq_all_prompt = get_mcq_prompt(mcq_many_choices)

    stats = {case_id: {}}
    j = 0

    if os.path.exists(save_path):
        with open(save_path, 'r') as results_file:
            stats = json.load(results_file)

        case_stats = stats.get(case_id)
        if case_stats is not None:
            # If results are already present, do not run those trials again.
            # The many-choice MCQ is the last result written for a trial, so
            # its presence marks the trial as finished.
            while f'trial_{j}_mcq_many' in case_stats:
                j += 1
        else:
            stats[case_id] = {}

    while j < num_runs:
        conversation_history_doctor = [{"role": "system", "content": doctor_prompt}]
        conversation_history_patient = [{"role": "system", "content": patient_prompt}]

        while True:
            # Patient talks; the reply is the patient's own ("assistant") turn
            # and the doctor's incoming ("user") turn.
            response_patient = call_model(conversation_history_patient, n_responses=1)

            conversation_history_doctor.append({"role": "user",
                                                "content": response_patient})
            conversation_history_patient.append({"role": "assistant",
                                                 "content": response_patient})

            # Doctor talks; roles are mirrored relative to the patient's turn.
            response_doctor = call_model(conversation_history_doctor, n_responses=1)

            conversation_history_doctor.append({"role": "assistant",
                                                "content": response_doctor})
            conversation_history_patient.append({"role": "user",
                                                 "content": response_doctor})

            # Doctor arrives at a differential diagnosis: no further question
            # asked, or the reply explicitly mentions a final diagnosis.
            if ("?" not in response_doctor) or ('final diagnosis' in response_doctor.lower()):
                break

        # multi-turn conversation + 4-choice MCQs
        # ([:-1] drops the doctor's closing reply; the MCQ prompt takes its place)
        c = conversation_history_doctor[:-1]
        c.append({"role": "system", "content": mcq_prompt})
        mcq = call_model(c, n_responses=1)

        # multi-turn conversation + many-choice MCQs
        c = conversation_history_doctor[:-1]
        c.append({"role": "system", "content": mcq_all_prompt})
        mcq_all = call_model(c, n_responses=1)

        stats[case_id][f'trial_{j}_doctor_responses'] = conversation_history_doctor[:]
        stats[case_id][f'trial_{j}_mcq'] = mcq
        stats[case_id][f'trial_{j}_mcq_many'] = mcq_all
        j += 1

        # Persist after every trial so progress survives an interruption.
        with open(save_path, "w") as results_file:
            json.dump(stats, results_file)

In [None]:
# Output files, one per model.
path_gpt3 = "./results/conversations_raw/conversations_gpt3.json"
path_gpt4 = "./results/conversations_raw/conversations_gpt4.json"

# (results file, model name) pairs, in the order they are run for each case.
model_runs = ((path_gpt3, "gpt-3.5"), (path_gpt4, "gpt-4"))

# For every case: first the conversations without the physical exam for both
# models, then the conversations with the physical exam for both models.
for case in tqdm(cases):
    for run_case in (process_case_withoutPE, process_case_withPE):
        for results_path, model_name in model_runs:
            run_case(case, results_path, model_name, num_runs=10)