In [None]:
# Create batch of prompts
import json
from tqdm import tqdm

# Model that will answer the generated prompts (written into every batch request).
gpt_model = "gpt-4o"

# Input data locations and the destination of the generated batch file.
path_medical_settings = "../results/medical_settings.json"
path_medical_subtopics = "../results/medical_subtopics.json"
path_medical_professions = "../results/medical_professions.json"

output_path = "../results/batched_prompts.jsonl"

print("Creating a batch of prompts")
print(f"that can be processed by {gpt_model} in batch mode")

# Load data.
# JSON text is UTF-8, so the encoding is passed explicitly: relying on the
# platform default can fail or mis-decode non-ASCII characters on systems
# whose locale encoding is not UTF-8.
with open(path_medical_settings, "r", encoding="utf-8") as f:
    medical_settings = json.load(f)
with open(path_medical_subtopics, "r", encoding="utf-8") as f:
    medical_subtopics = json.load(f)
with open(path_medical_professions, "r", encoding="utf-8") as f:
    medical_professions = json.load(f)

# System message shared by every request in the batch (loop-invariant).
system_message = "You are an assistant responsible for creating prompts that healthcare workers would ask a medical AI chatbot."


def _build_batch_request(custom_id, profession, specialty, domain, model, max_tokens=150):
    """Return one batch request (a dict, i.e. one JSONL line) for a single prompt.

    Args:
        custom_id: Identifier of the request inside the batch; stored as a string.
        profession: Profession of the healthcare worker asking the question.
        specialty: Specialty of that healthcare worker.
        domain: Medical domain the generated prompt should be about.
        model: Name of the model that should answer the request.
        max_tokens: Upper bound on the length of the answer.
            TODO: can be changed (based on some examples that were on
            average 105 tokens long).

    Returns:
        A dict with the keys ``custom_id``, ``method``, ``url`` and ``body``.
    """
    # Built from adjacent literals so that the indentation of this source file
    # does not leak into the text sent to the model.
    # TODO: Maybe add some examples
    # TODO: Different style of prompts (here all are: practical issue they face with a patient)
    user_prompt = (
        f"Generate a prompt that a {profession} specializing in {specialty} "
        f"might ask an AI chatbot about the {domain} regarding a practical "
        "issue they are facing with a patient.\n"
        f"Only include the prompt from the {profession} specializing in "
        f"{specialty} about the {domain} in your response, with no additional text."
    )
    return {
        "custom_id": str(custom_id),
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                # The previous version appended "{format}" here. No `format`
                # variable is defined in this cell, so the builtin function's
                # repr ("<built-in function format>") ended up in every prompt.
                # The output-format instruction is already part of user_prompt.
                {"role": "user", "content": user_prompt},
            ],
            "max_tokens": max_tokens,
        },
    }


with open(output_path, "w", encoding="utf-8") as file:
    # Running counter; doubles as the custom_id of each request.
    num_prompts = 0
    for prof_entry in tqdm(medical_professions):
        profession = prof_entry["profession"]
        specialties = prof_entry["specialties"]
        for top_entry in medical_subtopics:
            domain = top_entry["domain"]
            # TODO: the individual subtopics of each domain are deliberately
            # not iterated here because that produces too many prompts.
            for specialty in specialties:
                request = _build_batch_request(
                    num_prompts, profession, specialty, domain, gpt_model
                )
                file.write(json.dumps(request) + "\n")
                num_prompts += 1

print(f"batch of {num_prompts} prompts saved to {output_path}")