### Part 1 - Test prompts

In [1]:
import json
from tqdm import tqdm
from openai import OpenAI

In [3]:
# Generate prompts based on medical profession speciality x medical domain

content_1 = "You are an assistant responsible for creating prompts that healthcare workers would ask a medical AI chatbot."
prompt_1 = f'''Generate a prompt that a Physician specializing in Allergy and Immunology might ask an AI chatbot about the Urinary System regarding a practical issue they are facing with a patient.
Only include the prompt from the Physician specializing in Allergy and Immunology about the Urinary System in your response, with no additional text.
'''
# TODO: Maybe add examples
# Good example: Can you assist in determining if there is a potential link between recurrent urinary tract infections and an underlying immunodeficiency in a patient, and what specific immunological tests should be considered for further investigation?


# Generate prompts based on medical AI task

# Read the medical AI task records and pick a single one for this test
medical_ai_tasks_file = "../results/medical_ai_tasks.json"
with open(medical_ai_tasks_file, 'r') as tasks_fh:
    medical_ai_tasks = json.load(tasks_fh)
test_task = medical_ai_tasks[6]

# Read the subtopic records (each record carries a 'subtopic' list)
subtopics_file = "../results/medical_subtopics.json"
with open(subtopics_file, 'r') as subtopics_fh:
    subtopics_data = json.load(subtopics_fh)

# Flatten the per-record subtopic lists into one list, then pick a single entry
subtopics = [name for record in subtopics_data for name in record['subtopic']]
test_subtopic = subtopics[25]

# Fields of the selected task that are interpolated into the prompt below
task_name = test_task["task"]
task_description = test_task["description"]
task_instruction = test_task["additional_instruction"]

content_2 = "You are an assistant responsible for creating prompts that healthcare workers would ask a medical AI chatbot."
prompt_2 = f'''Generate a prompt that a physician might ask an AI chatbot when tasked with {task_name} in the context of {test_subtopic} in the medical field.
{task_name} is described as: {task_description}
To create a realistic prompt, follow these additional instructions: {task_instruction}
Only include the generated prompt, adding extra details only if specified. Focus solely on a realistic prompt a physician might ask a medical AI chatbot.
'''

In [4]:
# Test version of interacting with the openai API
# Generates only 1 output and prints it

path_to_api_key: str = "../API_KEY.txt"
# Read the key inside a context manager so the file handle is closed right away,
# and strip surrounding whitespace: a trailing newline in the key file would
# otherwise become part of the credential handed to the client.
with open(path_to_api_key, 'r') as key_file:
    my_api_key = key_file.read().strip()
client = OpenAI(api_key=my_api_key)
gpt_model: str = "gpt-4o"

# Select which of the system message / user prompt pairs defined earlier to send
content = content_2
prompt = prompt_2


print("Send prompt to GPT:")
print("#########")
print(prompt)
print("########")

# One chat completion: the system message sets the role, the user message carries the prompt
completion = client.chat.completions.create(
    model=gpt_model,
    messages=[
        {"role": "system", "content": content},
        {"role": "user", "content": prompt},
    ]
)
print("Receiving responses from GPT...")
# Only the first choice of the reply is shown
print(completion.choices[0].message.content)

Send prompt to GPT:
#########
Generate a prompt that a physician might ask an AI chatbot when tasked with Clinical Documentation in the context of Endocrine Pathophysiology in the medical field.
Clinical Documentation is described as: Transcribing notes of a healthcare worker.
To create a realistic prompt, follow these additional instructions: Generate notes that a healthcare worker would write during an anamnesis. The healthcare worker would then want these notes transcribed for inclusion in an official medical record.
Only include the generated prompt, adding extra details only if specified. Focus solely on a realistic prompt a physician might ask a medical AI chatbot.

########
Receiving responses from GPT...
"Please transcribe the following anamnesis notes for the patient's medical record: The patient is a 52-year-old female presenting with fatigue, weight gain, and cold intolerance over the past six months. She reports minimal hair loss and changes in skin texture. Her family hist

### Part 2 - Generate list of topics

In [None]:
# Helper function to get output in desired .json format

def get_desired_format(topic):
    """Build the output-format instructions that are appended to a subtopic prompt.

    The returned text contains a JSON template for the answer, three
    profession/specialties examples that illustrate the layout, and the exact
    opening lines the answer has to start with.

    Args:
        topic: Name of the medical domain. It is interpolated, quoted, into
            both the JSON template and the required opening lines.

    Returns:
        str: The formatting instructions as a single multi-line string.
    """
    # The template at the top must itself be valid JSON (quoted domain value,
    # balanced quotes around every placeholder) and must agree with the
    # required prefix at the bottom; otherwise the model is shown two
    # conflicting formats.
    return f'''The format of the output has to be the following:
    {{
        "domain": "{topic}",
        "subtopic": [
          "subtopic_1",
          "subtopic_2",
          "subtopic_3"
        ]
    }}

    Here are some examples where we have a medical professions and then a list of specialities:

    Example 1:

    {{
        "profession": "Pharmacist",
        "specialties": [
          "Clinical Pharmacy",
          "Community Pharmacy",
          "Consultant Pharmacy",
          "Hospital Pharmacy",
          "Industrial Pharmacy",
          "Nuclear Pharmacy",
          "Oncology Pharmacy"
        ]
    }}

    Example 2:

    {{
      "profession": "Physical Therapist",
      "specialties": [
        "Cardiovascular and Pulmonary",
        "Geriatric",
        "Neurological",
        "Orthopedic",
        "Pediatric",
        "Sports",
        "Women's Health"
      ]
    }}

    Example 3:

    {{
      "profession": "Occupational Therapist",
      "specialties": [
        "Geriatric Occupational Therapy",
        "Mental Health Occupational Therapy",
        "Pediatric Occupational Therapy",
        "Physical Rehabilitation",
        "Hand Therapy"
      ]
    }}

    The output you generate, has therefor to start with:
    {{
        "domain": "{topic}",
        "subtopic": [
        
    '''

In [None]:
# Generate topics using openai API

path_to_data: str = "../results/medical_topics.json"
output_path: str = '../results/medical_subtopics.json'
path_to_api_key: str = "../API_KEY.txt"

# Safety switch: every request is billed. Keep this False for a dry run
# (no key is read, no request is sent); set it to True to query all topics.
RUN_PAID_API_CALLS: bool = False

# Load JSON data from a file
with open(path_to_data, "r") as f:
    data = json.load(f)

# Raw model replies, keyed by the index of the topic in `data`
outputs = {}

gpt_model: str = "gpt-4o"

N_real = len(data)
N_test = 0
# Number of topics that will actually be sent to the API in this run
n_requests = N_real if RUN_PAID_API_CALLS else N_test

if n_requests > 0:
    # Only touch the key file when a request will really be made. The context
    # manager closes the handle; strip() drops a trailing newline in the file.
    with open(path_to_api_key, 'r') as key_file:
        my_api_key = key_file.read().strip()
    client = OpenAI(api_key=my_api_key)

print("Send prompts to GPT and process them")
print("Receiving responses from GPT...")

for i in tqdm(range(n_requests)):
    topic = data[i]["topic"]
    content = "You are an assistant to create an overview of the medical field"
    prompt = f'''Generate for the medical domain: {topic} 10 - 20 subtopics and/or subdomains.'''

    # Formatting instructions are appended directly after the task sentence
    desired_format = get_desired_format(topic)

    completion = client.chat.completions.create(
        model=gpt_model,
        messages=[
            {"role": "system", "content": content},
            {"role": "user", "content": f"{prompt}{desired_format}"},
        ]
    )

    # Keep only the text of the first choice
    outputs[i] = completion.choices[0].message.content

print("Received all responses from GPT")

Send prompts to GPT and process them
Receiving responses from GPT...


100%|██████████| 49/49 [03:25<00:00,  4.19s/it]

Received all responses from GPT





In [None]:
# Correct format mistakes by hand

# Replacement for the reply stored at index 24, rebuilt from a plain dict and
# wrapped in the same ```json fence that the parsing step strips again.
radiology_entry = {
    "domain": "Radiology",
    "subtopic": [
        "Diagnostic Radiology",
        "Interventional Radiology",
        "Nuclear Medicine",
        "Pediatric Radiology",
        "Neuroradiology",
        "Breast Imaging",
        "Musculoskeletal Radiology",
        "Thoracic Imaging",
        "Abdominal Imaging",
        "Cardiovascular Radiology",
        "Emergency Radiology",
        "Oncologic Imaging",
        "Gastrointestinal Radiology",
        "Genitourinary Radiology",
        "Radiation Oncology",
        "Ultrasound",
        "Mammography",
        "Computed Tomography (CT)",
        "Magnetic Resonance Imaging (MRI)",
        "Fluoroscopy",
    ],
}
outputs[24] = "```json\n" + json.dumps(radiology_entry, indent=4) + "\n```"

In [55]:
# Turn every raw reply into a Python object, one entry per topic in `data`
parsed_output = []

for idx in range(len(data)):
    raw_reply = outputs[idx]
    # Drop the markdown code fence markers before handing the text to the JSON parser
    json_text = raw_reply.replace("```json", "").replace("```", "")
    parsed_output.append(json.loads(json_text))

In [56]:
# Show each parsed record on its own line
for entry in parsed_output:
    print(entry)

{'domain': 'Human Anatomy and Physiology', 'subtopic': ['Cell Biology', 'Skeletal System', 'Muscular System', 'Nervous System', 'Endocrine System', 'Cardiovascular System', 'Respiratory System', 'Digestive System', 'Urinary System', 'Reproductive System', 'Integumentary System', 'Lymphatic System', 'Immune System', 'Neurophysiology', 'Exercise Physiology', 'Comparative Physiology', 'Pathophysiology', 'Developmental Biology', 'Histology', 'Biochemistry']}
{'domain': 'Pathophysiology', 'subtopic': ['Inflammation', 'Immune Response', 'Cellular Injury and Adaptation', 'Neoplasia and Cancer Pathogenesis', 'Genetic Disorders', 'Endocrine Pathophysiology', 'Cardiovascular Pathophysiology', 'Respiratory Pathophysiology', 'Gastrointestinal Pathophysiology', 'Renal Pathophysiology', 'Hematologic Pathophysiology', 'Nervous System Pathophysiology', 'Musculoskeletal Pathophysiology', 'Liver Disease Pathophysiology', 'Infectious Disease Pathophysiology', 'Reproductive Pathophysiology', 'Pain Pathoph

In [58]:
# Save the parsed records to `output_path` as JSON indented by four spaces
with open(output_path, 'w') as out_file:
    json.dump(parsed_output, out_file, indent=4)

print(f"Data successfully written to {output_path}")

Data successfully written to ../results/medical_subtopics.json
