Import Statements

In [1]:
import random
from typing import List, Tuple

Helper Functions

In [2]:
def return_days_inside_3_day_window(window_center_day: int, discharge_day: int) -> List[int]:
	"""List the day numbers that are within 3 days of an event and also within the patient stay.

	The window runs from 3 days before ``window_center_day`` to 3 days after it, then is
	clipped so it never starts before admission (day 1) or ends after ``discharge_day``.

	Args:
		window_center_day (int): day number of the event the window is centered on
		discharge_day (int): discharge day number at the time of the call

	Returns:
		List[int]: ascending, consecutive day numbers inside both the window and the stay
		(empty when the clipped window contains no days)
	"""
	window_start = window_center_day - 3
	if window_start < 1:
		# the stay begins on day 1, so the window cannot reach back any further
		window_start = 1

	window_end = window_center_day + 3
	if window_end > discharge_day:
		# the stay ends at discharge, so the window cannot extend past it
		window_end = discharge_day

	# range() excludes its stop value, hence the +1 to keep the last day
	return [day for day in range(window_start, window_end + 1)]



def get_days_outside_3_day_window(window_center_day: int, discharge_day: int) -> Tuple[List[int], int]:
	"""List the stay days that fall OUTSIDE the 3-day window around an event.

	The window covers ``window_center_day - 3`` through ``window_center_day + 3``.

	* Event on day 5 or later: at least day 1 lies before the window, so the result is
	  every stay day before the window plus every stay day after it. The discharge day
	  is returned unchanged.
	* Event before day 5: the window reaches back to admission (day 1), so only days
	  after the window qualify. If the stay ends on or before the last window day, the
	  discharge day is pushed out just far enough to leave one day after the window,
	  plus a random 0-5 additional days.

	Args:
		window_center_day (int): day number of the event the window is centered on
		discharge_day (int): discharge day number at the time of the call

	Returns:
		Tuple[List[int], int]: the eligible day numbers (ascending) and the discharge day,
		which may be larger than the one passed in. Callers that store the discharge day
		elsewhere must update it from this return value.
	"""
	if window_center_day >= 5:
		# days strictly before the window: 1 .. center-4
		before_window = range(1, window_center_day - 3)
		# days strictly after the window: center+4 .. discharge
		after_window = range(window_center_day + 4, discharge_day + 1)
		return [*before_window, *after_window], discharge_day

	# early event: the window swallows admission, so only the tail of the stay is usable
	window_end = window_center_day + 3
	updated_discharge = discharge_day
	if discharge_day <= window_end:
		# smallest extension that leaves exactly one day after the window
		shortfall = window_end + 1 - discharge_day
		updated_discharge += random.randint(shortfall, shortfall + 5)

	return list(range(window_end + 1, updated_discharge + 1)), updated_discharge

def multiple_choice_question(answer: str, choice_type: str, choice_list: List[str]) -> str:
	"""Build a prompt sentence for a multiple-choice algorithm question.

	When ``answer`` is "none", the sentence forbids every item in ``choice_list``, e.g.
	"The medical record should not mention any of the following infection symptoms:
	fever, redness, swelling."

	For any other answer, a random non-empty subset of ``choice_list`` is drawn (random
	size, random order) and the sentence requires those items, e.g.
	"The medical record should note the following infection symptoms: redness, swelling."

	Args:
		answer (str): "none" or "yes", the desired answer to an algorithm question
		choice_type (str): short phrase naming the kind of choices (e.g. "infection symptoms")
		choice_list (List[str]): every possible multiple-choice answer

	Returns:
		str: the complete prompt sentence, ending with a period
	"""
	if answer != "none":
		# draw between 1 and all of the choices, without repeats
		how_many = random.randint(1, len(choice_list))
		picked = random.sample(choice_list, how_many)
		picked_text = ", ".join(picked)
		return f"The medical record should note the following {choice_type}: {picked_text}."

	everything_text = ", ".join(choice_list)
	return f"The medical record should not mention any of the following {choice_type}: {everything_text}."

QA Dictionary Definition

In [3]:
# Identifier embedded in every output file name.
algo_str = "birth_maternal"
# Batch label, also embedded in output file names. Increase it and re-run to generate
# another set; the new set lands in separately named files instead of overwriting the first.
prompt_run = "1"

# Story number -> the question/answer path that story must follow.
# Recurring conventions:
#   * Q1 "yes" means an outcome other than "injury" or "none".
#   * Q3 is the delivery type: "atvd", "vd" or "csec".
#   * Entries marked "non-branching" are placeholders whose value does not steer the path.
birth_maternal_prompt_qa_dict = {
    1: {
        "EQR1": "no",
    },
    2: {
        "EQR1": "yes",
        "EQ1": "no",
    },
    3: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "injury",
        "Q2": "yes",  # non-branching placeholder value
    },
    4: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "none",
    },
    5: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "atvd",
    },
    6: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "vd",
        "Q4": "yes",  # non-branching numerical answer dependent on R1
        "R1": "no",
    },
    7: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "vd",
        "Q4": "yes",  # non-branching numerical answer dependent on R1
        "R1": "yes",
        "Q5": "no",
    },
    8: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "vd",
        "Q4": "yes",  # non-branching numerical answer dependent on R1
        "R1": "yes",
        "Q5": "yes",
        "Q6": "no",
    },
    9: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "vd",
        "Q4": "yes",  # non-branching numerical answer dependent on R1
        "R1": "yes",
        "Q5": "yes",
        "Q6": "yes",
    },
    10: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "csec",
        "Q6a": "yes",  # non-branching numerical answer
        "Q7": "yes",  # non-branching numerical answer
        "R1a": "no",
        "R2": "no",  # only yes if Q1=none or Q6=yes or Q7a=no
    },
    11: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "csec",
        "Q6a": "yes",  # non-branching numerical answer
        "Q7": "yes",  # non-branching numerical answer
        "R1a": "yes",
        "Q7a": "no",  # no here makes R2 true
    },
    12: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "csec",
        "Q6a": "yes",  # non-branching numerical answer
        "Q7": "yes",  # non-branching numerical answer
        "R1a": "yes",
        "Q7a": "yes",
        "Q8": "yes",
        "Q9": "yes",  # non-branching number above or below 39
        "Q10": "yes",  # non-branching multiple choice
    },
    13: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "atvd",
        "Q6a": "yes",  # non-branching numerical answer
        "Q7": "yes",  # non-branching numerical answer
        "R1a": "yes",
        "Q7a": "yes",
        "Q8": "no",
        "R3": "yes",
        "Q11": "yes",  # non-branching multiple choice
    },
    14: {
        "EQR1": "yes",
        "EQ1": "yes",
        "Q1": "yes",
        "Q3": "csec",
        "Q6a": "yes",  # non-branching numerical answer
        "Q7": "yes",  # non-branching numerical answer
        "R1a": "yes",
        "Q7a": "yes",
        "Q8": "no",
        "R3": "no",
    },
}

Build Prompt Functions

In [None]:
def build_EQR1_prompt(answer, patient_age):
    """Build the patient-sex sentence and settle the patient's age.

    Args:
        answer: "yes" if the patient is female; anything else means not female.
        patient_age: age chosen so far; kept as-is unless the answer is "yes".

    Returns:
        tuple: (prompt sentence, patient age). For "yes" the age is re-drawn at random
        from 10 to 65 inclusive; otherwise the incoming age is passed straight through.
    """
    if answer != "yes":
        return "Patient is not a female.", patient_age
    return "Patient is a female.", random.randint(10, 65)

def build_EQ1_prompt(answer):
    """Return the sentence stating whether the patient delivered during the stay.

    Args:
        answer: "yes" if a delivery happened; any other value means it did not.

    Returns:
        str: the prompt sentence.
    """
    return (
        "The patient delivered during the stay."
        if answer == "yes"
        else "The patient did not deliver during the stay."
    )

def build_Q1_prompt(answer):
    """Build the maternal-outcome sentence.

    Args:
        answer: "injury" for an injury to a body part, "yes" for one randomly chosen
            non-injury outcome, anything else (normally "none") for no outcome at all.

    Returns:
        str: the prompt sentence. For the "none" case it lists every outcome, including
        injury, as something the record must not mention.
    """
    outcomes_list = [
        "chorioamnionitis",
        "endometritis",
        "hemorrhage requiring transfusion",
        "eclampsia (pre-eclampsia plus seizures or convulsions)",
        "third-or-fourth-degree perineal laceration"
        ]
    if answer == "injury":
        # spelling fixed ("ocurred" -> "occurred") so it matches the sibling branch below
        Q1_prompt = "The maternal outcome that occurred during the stay was an injury to a body part."
    elif answer == "yes":
        rand_outcome = random.choice(outcomes_list)
        Q1_prompt = f"The maternal outcome that occurred during the stay was {rand_outcome}."
    else: # none
        # injury counts as an outcome too, so it must also be excluded from the record
        outcomes_list.append("injury to a body part")
        all_outcomes_str = ", ".join(outcomes_list)
        Q1_prompt = f"The record must not mention any of the following: {all_outcomes_str}."
    return Q1_prompt

def build_Q2_prompt():
    """Return a sentence naming a randomly chosen injured organ.

    Returns:
        str: "The injury was to the <organ>." where the organ is one of uterus, ureter,
        bladder or bowel.
    """
    injured_organ = random.choice(["uterus", "ureter", "bladder", "bowel"])
    return f"The injury was to the {injured_organ}."

def build_Q3_prompt(answer):
    """Return the sentence describing the delivery type.

    Args:
        answer: "vd" (vaginal delivery), "csec" (scheduled cesarean); any other value is
            treated as "atvd" (attempted vaginal delivery followed by cesarean).

    Returns:
        str: the prompt sentence.
    """
    if answer == "vd":
        return "The patient had a vaginal delivery."
    if answer == "csec":
        return "The patient had a scheduled cesarean section with no attempted vaginal delivery."
    # fall-through covers "atvd"
    return "The patient had attempted vaginal delivery which was followed by cesarean section."

# Q4 contributes no sentence of its own for now; the R1 prompt carries that information.
def build_Q4_prompt():
    """Placeholder builder for Q4; always returns an empty string."""
    return ""

# R1: what was the estimated gestational age at delivery, and was it >= 39 weeks?
def build_R1_prompt(answer, ega_del):
    """Pick a gestational age at delivery consistent with the answer and describe it.

    Args:
        answer: "yes" draws an age of 39-43 weeks; anything else draws 30-38 weeks.
        ega_del: previous value; it is always replaced by the newly drawn age.

    Returns:
        tuple: (prompt sentence, gestational age in weeks).
    """
    week_low, week_high = (39, 43) if answer == "yes" else (30, 38)
    ega_del = random.randint(week_low, week_high)
    return f"The patient's gestational age in weeks is {ega_del}.", ega_del

def build_Q5_prompt(answer):
    """Return the sentence stating whether the patient received an antibiotic.

    Args:
        answer: "yes" names one randomly chosen antibiotic; anything else states that
            no antibiotic was given.

    Returns:
        str: the prompt sentence.
    """
    if answer == "yes":
        common_antibiotics = ["cefazolin", "gentamicin", "clindamycin"]
        rand_abx = random.choice(common_antibiotics)
        Q5_prompt = f"Patient was given {rand_abx}."
    else:
        # typo fixed: this sentence used to read "Patent was not given an antibiotic."
        Q5_prompt = "Patient was not given an antibiotic."
    return Q5_prompt

def build_Q6_prompt(answer):
    """Return a sentence giving how many hours after delivery the antibiotic was given.

    Args:
        answer: "yes" draws 25-40 hours; anything else draws 1-23 hours.

    Returns:
        str: the prompt sentence.
    """
    hour_bounds = (25, 40) if answer == "yes" else (1, 23)
    random_hours = random.randint(*hour_bounds)
    return f"The antibiotic was administered {random_hours} hours following delivery of neonate."

def build_Q6a_prompt():
    """Pick how many fetuses were delivered (1-4) and describe it.

    Returns:
        tuple: (prompt sentence, number of fetuses). The sentence uses the singular form
        for one fetus ("There was 1 fetus delivered.") and the plural form otherwise.
    """
    num_fetuses = random.randint(1,4)
    if num_fetuses == 1:
        # avoid the ungrammatical "There were 1 fetuses delivered."
        Q6a_prompt = f"There was {num_fetuses} fetus delivered."
    else:
        Q6a_prompt = f"There were {num_fetuses} fetuses delivered."
    return Q6a_prompt, num_fetuses

# Q7 contributes no sentence of its own for now; the R1a prompt carries that information.
def build_Q7_prompt():
    """Placeholder builder for Q7; always returns an empty string."""
    return ""

def build_R1a_prompt(answer, num_fetuses):
    """Return the sentence stating how many fetuses were delivered alive.

    Args:
        answer: "yes" means at least one fetus was not delivered alive; anything else
            means all were delivered alive.
        num_fetuses: total number of fetuses delivered.

    Returns:
        str: the prompt sentence. For "yes" with several fetuses, the number alive is
        drawn at random from 0 to ``num_fetuses - 1``.
    """
    if answer != "yes":
        return "All fetuses were delivered alive."
    if num_fetuses == 1:
        # a single fetus that was not delivered alive leaves none alive
        return "No fetuses were delivered alive."
    survivors = random.randint(0, num_fetuses - 1)
    return f"{survivors} out of {num_fetuses} fetuses were delivered alive."

# R2 is a derived check on Q1, Q6 and Q7a; the story dictionaries fix its value, so it
# contributes no sentence of its own.
def build_R2_prompt():
    """Placeholder builder for R2; always returns an empty string."""
    return ""

def build_Q7a_prompt(answer):
    """Return the sentence stating whether the fetal death was expected.

    Args:
        answer: "yes" if it was expected; any other value means it was not.

    Returns:
        str: the prompt sentence.
    """
    if answer != "yes":
        return "The fetal death was not expected."
    return "The fetal death was expected."

def build_Q8_prompt(answer):
    """Return the sentence stating whether labor was induced.

    Args:
        answer: "yes" if labor was induced; any other value means it was not.

    Returns:
        str: the prompt sentence.
    """
    return "Labor was induced." if answer == "yes" else "Labor was not induced."

# Q9: what was the estimated gestational age at the time of induction, and was it >= 39?
# Delivery age is already known, and induction cannot happen after delivery, so the
# induction age is capped at the delivery age.
def build_Q9_prompt(ega_del):
    """Pick a gestational age at induction no later than delivery and describe it.

    Args:
        ega_del (int): estimated gestational age at delivery, in weeks.

    Returns:
        str: the prompt sentence with an induction age drawn from 34 weeks up to
        ``ega_del``. When delivery happened before 34 weeks, the induction age equals
        the delivery age.
    """
    # build_R1_prompt can produce delivery ages as low as 30; without this clamp
    # random.randint(34, ega_del) would raise ValueError for any ega_del below 34
    earliest_week = min(34, ega_del)
    gestational_induct = random.randint(earliest_week, ega_del)
    Q9_prompt = f"The estimated gestational age at time of induction was {gestational_induct}."
    return Q9_prompt

def build_Q10_prompt():
    """Return a sentence naming one randomly chosen condition present before induction.

    Returns:
        str: "The following condition was present prior to induction: <condition>."
    """
    # candidate maternal/fetal conditions that may be cited alongside an induction
    induction_conditions = [
        "Diabetes mellitus",
        "Premature rupture of the membranes",
        "Pregnancy induced hypertension, including mild, moderate, or severe pre-eclampsia, or eclampsia",
        "IUGR (intrauterine growth retardation) or SGA (small for gestational age)",
        "Cardiac disease",
        "Post maturity (41 or more weeks of pregnancy completed)",
        "Isoimmunization (e.g., Rh disease)",
        "Chorioamnionitis",
        "Abruptio placentae",
    ]
    chosen = random.choice(induction_conditions)
    return f"The following condition was present prior to induction: {chosen}."

# R3 contributes no sentence of its own: the delivery type is already stated elsewhere,
# and the story dictionaries are written so that its value matches.
def build_R3_prompt():
    """Placeholder builder for R3; always returns an empty string."""
    return ""


def build_Q11_prompt():
    """Return a sentence about instrument use during delivery.

    One of two sentences is chosen at random: either a randomly picked instrument
    (vacuum, forceps, or vacuum followed by forceps) was used, or no instrumentation
    was used at all.

    Returns:
        str: the prompt sentence, always ending with a period so it can be joined
        cleanly with the other prompt sentences.
    """
    # "vaccum" typo fixed so the instrument name matches the spelling used elsewhere
    instrument_list = ["vacuum", "forceps", "vacuum followed by forceps"]
    rand_instrument = random.choice(instrument_list)
    prompt_list = [
        f"The {rand_instrument} was used to aid in delivery.",
        # trailing period added; without it the next sentence ran on after the join
        "There was not any instrumentation such as vacuum or forceps used during the birth process."
    ]
    Q11_prompt = random.choice(prompt_list)
    return Q11_prompt

Main Program

In [5]:
# Builds the descriptive PDF file name for a story. The export cells store this name
# next to each prompt so the story behind a manually generated PDF can be identified.
def generate_pdf_file_name(algo_str, story_number, prompt_run):
    """Build "<prompt_run>-<algo_str>_story<N>_<key>_<value>_....pdf" for one story.

    The question/answer pairs come from ``birth_maternal_prompt_qa_dict[story_number]``
    and are sorted alphabetically by question key so the name is stable.

    Args:
        algo_str (str): algorithm identifier used in the file name.
        story_number: key of the story in ``birth_maternal_prompt_qa_dict``.
        prompt_run (str): batch label placed at the front of the file name.

    Returns:
        str: the file name, ending in ".pdf".
    """
    story_answers = birth_maternal_prompt_qa_dict[story_number]
    answer_tokens = [f"{question}_{reply}" for question, reply in sorted(story_answers.items())]
    stem = prompt_run + "-" + algo_str + "_story" + str(story_number) + "_" + "_".join(answer_tokens)
    return stem + ".pdf"

In [18]:
# every story number defined in the QA dictionary, in definition order
story_numbers = [*birth_maternal_prompt_qa_dict]
# story number -> full GPT-ready prompt; filled in by the generation loop
story_prompts_dict = dict()

In [19]:
for num in story_numbers:

    # Strings are immutable, so collect the individual sentences in a list and join them
    # into a single prompt string once the story is complete.
    list_of_prompts = []

    # basic data about the stay; builder functions may replace these
    discharge_day_number = random.randint(4, 8)
    patient_age = random.randint(1, 99)

    # defaults for values that are passed between builder functions
    ega_delivery = random.randint(39,43)
    num_fetuses_delivered = -1

    # keys (EQR1, Q3, ...) of the questions that belong to this story
    question_keys = list(birth_maternal_prompt_qa_dict[num].keys())

    ###### For each question key present in this story, call its builder and record the
    # sentence (plus any updated shared variable). The order below is the order in which
    # sentences appear in the final prompt.
    if "EQR1" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["EQR1"]
        eqr1_prompt, patient_age = build_EQR1_prompt(ans, patient_age)
        list_of_prompts.append(eqr1_prompt)
    if "EQ1" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["EQ1"]
        eq1_prompt = build_EQ1_prompt(ans)
        list_of_prompts.append(eq1_prompt)
    if "Q1" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q1"]
        q1_prompt = build_Q1_prompt(ans)
        list_of_prompts.append(q1_prompt)
    if "Q2" in question_keys:
        q2_prompt = build_Q2_prompt()
        list_of_prompts.append(q2_prompt)
    if "Q3" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q3"]
        q3_prompt = build_Q3_prompt(ans)
        list_of_prompts.append(q3_prompt)
    if "Q4" in question_keys:
        q4_prompt = build_Q4_prompt()
        list_of_prompts.append(q4_prompt)
    if "R1" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["R1"]
        r1_prompt, ega_delivery = build_R1_prompt(ans, ega_delivery)
        list_of_prompts.append(r1_prompt)
    if "Q5" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q5"]
        q5_prompt = build_Q5_prompt(ans)
        list_of_prompts.append(q5_prompt)
    if "Q6" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q6"]
        q6_prompt = build_Q6_prompt(ans)
        list_of_prompts.append(q6_prompt)
    if "Q6a" in question_keys:
        q6a_prompt, num_fetuses_delivered = build_Q6a_prompt()
        list_of_prompts.append(q6a_prompt)
    if "Q7" in question_keys:
        q7_prompt = build_Q7_prompt()
        list_of_prompts.append(q7_prompt)
    if "R1a" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["R1a"]
        r1a_prompt = build_R1a_prompt(ans, num_fetuses_delivered)
        list_of_prompts.append(r1a_prompt)
    if "R2" in question_keys:
        r2_prompt = build_R2_prompt()
        list_of_prompts.append(r2_prompt)
    if "Q7a" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q7a"]
        q7a_prompt = build_Q7a_prompt(ans)
        list_of_prompts.append(q7a_prompt)
    if "Q8" in question_keys:
        ans = birth_maternal_prompt_qa_dict[num]["Q8"]
        q8_prompt = build_Q8_prompt(ans)
        list_of_prompts.append(q8_prompt)
    if "Q9" in question_keys:
        q9_prompt = build_Q9_prompt(ega_delivery)
        list_of_prompts.append(q9_prompt)
    if "Q10" in question_keys:
        q10_prompt = build_Q10_prompt()
        list_of_prompts.append(q10_prompt)
    if "R3" in question_keys:
        r3_prompt = build_R3_prompt()
        list_of_prompts.append(r3_prompt)
    if "Q11" in question_keys:
        # Q11 was previously never dispatched, so stories containing it lost their
        # instrumentation sentence
        q11_prompt = build_Q11_prompt()
        list_of_prompts.append(q11_prompt)
    #######


    # once every applicable builder has run, add the general facts about the stay
    list_of_prompts.append(f"Patient is {patient_age} years old.")
    list_of_prompts.append(f"Patient was discharged on day number {discharge_day_number}.")

    # placeholder builders return "", which would leave doubled spaces in the joined
    # prompt, so empty entries are skipped
    prompt_string = " ".join(prompt for prompt in list_of_prompts if prompt)
    story_prompts_dict[num] = prompt_string


In [20]:
# Sanity check once prompts exist for every story: print each prompt under a heading,
# followed by a blank line, so the results can be inspected by eye.
for num in story_numbers:
    print(f"full prompt to generate story number {num}:", story_prompts_dict[num], "", sep="\n")

full prompt to generate story number 1:
Patient is not a female. Patient is 9 years old. Patient was discharged on day number 5.

full prompt to generate story number 2:
Patient is a female. The patient did not deliver during the stay. Patient is 23 years old. Patient was discharged on day number 6.

full prompt to generate story number 3:
Patient is a female. The patient delivered during the stay. The maternal outcome that ocurred during the stay was an injury to a body part. The injury was to the uterus. Patient is 41 years old. Patient was discharged on day number 6.

full prompt to generate story number 4:
Patient is a female. The patient delivered during the stay. The record must not mention any of the following: chorioamnionitis, endometritis, hemorrhage requiring transfusion, eclampsia (pre-eclampsia plus seizures or convulsions), third-or-fourth-degree perineal laceration, injury to a body part. Patient is 23 years old. Patient was discharged on day number 7.

full prompt to ge

In [None]:
import csv

# one CSV per algorithm and prompt run: <algo_str>_prompts_<prompt_run>.csv
csv_output_file = "{}_prompts_{}.csv".format(algo_str, prompt_run)
with open(csv_output_file, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)

    # header row
    writer.writerow(("story_definition", "story_prompt"))

    # one row per story: descriptive PDF file name, then the full prompt
    for num in story_numbers:
        story_definition = generate_pdf_file_name(algo_str, num, prompt_run)
        # fall back to an empty prompt if this story has no entry
        story_prompt = story_prompts_dict.get(num, "")
        writer.writerow((story_definition, story_prompt))

In [None]:
import json

# one JSON file per algorithm and prompt run: <algo_str>_prompts_<prompt_run>.json
json_output_file = "{}_prompts_{}.json".format(algo_str, prompt_run)

# one record per story: descriptive PDF file name plus the full prompt
# (empty string if this story has no prompt entry)
data = [
    {
        "story_definition": generate_pdf_file_name(algo_str, num, prompt_run),
        "story_prompt": story_prompts_dict.get(num, ""),
    }
    for num in story_numbers
]

# write human-readable JSON, keeping non-ASCII characters as-is
with open(json_output_file, "w", encoding="utf-8") as file:
    json.dump(data, file, indent=4, ensure_ascii=False)