In [None]:
%%capture
# Install the notebook's dependencies; %%capture keeps the installer output out of the cell.
# openai is pinned to 0.28 because the cells below call the legacy
# openai.Completion / openai.ChatCompletion interfaces.
# !pip install openai
!pip install openai==0.28 #use this version since the API was updated
!pip install evaluate
!pip install rouge_score

In [None]:
import getpass
import os

import evaluate
import numpy as np
import openai
import pandas as pd

In [None]:
# Generic prompt template.
# It carries three literal placeholders -- [target_trainable_instruction], [section]
# and [Conv_snippet] -- that the (commented-out) generic-prompt path of the generation
# loop fills in with str.replace before sending the text to a model.
# The template asks the model to answer with a {"summary": ...} dictionary only.
summarize_raw_instruction = """[target_trainable_instruction]

SOAP note section:
[section]
Conversation snippet:
[Conv_snippet]

Output your summary.
Return the output as a dictionary object, adhering to the following structure:
{"summary": ...}
Please provide your response solely in the dictionary format without including any additional text.
"""

In [None]:
# Validation split, read from the working directory. Later cells read the
# 'section_header', 'section_text' and 'dialogue' columns of this frame.
valA = pd.read_csv('TaskA-ValidationSet.csv')

In [None]:
# Show every section header present in the validation set (for reference only).
sections = valA['section_header'].unique().tolist()
print(sections)

# Restrict this run to a single section; edit the list to evaluate others.
sections = ["GENHX"]

# One token budget per selected section: mean reference length in characters
# divided by 4 (presumably a ~4 characters-per-token heuristic -- confirm).
max_tokens = [
    int(round(
        valA.loc[valA['section_header'] == name, 'section_text'].str.len().mean() / 4
    ))
    for name in sections
]
print(max_tokens)

In [None]:
# Take the key from the OPENAI_API_KEY environment variable so that it is never
# stored in the notebook; if the variable is unset (or empty), ask for it
# interactively without echoing it to the output.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

In [None]:
def get_al(model, prompt, max_tokens):
  """Query a completion-style model and return its first answer, stripped.

  Args:
    model: Engine name passed to ``openai.Completion.create`` as ``engine``.
    prompt: Full prompt text to complete.
    max_tokens: Accepted for signature parity with ``get_al_chat`` but NOT
      forwarded to the API (the forwarding line was disabled on purpose),
      so the service's own default output length applies.

  Returns:
    The ``text`` of the first returned choice with surrounding whitespace removed.
  """
  # NOTE(review): with the pinned openai 0.28 client, `timeout` may not bound the
  # HTTP request itself (that appears to be `request_timeout`) -- confirm.
  request = dict(
      engine=model,
      prompt=prompt,
      n=1,
      stop=None,
      temperature=0.2,
      timeout=45,
  )
  completion = openai.Completion.create(**request)
  first_choice = completion.choices[0]
  return first_choice.text.strip()

In [None]:
def get_al_chat(model, prompt, max_tokens):
  """Query a chat model with a fixed system persona and return its first reply.

  Args:
    model: Model name passed to ``openai.ChatCompletion.create``.
    prompt: Text sent as the single user message.
    max_tokens: Accepted for signature parity with ``get_al`` but NOT
      forwarded to the API (the forwarding line was disabled on purpose).

  Returns:
    The ``content`` of the first choice's message, exactly as returned
    (no whitespace stripping).
  """
  system_turn = {"role": "system", "content": "You are a smart doctor as well as a professional medical scribe with a lot of experience."}
  user_turn = {"role": "user", "content": prompt}
  # NOTE(review): with the pinned openai 0.28 client, `timeout` may not bound the
  # HTTP request itself (that appears to be `request_timeout`) -- confirm.
  reply = openai.ChatCompletion.create(
      model=model,
      messages=[system_turn, user_turn],
      temperature=0.2,
      timeout=45,
      n=1,
      stop=None,
  )
  top_choice = reply['choices'][0]
  return top_choice['message']['content']

In [None]:
# Models to compare. The generation loop routes "gpt-3.5-turbo" and "gpt-4" through
# get_al_chat and every other entry through get_al.
# NOTE(review): the text-* entries are legacy completion models -- confirm they are
# still served before re-running.
model_lst = ["text-ada-001", "text-babbage-001","text-curie-001", "text-davinci-003", "gpt-3.5-turbo", "gpt-4"]

In [None]:
#my prompts
def get_prompt(dia, section):
  """Return the section-specific summarization prompt for one dialogue.

  Every template embeds the dialogue text and ends with a section-specific
  cue (e.g. "Allergies:") for the model to continue from.

  Args:
    dia: Dialogue text interpolated into the template.
    section: Section header used as the lookup key (e.g. "GENHX", "FAM/SOCHX").

  Returns:
    The formatted prompt string for ``section``.

  Raises:
    KeyError: If ``section`` is not one of the keys defined below.
  """
  # The whole table is rebuilt on each call because every entry is an f-string
  # that interpolates `dia`.
  prompts = {
    "EXAM": f"Task: I have a doctor-patient dialogue. Please list each body part examined by the doctor along with what was observed as a result for each body part. Please be concise. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\nDialogue: \n{dia} \n\nExaminations:",
    "OTHER_HISTORY": f"Task: I have a doctor-patient dialogue. Please output the patient's medical history and other history. If nothing has changed in the patient's history, say so. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\nDialogue: \n{dia} \n\nPatient's history:",
    "CC": f"Task: I have a doctor-patient dialogue. Please output the patient's chief complaint, which are the reasons/symptoms of why the patient is visiting the doctor. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Chief complaint:",
    "DIAGNOSIS": f"Task: I have a doctor-patient dialogue. Please list the patient's diagnosis. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Diagnosis:",
    "FAM/SOCHX": f"Task: I have a doctor-patient dialogue. Please output the patient's family medical history and the patient's social history. If the history is unknown, say so. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Patient's family history and social history:",
    "MEDICATIONS": f"Task: I have a doctor-patient dialogue. Please list all the medications taken by the patient as part of a doctor's note. If no medications are mentioned, please output None. You are a smart doctor that knows this. \n\n Dialogue: \n{dia} \n\n Medications:",
    "PROCEDURES": f"Task: I have a doctor-patient dialogue. Please output the patient's surgical history, past procedures, current procedures, as well as the outcomes of the procedures/surgeries. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo for naming the procedures/surgeries as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Procedures/surgeries:",
    "ALLERGY": f"Task: I have a doctor-patient dialogue. Please output the patient's allergies. Please use medical terminology and lingo as much as possible. Please output None if there are no allergies. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Allergies:",
    "GENHX": f"Task: I have a doctor-patient dialogue. Please output the patient's history regarding the present illness. That is, output the history of the issue the patient is currently suffering from. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Present illness history:",
    'ROS': f"Task: I have a doctor-patient dialogue. Please output the review of systems (ROS) regarding the patient, that is the signs and/or symptoms which the patient may be experiencing or has experienced and on which part of the body. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo for naming the symptoms and issues as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Review of systems:",
    'PASTMEDICALHX':f"Task: I have a doctor-patient dialogue. Please output the patient's past medical history. If nothing has changed in the patient's history, say so. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Patient's medical history:",
    'PASTSURGICAL':f"Task: I have a doctor-patient dialogue. Please list all the of patient's past surgeries as part of a doctor's note. If no surgeries, please output None. If surgeries are unknown, please output Unknown. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Past surgeries:",
    'DISPOSITION': f"Task: I have a doctor-patient dialogue. Please output the patient's disposition. Please be concise. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Disposition:",
    'EDCOURSE': f"Task: I have a doctor-patient dialogue. The dialogue either mentions what happened in the emergency room (ER) or is currently taking place at the emergency room. Please summarize what happened or is happening at the emergency room. Output what the patient suffered/is suffering from and what the treatments were/are and the dosages if mentioned. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Emergency room summary:",
    'PLAN': f"Task: I have a doctor-patient dialogue. Please list the plan the patient has been following and/or the plan the patient and the doctor agreed on to follow from now on. Please list only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Plan:",
    "LABS": f"Task: I have a doctor-patient dialogue. Please output the results of the lab tests. Please don't add any other information, only list the lab results. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Lab results:",
    'ASSESSMENT': f"Task: I have a doctor-patient dialogue. Please list the doctor's medical observations about the patient as well as what the patient mentions about themselves medically. Basically, list everything medical from the assessment. Please only list what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Assessment:",
    'GYNHX': f"Task: I have a doctor-patient dialogue. Please output anything that was mentioned related to gynecology, obstetrics, OB/GYN, pap smears, pregnancy, menstrual cycles (periods), organs related to OB/GYN, results related to OB/GYN, issues related to OB/GYN, history related to OB/GYN, etc. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n OB/GYN:",
    'IMAGING': f"Task: I have a doctor-patient dialogue. Please output any imaging that was done and the results of the imaging. Imaging such as x-ray, EKG, CBC, radiology, ultrasound, etc. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Imaging:",
    'IMMUNIZATIONS': f"Task: I have a doctor-patient dialogue. Please output the patient's immunizations, vaccines, how up-to-date the immunizations are, and the date of when the immunizations were given. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Immunizations and their dates:",
  }
  return prompts[section]

In [None]:
#generic prompt
# def get_prompt(dia, section):
#     generic_prompt = """In this task, we ask for your expertise in writing SOAP notes from the doctor-patient conversation.
#     Mainly we provide the target section in the SOAP note and the conversation snippet.
#     We need you to generate a summary for the respective snippet.
#     """
#     return generic_prompt

In [None]:
#Med Student A
# def get_prompt(dia, section):
#   prompts = {
#     "OTHER_HISTORY": f"Task: I have a doctor-patient dialogue. Please output the patient's medical history and other history,including asking about the history of heart disease, hypertension, diabetes, and tumor.If nothing has changed in the patient's history, say so. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Patient's history:",
#     "CC": f"Task: I have a doctor-patient dialogue. Please output the patient's chief complaint, which are the reasons/symptoms of why the patient is visiting the doctor.Include how long they have been sick so far Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Chief complaint:",
#     "DIAGNOSIS": f"Task: I have a doctor-patient dialogue. Please list the patient's diagnosis:List the main diagnosis as the first item, and list the previous diagnoses in order of importance.Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Diagnosis:",
#     "FAM/SOCHX": f"Task: I have a doctor-patient dialogue. Please output the patient's family medical history and the patient's social history. If the history is unknown, say so. Mainly about immediate family members about genetic disorders associated with the disease.Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Patient's family history and social history:",
#     "MEDICATIONS": f"Task: I have a doctor-patient dialogue. Please list all the medications taken by the patient as part of a doctor's note and include the date the drug was started, as well as the dose and frequency of use. If no medications are mentioned, please output None. You are a smart doctor that knows this. \n\n Dialogue: \n{dia} \n\n Medications:",
#     "PROCEDURES": f"Task: I have a doctor-patient dialogue. Please output the patient's surgical history, past procedures, current procedures, as well as the outcomes of the procedures/surgeries.If there is follow-up drug treatment, please list it together.Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo for naming the procedures/surgeries as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Procedures/surgeries:",
#     "ALLERGY": f"Task: I have a doctor-patient dialogue. Please output the patient's allergies.includes food allergies and drug allergies, especially penicillin allergies. Please use medical terminology and lingo as much as possible. Please output None if there are no allergies. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Allergies:",
#     'PASTSURGICAL':f"Task: I have a doctor-patient dialogue. Please list all the of patient's past surgeries as part of a doctor's note. Including the specific time of operation and postoperative treatment.If no surgeries, please output None. If surgeries are unknown, please output Unknown. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Past surgeries:",
#     'LABS': f"Task: I have a doctor-patient dialogue. Please output the results of the lab tests.Laboratory tests need to be relevant to this diagnosis, listing abnormal laboratory indicators. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Lab results:",
#     'IMAGING': f"Task: I have a doctor-patient dialogue. Please output any imaging that was done and the results of the imaging. Imaging such as x-ray, EKG, CBC, radiology, ultrasound, etc. And indicate the date the image was taken, as well as the abnormal location and description in the image.Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n Imaging:",
#   }
#   return prompts[section]

In [None]:
#Med Student B
# def get_prompt(dia, section):
#   prompts = {
#     "ASSESSMENT": f"Task: Besides diagnosis and differential diagnosis, assessment could include possible underlying reasons that induce the patient’s problems, the need for therapy, and therapy options. \n\n Dialogue: \n{dia} \n\n Assessment:",
#     "GENHX": f"Task: Please include the following information if mentioned in the dialogue: the onset of symptoms, quality and intensity of symptoms, course (sudden, gradual, constant, or on and off), duration of symptoms, associated symptoms, factors that improve or exacerbate symptoms, triggers or the patient’s own explanation of the cause of the symptoms, etc. \n\n Dialogue: \n{dia} \n\n Present illness history:",
#     "PLAN": f"Task: Plan might constitute three parts- the diagnostic plan which includes lab tests, imaging, and some other diagnostic procedures needed to be done for a solid diagnosis; the therapeutic plan which includes therapeutic procedures or surgery plan, medications, and other detailed treatments such as supportive care; the education plan includes long-term self-management, monitoring and following-up, prognosis, preventions, lifestyle adjustment, etc. Please list all the information about the doctor's plan mentioned in the dialogue, and do not add new information. \n\n Dialogue: \n{dia} \n\n Plan:",
#     "GYNHX": f"Task: I have a doctor-patient dialogue. Please output anything that was mentioned related to gynecology, obstetrics, OB/GYN, pap smears, pregnancy, menstrual cycles (periods), organs related to OB/GYN, results related to OB/GYN, issues related to OB/GYN, history related to OB/GYN, along with mentioned sexual history, current/past contraception methods use, gravida & parity experience, etc. Please output only what was mentioned in the conversation and don't add any other new information. Please use medical terminology and lingo as much as possible. You are a smart doctor and a professional medical scribe. \n\n Dialogue: \n{dia} \n\n OB/GYN:",
#     "ROS": f"Task: A review of systems should be a list of questions about whether the symptoms exist arranged by organ systems. For example, 'the patient confirms fever and vomiting and denies abdominal pain, diarrhea, or jaundice.' \n\n Dialogue: \n{dia} \n\n Review of systems:",
#     "OTHER_HISTORY": f"Task: Other history is a relatively condensed category of issues in the patient’s history that are known to have clinical relevance to the purpose of the current visit but do not fit well in any other category of history. \n\n Dialogue: \n{dia} \n\n Patient's history:",
#     "DIAGNOSIS": f"Task: The diagnosis should be for the purpose of this visit. You can add “?” after it if there’s not enough information to make a solid diagnosis or just not sure. And you can list some differential diagnoses here. (based on S.O.A.P.) \n\n Dialogue: \n{dia} \n\n Diagnosis:",
#     "FAM/SOCHX": f"Task: Family history is a history of disease of first and second-degree blood relatives. Please list the age and health status of them if mentioned. If some of them have any medical conditions, please list the age of onset and progression. If there is any known genetic defect in the family (e.g., cystic fibrosis, hemophilia), please list it. Social history is a part of a medical history that addresses social aspects (e.g., occupation, socioeconomic status, drug use) of the patient’s life that might pertain to the patient’s conditions. \n\n Dialogue: \n{dia} \n\n Patient's family history and social history:",
#     "MEDICATIONS": f"Task: Please list all the medications taken by the patient currently or previously except for what is going to be prescribed newly during the current visit. Please add the following information after the related medication name if mentioned: when started and when finished, the medication’s dosage, frequency, effectiveness, side effects, the patient’s compliance to it, and any other information mentioned by the patient. Medications include vitamins and dietary supplements, herbal remedies, and non-specific drug classes (antibiotics, anti-allergic drugs, etc.). \n\n Dialogue: \n{dia} \n\n Medications:",
#     "PROCEDURES": f"Task: Please list all the invasive procedures the patient has ever undergone before the current visit with the date, location, and complications of them if mentioned. \n\n Dialogue: \n{dia} \n\n Procedures/surgeries:",
#     "ALLERGY": f"Task: Please list all the allergens the patient reacts to. Allergens include drugs, environmental factors such as sunlight and pollen, food, products such as latex, substances such as alcohol, etc. \n\n Dialogue: \n{dia} \n\n Allergies:",
#     'CC': f"Task:The chief complaint should describe the main reason for the patient’s visit and record it in the patient’s own words and in quotation. If the patient comes for some symptoms, you should also record the duration of each symptom, e.g.,’upset stomach for 3 hours.’ \n\n Dialogue: \n{dia} \n\n Chief complaint:",
#     "PASTSURGICAL": f"Task: Please list all the surgeries and transfusions the patient has ever undergone with the date, location, and complications of them if mentioned. \n\n Dialogue: \n{dia} \n\n Past surgeries:",
#     'LABS': f"Task: Please output the results of the lab tests which have been done before the current visit. \n\n Dialogue: \n{dia} \n\n Lab results:",
#     'IMAGING': f"Task: Please output the results of the imaging tests which have been done before the current visit. \n\n Dialogue: \n{dia} \n\n Imaging:"
#     }
#   return prompts[section]

In [None]:
# Generate one prediction per (section, model, dialogue) and save each model's
# predictions for a section to its own .npy file.
#   section_pred_lst[i][j] -> predictions for sections[i] produced by model_lst[j]
#   gold_lst[i]            -> reference 'section_text' values for sections[i]
section_pred_lst = []
gold_lst = []
for section, max_token in zip(sections, max_tokens):
  valA_al = valA[valA['section_header'] == section]
  dialogue_al = valA_al['dialogue'].tolist()
  gold_lst.append(valA_al['section_text'].tolist())
  # A '/' in the section name (as in "FAM/SOCHX") would be read as a directory
  # separator in the output path, so build a filesystem-safe tag once instead of
  # temporarily renaming the loop variable.
  section_tag = section.replace('/', '_')
  model_pred_lst = []
  for model in model_lst:
    # Chat models go through the chat endpoint wrapper; all others use the
    # completion wrapper.
    fun = get_al_chat if model in ["gpt-3.5-turbo", "gpt-4"] else get_al
    al_pred_lst = []
    for dia in dialogue_al:
      prompt = get_prompt(dia, section)
      #uncomment for generic prompt usage
      # instruction = summarize_raw_instruction
      # instruction = instruction.replace('[target_trainable_instruction]', prompt) #p0
      # instruction = instruction.replace('[section]', "GENERAL HISTORY") #x
      # instruction = instruction.replace('[Conv_snippet]', dia) #x
      # res = fun(model, instruction, max_token)
      #comment line below if lines above are uncommented
      res = fun(model, prompt, max_token)
      al_pred_lst.append(res)
    #uncomment for generic prompt usage
    #np.save(f'{model}_{section_tag}_preds_generic.npy', al_pred_lst)
    #comment line below if line above is uncommented
    np.save(f'{model}_{section_tag}_preds.npy', al_pred_lst)
    model_pred_lst.append(al_pred_lst)
  section_pred_lst.append(model_pred_lst)

In [None]:
# Score every model's predictions against the references of the same section.
# section_rouge_scores[i][j] is the ROUGE result for the i-th section and the
# j-th model, mirroring the nesting of section_pred_lst.
rouge = evaluate.load('rouge')
section_rouge_scores = [
    [
        rouge.compute(predictions=model_preds, references=references)
        for model_preds in per_model_preds
    ]
    for per_model_preds, references in zip(section_pred_lst, gold_lst)
]

In [None]:
# Column layout of the results table: two identifying columns followed by the
# four ROUGE variants.
rouge_columns = ['section', 'variant', 'rouge1', 'rouge2', 'rougeL', 'rougeLsum']
cols = {column: [] for column in rouge_columns}

# Start from an empty frame that already has these columns.
generic_prompt_rouge = pd.DataFrame(cols)

In [None]:
# Flatten the nested ROUGE results into one row per (section, model) and add them
# to the results table in a single step. DataFrame.append was removed in
# pandas 2.0, so the rows are collected first and concatenated once.
rouge_rows = []
for section, section_rouge in zip(sections, section_rouge_scores):
  print(f'{section}:\n')
  # Scores are stored in model_lst order; the loop variable is deliberately not
  # called `rouge` so the loaded metric object is not overwritten.
  for model_name, scores in zip(model_lst, section_rouge):
    rouge_rows.append({'section': section, 'variant': model_name, **scores})

if rouge_rows:
  new_rows = pd.DataFrame(rouge_rows)
  if generic_prompt_rouge.empty:
    # Nothing collected yet: use the new rows directly rather than concatenating
    # with an empty frame.
    generic_prompt_rouge = new_rows
  else:
    generic_prompt_rouge = pd.concat([generic_prompt_rouge, new_rows], ignore_index=True)

In [None]:
# Name the file after the last evaluated section, taken from `sections` itself
# rather than from a loop variable left over by an earlier cell. '/' is replaced
# so a header such as "FAM/SOCHX" is not interpreted as a directory.
output_section = sections[-1].replace('/', '_')
generic_prompt_rouge.to_csv(f'generic_prompt_rouge_{output_section}.csv', index=False)

In [None]:
# Display the collected ROUGE table as the cell output.
generic_prompt_rouge