In [2]:
import pandas as pd
import random
import sys
import numpy as np

sys.path.append("../scripts")
from scripts.translate_summary_json import *

%load_ext autoreload
%autoreload 2

In [3]:
# Load the cleaned clinical-notes table; later cells read its "full_note" and
# "summary_eval" columns.
df = pd.read_csv("../datasets/augmented_clinical_notes/cleaned_df.csv")
# Preview the first rows to check the columns loaded as expected.
df.head(15)

Unnamed: 0,idx,note,full_note,conversation,summary_eval
0,155216,"A a sixteen year-old girl, presented to our Ou...","A a sixteen year-old girl, presented to our Ou...","Doctor: Good morning, what brings you to the O...",{'visit motivation': 'Discomfort in the neck a...
1,133948,A 36-year old female patient visited our hospi...,A 36-year old female patient visited our hospi...,"Doctor: Hello, what brings you to the hospital...",{'visit motivation': 'Pain and restricted rang...
2,80176,A 49-year-old male presented with a complaint ...,A 49-year-old male presented with a complaint ...,"Doctor: Good morning, Mr. [Patient's Name]. I'...",{'visit motivation': 'Pain in the left proxima...
3,72232,A 47-year-old male patient was referred to the...,A 47-year-old male patient was referred to the...,"Doctor: Good morning, how are you feeling toda...",{'visit motivation': 'Recurrent attacks of pai...
4,31864,A 24-year-old Yemeni female presented to the e...,A 24-year-old Yemeni female presented to the e...,"Doctor: Good morning, how are you feeling toda...",{'visit motivation': 'Inability to walk and a ...
5,26809,We report a 24-day-old female baby who present...,We report a 24-day-old female baby who present...,"Doctor: Hi there, I am Dr. Smith. How can I he...",{'visit motivation': 'Presented with dyspnea a...
6,149866,A 16 years old female patient presented to us ...,A 16 years old female patient presented to us ...,"Doctor: Good morning, what brings you here tod...",{'visit motivation': 'Inability to walk on bot...
7,87064,We present a case of a seventy-three-year-old ...,We present a case of a seventy-three-year-old ...,"Doctor: Good morning, sir. How can I help you ...",{'visit motivation': 'Concerned with having a ...
8,123006,A 23-year-old female patient was admitted to a...,A 23-year-old female patient was admitted to a...,"Doctor: Hi, how are you feeling today?\nPatien...",{'visit motivation': 'esthetic problem caused ...
9,119317,A 32-year-old healthy female presented to a cl...,A 32-year-old healthy female presented to a cl...,"1. Doctor: Good morning, what brings you here ...",{'visit motivation': 'Severe pain in right tem...


In [4]:
# Spot-check 20 randomly drawn "summary_eval" entries as an {index: value} dict.
# The draw is unseeded, so the displayed rows change on every run.
df["summary_eval"].sample(20).to_dict()

{23226: '{\'visit motivation\': \'Newly diagnosed right breast cancer\', \'admission\': [{\'reason\': \'Right skin sparing mastectomy with sentinel lymph node biopsy, left prophylactic skin sparing mastectomy and immediate tissue expander reconstruction\', \'date\': \'None\', \'duration\': \'None\', \'care center details\': \'Our institution\'}], \'patient information\': {\'age\': \'51\', \'sex\': \'Female\', \'ethnicity\': \'None\', \'weight\': \'None\', \'height\': \'None\', \'family medical history\': \'None\', \'recent travels\': \'None\', \'socio economic context\': \'None\', \'occupation\': \'None\'}, \'patient medical history\': {\'physiological context\': \'25 pack year smoking history, right breast stage IA multifocal invasive ductal carcinoma and multifocal ductal carcinoma in situ (DCIS)\', \'psychological context\': \'None\', \'vaccination history\': \'None\', \'allergies\': \'None\', \'exercise frequency\': \'None\', \'nutrition\': \'None\', \'sexual history\': \'None\', \

In [5]:
# Load the questionnaire; get_question_list() reads its "Section Header",
# "Question Theme" and "Question" columns.
q_df = pd.read_csv("../Questionnaire - Questions.csv")
# Preview the first rows of the questionnaire.
q_df.head(5)

Unnamed: 0,Section Header,Question Theme,Question
0,Admission,Reason for Admission,What is the primary reason for the patient's a...
1,Admission,Date of Admission,What is the date the patient was admitted?
2,Admission,Care Center Details,"Where was the patient admitted (e.g., hospital..."
3,Admission,Length of Stay,How long did the patient stay in the care cent...
4,Patient Information,Age,What is the patient's age?


In [6]:
def get_question_list(q_df):
    """Group questionnaire rows into per-section lists of labelled questions.

    Args:
        q_df: DataFrame with "Section Header", "Question Theme" and
            "Question" columns.

    Returns:
        dict mapping every distinct section header to a list of
        "<theme> : <question>" strings, one per row of that section, kept in
        row order.
    """
    # Register every section up front so each key exists before rows are filed.
    grouped = {}
    for header in q_df["Section Header"].unique():
        grouped[header] = []

    for record in q_df.to_dict("records"):
        label = f'{record["Question Theme"]} : {record["Question"]}'
        grouped[record["Section Header"]].append(label)

    return grouped

def random_questions(question_dict, n_sections=2):
    """Pick random sections and return each one's questions in shuffled order.

    Args:
        question_dict: dict mapping a section name to a list of questions.
        n_sections: number of distinct sections to draw (default 2). When the
            dict holds fewer sections than requested, all of them are used
            instead of raising.

    Returns:
        dict mapping each selected section (the original key object from
        ``question_dict``) to a numpy array containing ALL of that section's
        questions in random order. Empty when ``question_dict`` is empty or
        ``n_sections`` is 0.

    Raises:
        ValueError: if ``n_sections`` is negative.
    """
    if n_sections < 0:
        raise ValueError("n_sections must be non-negative")

    section_keys = list(question_dict.keys())
    n_selected = min(n_sections, len(section_keys))
    if n_selected == 0:
        return {}

    # Draw positions rather than the keys themselves: passing the keys through
    # a numpy array would turn them into numpy scalars (and coerce mixed-type
    # keys to strings), so they might no longer match the dict's own keys.
    chosen_positions = np.random.choice(len(section_keys), size=n_selected, replace=False)

    random_questions_dict = {}
    for position in chosen_positions:
        key = section_keys[position]
        section_questions = question_dict[key]
        # size == len(...) with replace=False keeps every question of the
        # section; only the order is randomised.
        random_questions_dict[key] = np.random.choice(
            section_questions, size=len(section_questions), replace=False
        )
    return random_questions_dict

def random_full_note(df):
    """Draw one random entry from the "full_note" column.

    Args:
        df: DataFrame containing a "full_note" column.

    Returns:
        A one-element numpy array holding the selected note; callers index it
        with ``[0]`` to get the note itself.
    """
    notes = df["full_note"]
    picked = np.random.choice(notes, size=1, replace=False)
    return picked
# Example usage

# Build the per-section question lists from q_df and display one random draw.
random_questions(get_question_list(q_df))

{'Surgeries': array(['Reason : What was the reason for the surgery?',
        'Outcome : What was the outcome of the surgery (e.g., successful, complications)?',
        'Type : What type of surgery was performed?',
        'Time : When was the surgery performed?',
        'Details (Optional) : Are there any specific details mentioned about the surgical procedure or complications?'],
       dtype='<U108'),
 'Admission': array(['Care Center Details : Where was the patient admitted (e.g., hospital name, department)?',
        'Date of Admission : What is the date the patient was admitted?',
        "Reason for Admission : What is the primary reason for the patient's admission?",
        'Length of Stay : How long did the patient stay in the care center? (Optional)'],
       dtype='<U87')}

In [7]:
def generate_prompt(df, q_df):
    """Print a prompt made of one random full note and random question sections.

    The note is drawn first and the questions second, so the order in which
    the random generator is consumed is fixed. Nothing is returned; the prompt
    is written to stdout only.

    Args:
        df: DataFrame passed to random_full_note().
        q_df: questionnaire DataFrame passed to get_question_list().
    """
    note = random_full_note(df)[0]
    sections = random_questions(get_question_list(q_df))

    # The spaces around the note reproduce print()'s default separator.
    print(f"[Start Full note:]\n {note} \n[End Full note]")
    print("\n[Start Random questions]\n")
    for section_name in sections:
        print(f"\n** {section_name} **")
        for item in sections[section_name]:
            print(f"* {item}")
    print("[End Random questions]")

In [8]:
# Print one randomly assembled prompt (random note + random question sections).
generate_prompt(df, q_df)

[Start Full note:]
 The patient was a 33-year-old female with a history of cerebral abscess and recurrent olfactory meningioma; she had undergone a posterior fossa craniectomy and C1–C3 laminectomy for a posterior olfactory meningioma and suboccipital pseudomeningocele at another hospital 8 years prior to the current admission at our hospital. She was ventilator-dependent and had repeated episodes of pneumonia, and our department was consulted for tracheostomy creation for long-term respiratory care. Preoperative physical examination and neck computed tomography (CT) revealed an enlarged thyroid gland overlying the trachea, as well as prominent vessels (Fig. ). In addition, the patient was of short stature (140 cm in height), and had a short neck with limited extent of hyperextension. The surgical risks and benefits of a percutaneous versus open tracheostomy were explained to the patient and her family, as were her unique features making her at high risk for a percutaneous tracheostomy

In [None]:
''' GPT: Patient Information

    Past Medical History: The note mentions that the patient has a past medical history of NHL/SLL (Non-Hodgkin's Lymphoma/Small Lymphocytic Lymphoma) diagnosed almost 10 years ago.
    Age: The patient is 77 years old.
    Sex: The patient is identified as a male.
    Presenting Complaint: The patient presented to the hospital with abdominal swelling, altered mental status, difficulty in urinating associated with hematuria, and diffuse bulky lymphadenopathy in the cervical, axillary, and inguinal areas.

Treatments

    Name: The patient was started on venetoclax (BCL-2 inhibitor) for progressive SLL/NHL.
    Dosage: The patient was started on 20 mg/day for seven days and then increased to 50 mg/day.
    Reaction to Treatment: The note mentions that the patient did not develop tumor lysis syndrome after starting the therapy and is tolerating the treatment well based on imaging and clinical findings.
    Duration: The duration of treatment with venetoclax is specified as starting with 20 mg/day for seven days and then increasing to 50 mg/day.
'''


'''
Patient Information:

    Past Medical History: The patient has a history of Non-Hodgkin's Lymphoma (NHL) and Small Lymphocytic Lymphoma (SLL).
    Recent Travel History: No relevant travel information is mentioned in the note.
    Socioeconomic Context: No details about the patient's socioeconomic background are provided.
    Age: The patient is 77 years old.
    Ethnicity: The patient's ethnicity is not specified in the note.
    Family Medical History: No significant family history of medical conditions is mentioned.
    Sex: The patient is male.
    Presenting Complaint: The patient presents with abdominal swelling, altered mental status, and difficulty urinating associated with hematuria.

Treatments:

    Details (Optional): Idelalisib treatment was discontinued a week prior to the patient's presentation due to pancytopenia and a mixed response on the CT scan imaging.
    Name: Idelalisib is the medication or treatment mentioned in the note.
    Related Condition: The treatment was intended for NHL/SLL.
    Dosage: No information about the dosage of the medication is provided.
    Reaction to Treatment: The patient experienced pancytopenia and a mixed response on the CT scan imaging, which led to the discontinuation of the treatment.
    Time: The treatment was initiated almost one year before the patient's presentation.
    Duration: No information about the duration of the treatment is provided.
    Related Condition: The treatment was intended for NHL/SLL.
    Dosage: No information about the dosage of the medication is provided.
'''