In [1]:
import os
import json
from datetime import datetime
from transformers import pipeline

google/flan-t5-base

In [2]:
import os
import json
from datetime import datetime
from transformers import pipeline

# Helper function to convert a date string into a formatted date with ordinal suffix.
def format_date(date_str):
    try:
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
    except Exception:
        return date_str
    day = date_obj.day
    if 4 <= day <= 20 or 24 <= day <= 30:
        suffix = "th"
    else:
        suffix = ["st", "nd", "rd"][day % 10 - 1]
    return f"{day}{suffix} of {date_obj.strftime('%B')}, {date_obj.year}"

MODEL_NAME = "google/flan-t5-base"
try:
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")
except Exception:
    print("Downloading model...")
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")

directory_path = "/content/"

for filename in os.listdir(directory_path):
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r') as file:
            patient_data = json.load(file)

        # extract information

        # patient demographics
        patient_id = patient_data.get("patient_id", "Unknown")
        demo = patient_data.get("patient_demographics", {})
        name = demo.get("name", "Unknown")
        age = demo.get("age", "Unknown")
        gender = demo.get("gender", "Unknown")
        admission_date_raw = demo.get("admission_date", "Unknown")
        # Use expected_discharge_date if available; otherwise default to discharge_date.
        discharge_date_raw = demo.get("expected_discharge_date", demo.get("discharge_date", "Unknown"))
        admission_date = format_date(admission_date_raw) if admission_date_raw != "Unknown" else admission_date_raw
        discharge_date = format_date(discharge_date_raw) if discharge_date_raw != "Unknown" else discharge_date_raw
        last_name = name.split()[-1] if name != "Unknown" else name

        # title based on gender.
        if gender.lower() == "male":
            title_name = f"Mr. {last_name}"
        elif gender.lower() == "female":
            title_name = f"Ms. {last_name}"
        else:
            title_name = name

        # encounter details.
        encounters = patient_data.get("encounters", [])
        admission_encounter = next((e for e in encounters if e.get("type") == "Admission"), {})
        admission_reason = admission_encounter.get("reason", "no specific admission reason provided")
        ecg_encounter = next((e for e in encounters if e.get("type") == "ECG"), {})
        ecg_findings = ecg_encounter.get("findings", "ECG findings not provided")
        pci_encounter = next((e for e in encounters if e.get("type") == "PCI"), {})
        pci_description = pci_encounter.get("description", "PCI details not provided")
        ct_encounter = next((e for e in encounters if e.get("type") == "CT Scan"), {})
        ct_findings = ct_encounter.get("findings", "CT scan findings not provided")
        ct_date = ct_encounter.get("date", "Unknown date")

        # diagnosis details.
        diagnoses = patient_data.get("diagnoses", [])
        if diagnoses:
            diagnosis_info = diagnoses[0]
            diagnosis_description = diagnosis_info.get("description", "Diagnosis description not provided")
            diagnosis_code = diagnosis_info.get("diagnosis_code", "Diagnosis code not provided")
        else:
            diagnosis_description = "Diagnosis description not provided"
            diagnosis_code = "Diagnosis code not provided"

        # lab test details.
        # Skip any tests with result "result not provided"
        labs = patient_data.get("labs", [])
        new_lab_details_list = []
        for lab in labs:
            lab_date = lab.get("date", "Unknown date")
            for test in lab.get("tests", []):
                test_name = test.get("name", "Unknown test")
                result = test.get("result", "result not provided")
                if result.lower() == "result not provided":
                    continue  # Skip this test entry.
                new_lab_details_list.append(f"{test_name} on {lab_date}: {result}")
        lab_details_str = ", ".join(new_lab_details_list) if new_lab_details_list else "No lab tests available."

        # follow-up care information.
        follow_up_list = patient_data.get("follow_up_care", [])
        if follow_up_list:
            follow_up = follow_up_list[0]
            follow_up_text = (f"A follow-up appointment at the {follow_up.get('type', 'clinic')} has been scheduled: "
                              f"{follow_up.get('details', '')}")
        else:
            follow_up_text = "No follow-up details available."

        # lifestyle modifications.
        lifestyle_mods = patient_data.get("lifestyle_modifications", [])
        lifestyle_text = " ".join([f"{item.get('recommendation')}: {item.get('details')}"
                                   for item in lifestyle_mods]) if lifestyle_mods else "No lifestyle modifications noted."

        # Analyze ward round notes for discharge safety.
        ward_notes = patient_data.get("ward_round_notes", [])
        discharge_notes = []
        for note in ward_notes:
            if "not safe for discharge" in note.get("note", "").lower():
                discharge_notes.append(f"On {note.get('date', 'Unknown')} {note.get('time', '')} note: {note.get('note', '')}")
        if discharge_notes:
            discharge_status = (f"Based on the ward round notes, {title_name} has been deemed not safe for discharge at this time. "
                                "Details: " + " ".join(discharge_notes))
        else:
            discharge_status = f"{title_name} is considered stable for discharge."

        # Process medication orders grouped by date.
        med_orders = patient_data.get("med_orders", [])
        meds_day1 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                     for m in med_orders if m.get("date") == admission_date_raw]
        other_dates = sorted(set(m.get("date") for m in med_orders if m.get("date") != admission_date_raw))
        meds_day2 = []
        if other_dates:
            next_day = other_dates[0]
            meds_day2 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                         for m in med_orders if m.get("date") == next_day]
        meds_day1_str = ", ".join(meds_day1) if meds_day1 else "No medications recorded on admission"
        meds_day2_str = ", ".join(meds_day2) if meds_day2 else "No additional medications recorded"

        # expected discharge summary.
        expected_output = (
            f"Name: {name}\n" +
            "Patient Information:\n"
            f"Patient ID: {patient_id}\n"
            f"Age: {age}\n"
            f"Gender: {gender}\n"
            f"Admission Date: {admission_date_raw}\n"
            f"Expected Discharge Date: {discharge_date_raw}\n"
            "\n"
            "Diagnosis:\n"
            f"{title_name} was admitted on {admission_date} after presenting with {admission_reason}. "
            f"Diagnostic evaluations, including a CT scan, confirmed the presence of {diagnosis_description} ({diagnosis_code}).\n"
            "\n"
            "Hospital Course:\n"
            f"Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on {ct_date} revealed: {ct_findings}. "
            f"Laboratory findings included: {lab_details_str}\n"
            f"Medications administered on the day of admission were: {meds_day1_str}. Subsequent medication orders on the following day included: {meds_day2_str}.\n"
            "\n"
            "Discharge Suitability:\n"
            f"{discharge_status}\n"
            "\n"
            "Follow-up Care:\n"
            f"{follow_up_text}\n"
            "\n"
            "Lifestyle Modifications:\n"
            f"{title_name} has been advised to adopt a heart-healthy diet and modify his lifestyle as follows: {lifestyle_text}\n"
            "\n"
            "In conclusion, based on the available clinical data and ongoing assessments, this summary outlines the course of care for "
            f"{title_name}. Please note the current recommendation regarding discharge is stated above and further monitoring is advised if the patient is not safe for discharge."
        )

        # prompt for the LLM
        prompt = (
            "Print exactly as below: \n" + expected_output + "except omitting those told as 'not provided', do not change anything else including punctuation, spacing, and line breaks."
        )

        output = summarizer(prompt, max_new_tokens=800, do_sample=False, temperature=0)
        generated_text = output[0]['generated_text'].strip()

        print(f"\n--- Discharge Summary for {filename} ---\n")
        print(generated_text)
        print("\n" + "-"*50 + "\n")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (646 > 512). Running this sequence through the model will result in indexing errors



--- Discharge Summary for data_4.json ---

Name: Sean Doe Patient Information: Patient ID: 000 Age: 65 Gender: Male Admission Date: 2024-03-10 Expected Discharge Date: 2024-03-12 Diagnosis: Mr. Doe was admitted on 10th of March, 2024 after presenting with CP, SoB, v5-6 ST elevation. Diagnostic evaluations, including a CT scan, confirmed the presence of ST elevation (STEMI) myocardial infarction involving left main coronary artery (I2101). Hospital Course: Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on Unknown date revealed: CT scan findings not provided. Laboratory findings included: Troponin T on 2024-03-11: 30, Troponin T on 2024-03-11: 29. Medications administered on the day of admission were: No medications recorded on admission. Subsequent medication orders on the following day included: Morphine (2-4 mg, As needed for pain), Oxygen (2-4 L/min, If O2 sat  90%), Nitroglycerin (0.4 mg, Every 5 minutes for 3 doses if pain p

t5-small

In [3]:
# Helper function to convert a date string into a formatted date with ordinal suffix.
def format_date(date_str):
    try:
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
    except Exception:
        return date_str
    day = date_obj.day
    if 4 <= day <= 20 or 24 <= day <= 30:
        suffix = "th"
    else:
        suffix = ["st", "nd", "rd"][day % 10 - 1]
    return f"{day}{suffix} of {date_obj.strftime('%B')}, {date_obj.year}"

MODEL_NAME = "t5-small"
try:
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")
except Exception:
    print("Downloading model...")
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")

directory_path = "/content/"

for filename in os.listdir(directory_path):
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r') as file:
            patient_data = json.load(file)

        # extract information

        # patient demographics
        patient_id = patient_data.get("patient_id", "Unknown")
        demo = patient_data.get("patient_demographics", {})
        name = demo.get("name", "Unknown")
        age = demo.get("age", "Unknown")
        gender = demo.get("gender", "Unknown")
        admission_date_raw = demo.get("admission_date", "Unknown")
        # Use expected_discharge_date if available; otherwise default to discharge_date.
        discharge_date_raw = demo.get("expected_discharge_date", demo.get("discharge_date", "Unknown"))
        admission_date = format_date(admission_date_raw) if admission_date_raw != "Unknown" else admission_date_raw
        discharge_date = format_date(discharge_date_raw) if discharge_date_raw != "Unknown" else discharge_date_raw
        last_name = name.split()[-1] if name != "Unknown" else name

        # title based on gender.
        if gender.lower() == "male":
            title_name = f"Mr. {last_name}"
        elif gender.lower() == "female":
            title_name = f"Ms. {last_name}"
        else:
            title_name = name

        # encounter details.
        encounters = patient_data.get("encounters", [])
        admission_encounter = next((e for e in encounters if e.get("type") == "Admission"), {})
        admission_reason = admission_encounter.get("reason", "no specific admission reason provided")
        ecg_encounter = next((e for e in encounters if e.get("type") == "ECG"), {})
        ecg_findings = ecg_encounter.get("findings", "ECG findings not provided")
        pci_encounter = next((e for e in encounters if e.get("type") == "PCI"), {})
        pci_description = pci_encounter.get("description", "PCI details not provided")
        ct_encounter = next((e for e in encounters if e.get("type") == "CT Scan"), {})
        ct_findings = ct_encounter.get("findings", "CT scan findings not provided")
        ct_date = ct_encounter.get("date", "Unknown date")

        # diagnosis details.
        diagnoses = patient_data.get("diagnoses", [])
        if diagnoses:
            diagnosis_info = diagnoses[0]
            diagnosis_description = diagnosis_info.get("description", "Diagnosis description not provided")
            diagnosis_code = diagnosis_info.get("diagnosis_code", "Diagnosis code not provided")
        else:
            diagnosis_description = "Diagnosis description not provided"
            diagnosis_code = "Diagnosis code not provided"

        # lab test details.
        # Skip any tests with result "result not provided"
        labs = patient_data.get("labs", [])
        new_lab_details_list = []
        for lab in labs:
            lab_date = lab.get("date", "Unknown date")
            for test in lab.get("tests", []):
                test_name = test.get("name", "Unknown test")
                result = test.get("result", "result not provided")
                if result.lower() == "result not provided":
                    continue  # Skip this test entry.
                new_lab_details_list.append(f"{test_name} on {lab_date}: {result}")
        lab_details_str = ", ".join(new_lab_details_list) if new_lab_details_list else "No lab tests available."

        # follow-up care information.
        follow_up_list = patient_data.get("follow_up_care", [])
        if follow_up_list:
            follow_up = follow_up_list[0]
            follow_up_text = (f"A follow-up appointment at the {follow_up.get('type', 'clinic')} has been scheduled: "
                              f"{follow_up.get('details', '')}")
        else:
            follow_up_text = "No follow-up details available."

        # lifestyle modifications.
        lifestyle_mods = patient_data.get("lifestyle_modifications", [])
        lifestyle_text = " ".join([f"{item.get('recommendation')}: {item.get('details')}"
                                   for item in lifestyle_mods]) if lifestyle_mods else "No lifestyle modifications noted."

        # Analyze ward round notes for discharge safety.
        ward_notes = patient_data.get("ward_round_notes", [])
        discharge_notes = []
        for note in ward_notes:
            if "not safe for discharge" in note.get("note", "").lower():
                discharge_notes.append(f"On {note.get('date', 'Unknown')} {note.get('time', '')} note: {note.get('note', '')}")
        if discharge_notes:
            discharge_status = (f"Based on the ward round notes, {title_name} has been deemed not safe for discharge at this time. "
                                "Details: " + " ".join(discharge_notes))
        else:
            discharge_status = f"{title_name} is considered stable for discharge."

        # Process medication orders grouped by date.
        med_orders = patient_data.get("med_orders", [])
        meds_day1 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                     for m in med_orders if m.get("date") == admission_date_raw]
        other_dates = sorted(set(m.get("date") for m in med_orders if m.get("date") != admission_date_raw))
        meds_day2 = []
        if other_dates:
            next_day = other_dates[0]
            meds_day2 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                         for m in med_orders if m.get("date") == next_day]
        meds_day1_str = ", ".join(meds_day1) if meds_day1 else "No medications recorded on admission"
        meds_day2_str = ", ".join(meds_day2) if meds_day2 else "No additional medications recorded"

        # expected discharge summary.
        expected_output = (
            f"Name: {name}\n" +
            "Patient Information:\n"
            f"Patient ID: {patient_id}\n"
            f"Age: {age}\n"
            f"Gender: {gender}\n"
            f"Admission Date: {admission_date_raw}\n"
            f"Expected Discharge Date: {discharge_date_raw}\n"
            "\n"
            "Diagnosis:\n"
            f"{title_name} was admitted on {admission_date} after presenting with {admission_reason}. "
            f"Diagnostic evaluations, including a CT scan, confirmed the presence of {diagnosis_description} ({diagnosis_code}).\n"
            "\n"
            "Hospital Course:\n"
            f"Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on {ct_date} revealed: {ct_findings}. "
            f"Laboratory findings included: {lab_details_str}\n"
            f"Medications administered on the day of admission were: {meds_day1_str}. Subsequent medication orders on the following day included: {meds_day2_str}.\n"
            "\n"
            "Discharge Suitability:\n"
            f"{discharge_status}\n"
            "\n"
            "Follow-up Care:\n"
            f"{follow_up_text}\n"
            "\n"
            "Lifestyle Modifications:\n"
            f"{title_name} has been advised to adopt a heart-healthy diet and modify his lifestyle as follows: {lifestyle_text}\n"
            "\n"
            "In conclusion, based on the available clinical data and ongoing assessments, this summary outlines the course of care for "
            f"{title_name}. Please note the current recommendation regarding discharge is stated above and further monitoring is advised if the patient is not safe for discharge."
        )

        # prompt for the LLM
        prompt = (
            "Print exactly as" + expected_output + "except omitting those told as 'not provided', do not change anything else including punctuation, spacing, and line breaks."
        )

        output = summarizer(prompt, max_new_tokens=800, do_sample=False, temperature=0)
        generated_text = output[0]['generated_text'].strip()

        print(f"\n--- Discharge Summary for {filename} ---\n")
        print(generated_text)
        print("\n" + "-"*50 + "\n")


Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (644 > 512). Running this sequence through the model will result in indexing errors



--- Discharge Summary for data_4.json ---

::: Troponin T on 2024-03-11: 30, Troponin T on 2024-03-11: 29 Medications administered on the day of admission were: No medications recorded on admission . Diagnostic evaluations, including a CT scan, confirmed the presence of ST elevation (STEMI) myocardial infarction involving left main coronary artery (I2101). Hospital Course: No follow-up details available. Lifestyle Modifications: Mr. Doe is considered stable for discharge. Follow

--------------------------------------------------


--- Discharge Summary for data.json ---

: 50 mg/L, Hemoglobin on 2024-02-11: 13.3 g/dL, Platelets on 2024-02-13: 13 g/dL, Platelets on 2024-02-13: 238 x109/L Medications administered on the day of admission were: Amoxicillin IV (500 mg, 8 hourly), Paracetamol (PRN, None). Discharge Suitability: Mr. Doe is considered stable for discharge. Follow-

--------------------------------------------------


--- Discharge Summary for data_2.json ---

,,,, CBC on 202

facebook/bart-base

In [4]:
# Helper function to convert a date string into a formatted date with ordinal suffix.
def format_date(date_str):
    try:
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
    except Exception:
        return date_str
    day = date_obj.day
    if 4 <= day <= 20 or 24 <= day <= 30:
        suffix = "th"
    else:
        suffix = ["st", "nd", "rd"][day % 10 - 1]
    return f"{day}{suffix} of {date_obj.strftime('%B')}, {date_obj.year}"

MODEL_NAME = "facebook/bart-base"
try:
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")
except Exception:
    print("Downloading model...")
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")

directory_path = "/content/"

for filename in os.listdir(directory_path):
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r') as file:
            patient_data = json.load(file)

        # extract information

        # patient demographics
        patient_id = patient_data.get("patient_id", "Unknown")
        demo = patient_data.get("patient_demographics", {})
        name = demo.get("name", "Unknown")
        age = demo.get("age", "Unknown")
        gender = demo.get("gender", "Unknown")
        admission_date_raw = demo.get("admission_date", "Unknown")
        # Use expected_discharge_date if available; otherwise default to discharge_date.
        discharge_date_raw = demo.get("expected_discharge_date", demo.get("discharge_date", "Unknown"))
        admission_date = format_date(admission_date_raw) if admission_date_raw != "Unknown" else admission_date_raw
        discharge_date = format_date(discharge_date_raw) if discharge_date_raw != "Unknown" else discharge_date_raw
        last_name = name.split()[-1] if name != "Unknown" else name

        # title based on gender.
        if gender.lower() == "male":
            title_name = f"Mr. {last_name}"
        elif gender.lower() == "female":
            title_name = f"Ms. {last_name}"
        else:
            title_name = name

        # encounter details.
        encounters = patient_data.get("encounters", [])
        admission_encounter = next((e for e in encounters if e.get("type") == "Admission"), {})
        admission_reason = admission_encounter.get("reason", "no specific admission reason provided")
        ecg_encounter = next((e for e in encounters if e.get("type") == "ECG"), {})
        ecg_findings = ecg_encounter.get("findings", "ECG findings not provided")
        pci_encounter = next((e for e in encounters if e.get("type") == "PCI"), {})
        pci_description = pci_encounter.get("description", "PCI details not provided")
        ct_encounter = next((e for e in encounters if e.get("type") == "CT Scan"), {})
        ct_findings = ct_encounter.get("findings", "CT scan findings not provided")
        ct_date = ct_encounter.get("date", "Unknown date")

        # diagnosis details.
        diagnoses = patient_data.get("diagnoses", [])
        if diagnoses:
            diagnosis_info = diagnoses[0]
            diagnosis_description = diagnosis_info.get("description", "Diagnosis description not provided")
            diagnosis_code = diagnosis_info.get("diagnosis_code", "Diagnosis code not provided")
        else:
            diagnosis_description = "Diagnosis description not provided"
            diagnosis_code = "Diagnosis code not provided"

        # lab test details.
        # Skip any tests with result "result not provided"
        labs = patient_data.get("labs", [])
        new_lab_details_list = []
        for lab in labs:
            lab_date = lab.get("date", "Unknown date")
            for test in lab.get("tests", []):
                test_name = test.get("name", "Unknown test")
                result = test.get("result", "result not provided")
                if result.lower() == "result not provided":
                    continue  # Skip this test entry.
                new_lab_details_list.append(f"{test_name} on {lab_date}: {result}")
        lab_details_str = ", ".join(new_lab_details_list) if new_lab_details_list else "No lab tests available."

        # follow-up care information.
        follow_up_list = patient_data.get("follow_up_care", [])
        if follow_up_list:
            follow_up = follow_up_list[0]
            follow_up_text = (f"A follow-up appointment at the {follow_up.get('type', 'clinic')} has been scheduled: "
                              f"{follow_up.get('details', '')}")
        else:
            follow_up_text = "No follow-up details available."

        # lifestyle modifications.
        lifestyle_mods = patient_data.get("lifestyle_modifications", [])
        lifestyle_text = " ".join([f"{item.get('recommendation')}: {item.get('details')}"
                                   for item in lifestyle_mods]) if lifestyle_mods else "No lifestyle modifications noted."

        # Analyze ward round notes for discharge safety.
        ward_notes = patient_data.get("ward_round_notes", [])
        discharge_notes = []
        for note in ward_notes:
            if "not safe for discharge" in note.get("note", "").lower():
                discharge_notes.append(f"On {note.get('date', 'Unknown')} {note.get('time', '')} note: {note.get('note', '')}")
        if discharge_notes:
            discharge_status = (f"Based on the ward round notes, {title_name} has been deemed not safe for discharge at this time. "
                                "Details: " + " ".join(discharge_notes))
        else:
            discharge_status = f"{title_name} is considered stable for discharge."

        # Process medication orders grouped by date.
        med_orders = patient_data.get("med_orders", [])
        meds_day1 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                     for m in med_orders if m.get("date") == admission_date_raw]
        other_dates = sorted(set(m.get("date") for m in med_orders if m.get("date") != admission_date_raw))
        meds_day2 = []
        if other_dates:
            next_day = other_dates[0]
            meds_day2 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                         for m in med_orders if m.get("date") == next_day]
        meds_day1_str = ", ".join(meds_day1) if meds_day1 else "No medications recorded on admission"
        meds_day2_str = ", ".join(meds_day2) if meds_day2 else "No additional medications recorded"

        # expected discharge summary.
        expected_output = (
            f"Name: {name}\n" +
            "Patient Information:\n"
            f"Patient ID: {patient_id}\n"
            f"Age: {age}\n"
            f"Gender: {gender}\n"
            f"Admission Date: {admission_date_raw}\n"
            f"Expected Discharge Date: {discharge_date_raw}\n"
            "\n"
            "Diagnosis:\n"
            f"{title_name} was admitted on {admission_date} after presenting with {admission_reason}. "
            f"Diagnostic evaluations, including a CT scan, confirmed the presence of {diagnosis_description} ({diagnosis_code}).\n"
            "\n"
            "Hospital Course:\n"
            f"Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on {ct_date} revealed: {ct_findings}. "
            f"Laboratory findings included: {lab_details_str}\n"
            f"Medications administered on the day of admission were: {meds_day1_str}. Subsequent medication orders on the following day included: {meds_day2_str}.\n"
            "\n"
            "Discharge Suitability:\n"
            f"{discharge_status}\n"
            "\n"
            "Follow-up Care:\n"
            f"{follow_up_text}\n"
            "\n"
            "Lifestyle Modifications:\n"
            f"{title_name} has been advised to adopt a heart-healthy diet and modify his lifestyle as follows: {lifestyle_text}\n"
            "\n"
            "In conclusion, based on the available clinical data and ongoing assessments, this summary outlines the course of care for "
            f"{title_name}. Please note the current recommendation regarding discharge is stated above and further monitoring is advised if the patient is not safe for discharge."
        )

        # prompt for the LLM
        prompt = (
            "Print exactly as the below: \n" + expected_output + "except omitting those told as 'not provided', do not change anything else including punctuation, spacing, and line breaks."
        )

        output = summarizer(prompt, max_new_tokens=800, do_sample=False, temperature=0)
        generated_text = output[0]['generated_text'].strip()

        print(f"\n--- Discharge Summary for {filename} ---\n")
        print(generated_text)
        print("\n" + "-"*50 + "\n")


Device set to use cpu



--- Discharge Summary for data_4.json ---

Print exactly as the below: TextColorName: Sean DoePatient Information:TextColorPatient ID: 000TextColorAge: 65TextColorGender: MaleTextColorAdmission Date: 2024-03-10: 30, 2024-04-11: 30.TextColorExpected Discharge Date: 2019-03, 2024.TextColorTextColorDischarge Suitability:TextColorDiagnosis:TextColorMr. Doe was admitted on 10th of March, 2024 after presenting with CP, SoB, v5-6 ST elevation. Diagnostic evaluations, including a CT scan, confirmed the presence of ST elevation (STEMI) myocardial infarction involving left main coronary artery (I2101).TextColorTextColorHospital Course:TextColorUpon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on Unknown date revealed: CT scan findings not provided. Laboratory findings included: Troponin T on 2024-02-11, 30, Troponins T on 1924-03 -11: 29TextColorMedications administered on the day of admission were: No medications recorded on admission. Subs

google/flan-t5-small

In [8]:
# Helper function to convert a date string into a formatted date with ordinal suffix.
def format_date(date_str):
    try:
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
    except Exception:
        return date_str
    day = date_obj.day
    if 4 <= day <= 20 or 24 <= day <= 30:
        suffix = "th"
    else:
        suffix = ["st", "nd", "rd"][day % 10 - 1]
    return f"{day}{suffix} of {date_obj.strftime('%B')}, {date_obj.year}"

MODEL_NAME = "google/flan-t5-small"
try:
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")
except Exception:
    print("Downloading model...")
    summarizer = pipeline("text2text-generation", model=MODEL_NAME, device="cpu")

directory_path = "/content/"

for filename in os.listdir(directory_path):
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r') as file:
            patient_data = json.load(file)

        # extract information

        # patient demographics
        patient_id = patient_data.get("patient_id", "Unknown")
        demo = patient_data.get("patient_demographics", {})
        name = demo.get("name", "Unknown")
        age = demo.get("age", "Unknown")
        gender = demo.get("gender", "Unknown")
        admission_date_raw = demo.get("admission_date", "Unknown")
        # Use expected_discharge_date if available; otherwise default to discharge_date.
        discharge_date_raw = demo.get("expected_discharge_date", demo.get("discharge_date", "Unknown"))
        admission_date = format_date(admission_date_raw) if admission_date_raw != "Unknown" else admission_date_raw
        discharge_date = format_date(discharge_date_raw) if discharge_date_raw != "Unknown" else discharge_date_raw
        last_name = name.split()[-1] if name != "Unknown" else name

        # title based on gender.
        if gender.lower() == "male":
            title_name = f"Mr. {last_name}"
        elif gender.lower() == "female":
            title_name = f"Ms. {last_name}"
        else:
            title_name = name

        # encounter details.
        encounters = patient_data.get("encounters", [])
        admission_encounter = next((e for e in encounters if e.get("type") == "Admission"), {})
        admission_reason = admission_encounter.get("reason", "no specific admission reason provided")
        ecg_encounter = next((e for e in encounters if e.get("type") == "ECG"), {})
        ecg_findings = ecg_encounter.get("findings", "ECG findings not provided")
        pci_encounter = next((e for e in encounters if e.get("type") == "PCI"), {})
        pci_description = pci_encounter.get("description", "PCI details not provided")
        ct_encounter = next((e for e in encounters if e.get("type") == "CT Scan"), {})
        ct_findings = ct_encounter.get("findings", "CT scan findings not provided")
        ct_date = ct_encounter.get("date", "Unknown date")

        # diagnosis details.
        diagnoses = patient_data.get("diagnoses", [])
        if diagnoses:
            diagnosis_info = diagnoses[0]
            diagnosis_description = diagnosis_info.get("description", "Diagnosis description not provided")
            diagnosis_code = diagnosis_info.get("diagnosis_code", "Diagnosis code not provided")
        else:
            diagnosis_description = "Diagnosis description not provided"
            diagnosis_code = "Diagnosis code not provided"

        # lab test details.
        # Skip any tests with result "result not provided"
        labs = patient_data.get("labs", [])
        new_lab_details_list = []
        for lab in labs:
            lab_date = lab.get("date", "Unknown date")
            for test in lab.get("tests", []):
                test_name = test.get("name", "Unknown test")
                result = test.get("result", "result not provided")
                if result.lower() == "result not provided":
                    continue  # Skip this test entry.
                new_lab_details_list.append(f"{test_name} on {lab_date}: {result}")
        lab_details_str = ", ".join(new_lab_details_list) if new_lab_details_list else "No lab tests available."

        # follow-up care information.
        follow_up_list = patient_data.get("follow_up_care", [])
        if follow_up_list:
            follow_up = follow_up_list[0]
            follow_up_text = (f"A follow-up appointment at the {follow_up.get('type', 'clinic')} has been scheduled: "
                              f"{follow_up.get('details', '')}")
        else:
            follow_up_text = "No follow-up details available."

        # lifestyle modifications.
        lifestyle_mods = patient_data.get("lifestyle_modifications", [])
        lifestyle_text = " ".join([f"{item.get('recommendation')}: {item.get('details')}"
                                   for item in lifestyle_mods]) if lifestyle_mods else "No lifestyle modifications noted."

        # Analyze ward round notes for discharge safety.
        ward_notes = patient_data.get("ward_round_notes", [])
        discharge_notes = []
        for note in ward_notes:
            if "not safe for discharge" in note.get("note", "").lower():
                discharge_notes.append(f"On {note.get('date', 'Unknown')} {note.get('time', '')} note: {note.get('note', '')}")
        if discharge_notes:
            discharge_status = (f"Based on the ward round notes, {title_name} has been deemed not safe for discharge at this time. "
                                "Details: " + " ".join(discharge_notes))
        else:
            discharge_status = f"{title_name} is considered stable for discharge."

        # Process medication orders grouped by date.
        med_orders = patient_data.get("med_orders", [])
        meds_day1 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                     for m in med_orders if m.get("date") == admission_date_raw]
        other_dates = sorted(set(m.get("date") for m in med_orders if m.get("date") != admission_date_raw))
        meds_day2 = []
        if other_dates:
            next_day = other_dates[0]
            meds_day2 = [f"{m.get('medication')} ({m.get('dose')}, {m.get('frequency')})"
                         for m in med_orders if m.get("date") == next_day]
        meds_day1_str = ", ".join(meds_day1) if meds_day1 else "No medications recorded on admission"
        meds_day2_str = ", ".join(meds_day2) if meds_day2 else "No additional medications recorded"

        # expected discharge summary.
        expected_output = (
            f"Name: {name}\n" +
            "Patient Information:\n"
            f"Patient ID: {patient_id}\n"
            f"Age: {age}\n"
            f"Gender: {gender}\n"
            f"Admission Date: {admission_date_raw}\n"
            f"Expected Discharge Date: {discharge_date_raw}\n"
            "\n"
            "Diagnosis:\n"
            f"{title_name} was admitted on {admission_date} after presenting with {admission_reason}. "
            f"Diagnostic evaluations, including a CT scan, confirmed the presence of {diagnosis_description} ({diagnosis_code}).\n"
            "\n"
            "Hospital Course:\n"
            f"Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan performed on {ct_date} revealed: {ct_findings}. "
            f"Laboratory findings included: {lab_details_str}\n"
            f"Medications administered on the day of admission were: {meds_day1_str}. Subsequent medication orders on the following day included: {meds_day2_str}.\n"
            "\n"
            "Discharge Suitability:\n"
            f"{discharge_status}\n"
            "\n"
            "Follow-up Care:\n"
            f"{follow_up_text}\n"
            "\n"
            "Lifestyle Modifications:\n"
            f"{title_name} has been advised to adopt a heart-healthy diet and modify his lifestyle as follows: {lifestyle_text}\n"
            "\n"
            "In conclusion, based on the available clinical data and ongoing assessments, this summary outlines the course of care for "
            f"{title_name}. Please note the current recommendation regarding discharge is stated above and further monitoring is advised if the patient is not safe for discharge."
        )

        # prompt for the LLM
        prompt = (
            "Print exactly as the below: \n" + expected_output[:500] + "except omitting those told as 'not provided', do not change anything else including punctuation, spacing, and line breaks."
        )

        output = summarizer(prompt, max_new_tokens=800, do_sample=False, temperature=0)
        generated_text = output[0]['generated_text'].strip()

        print(f"\n--- Discharge Summary for {filename} ---\n")
        print(generated_text)
        print("\n" + "-"*50 + "\n")


Device set to use cpu



--- Discharge Summary for data_4.json ---



--------------------------------------------------


--- Discharge Summary for data.json ---

John Doe Patient Information: Patient ID: 123456 Age: 70 Gender: Male Admission Date: 2024-02-10 Expected Discharge Date: 2024-02-14 Diagnosis: Mr. Doe was admitted on 10th of February, 2024 after presenting with Cough, shortness of breath, hemoptysis, fever. Diagnostic evaluations, including a CT scan, confirmed the presence of Lobar pneumonia, unspecified organism (J18.1). Hospital Course: Upon admission, the patient underwent initial stabilization and further evaluation. A CT scan perexcept omitting those told as 'not provided', do not change anything else including punctuation, spacing, and line breaks.

--------------------------------------------------


--- Discharge Summary for data_2.json ---

The name of the patient is Michael Thompson.

--------------------------------------------------


--- Discharge Summary for data_3.json ---



----