In [1]:
import pandas as pd
import re
import os


# The reference file, the submission file and the score output all live in the
# same scoring-check directory.
reference_dir = generated_dir = score_dir = '../data/scoring/scoring_check'

# Full paths of the discharge target CSV and the submission CSV.
target_fname = f'{reference_dir}/discharge_target.csv'
submission_fname = f'{generated_dir}/submission.csv'

In [2]:
import os
import json
import numpy as np
import pandas as pd

import evaluate

from bleu import Bleu
from rouge import Rouge
from bertscore import BertScore
from align import AlignScorer
from UMLSScorer import UMLSScorer


def calculate_scores(generated, reference, metrics):
    """Score generated texts against reference texts, batch by batch.

    Rows are paired purely by position, so ``generated`` and ``reference``
    must already be in the same row order when they are passed in.

    Args:
        generated: DataFrame with ``hadm_id``, ``discharge_instructions`` and
            ``brief_hospital_course`` columns holding the texts to score.
        reference: DataFrame with the same columns holding the target texts.
        metrics: Collection of metric names. Scorers exist for ``"bleu"``,
            ``"rouge"``, ``"bertscore"``, ``"meteor"``, ``"align"`` and
            ``"medcon"``; any other name only receives empty result lists.

    Returns:
        A dict ``{metric: {section: values}}`` with the two sections
        ``"discharge_instructions"`` and ``"brief_hospital_course"``.
        For ``"rouge"``, ``values`` is a list of three lists ordered
        ``[rouge1, rouge2, rougeL]``. For ``"meteor"`` it holds one value per
        batch. For the remaining metrics it is the concatenation of whatever
        the scorer returned for each batch.

    Raises:
        ValueError: If ``metrics`` is empty.

    Note:
        Both DataFrames are re-indexed on ``hadm_id`` in place (the column is
        kept), which is visible to the caller.
    """
    if not metrics:
        raise ValueError("No metrics specified for scoring.")
    print("Beginning scoring...")

    sections = ("discharge_instructions", "brief_hospital_course")
    rouge_variants = ("rouge1", "rouge2", "rougeL")

    scores = {}
    for metric in metrics:
        scores[metric] = {section: [] for section in sections}
    if "rouge" in metrics:
        # One accumulator per ROUGE variant, created once so that every batch
        # adds to it. Re-creating the list per batch would silently discard
        # all batches except the last one.
        scores["rouge"] = {
            section: [[] for _ in rouge_variants] for section in sections
        }

    # initialize scorers
    if "bleu" in metrics:
        bleuScorer = Bleu()
        print("bleuScorer initialized")
    if "rouge" in metrics:
        rougeScorer = Rouge(["rouge1", "rouge2", "rougeL"])
        print("rougeScorer initialized")
    if "bertscore" in metrics:
        bertScorer = BertScore()
        print("bertScorer initialized")
    if "meteor" in metrics:
        meteorScorer = evaluate.load("meteor")
        print("meteorScorer initialized")
    if "align" in metrics:
        alignScorer = AlignScorer()
        print("alignScorer initialized")
    if "medcon" in metrics:
        medconScorer = UMLSScorer(quickumls_fp="/home/quickumls/")
        print("medconScorer initialized")

    def extend_or_append(bucket, values):
        # The ROUGE scorer is defined outside this file; accept either a
        # sequence of per-sample values or a single aggregate value.
        try:
            bucket.extend(values)
        except TypeError:
            bucket.append(values)

    def score_batch(rows_ref, rows_gen):
        def texts(section):
            # Fresh lists for every scorer call, exactly one pair per call.
            return rows_ref[section].tolist(), rows_gen[section].tolist()

        if "bleu" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                scores["bleu"][section].extend(bleuScorer(refs=refs, hyps=hyps))
        if "rouge" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                batch = rougeScorer(refs=refs, hyps=hyps)
                for bucket, variant in zip(scores["rouge"][section], rouge_variants):
                    extend_or_append(bucket, batch[variant])
        if "bertscore" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                scores["bertscore"][section].extend(bertScorer(refs=refs, hyps=hyps))
        if "meteor" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                batch = meteorScorer.compute(references=refs, predictions=hyps)
                # one aggregate value per batch
                scores["meteor"][section].append(batch["meteor"])
        if "align" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                scores["align"][section].extend(alignScorer(refs=refs, hyps=hyps))
        if "medcon" in metrics:
            for section in sections:
                refs, hyps = texts(section)
                # this scorer is called positionally: references, then hypotheses
                scores["medcon"][section].extend(medconScorer(refs, hyps))

    reference.set_index("hadm_id", drop=False, inplace=True)
    generated.set_index("hadm_id", drop=False, inplace=True)

    batch_size = 8
    total = len(generated)
    for start in range(0, total, batch_size):
        # Positional slicing clamps at the end of the frame, so the final
        # (possibly shorter) batch needs no special handling.
        rows_ref = reference.iloc[start : start + batch_size]
        rows_gen = generated.iloc[start : start + batch_size]
        score_batch(rows_ref=rows_ref, rows_gen=rows_gen)

        # print progress
        print(f"Processed {start + len(rows_ref)}/{total} samples.", flush=True)

    print(f"Processed {len(generated)} samples.", flush=True)
    print("Done.")
    return scores


def compute_overall_score(scores):
    """Collapse per-section metric scores into a flat leaderboard dict.

    Args:
        scores: Mapping ``{metric: {section: values}}`` with the sections
            ``"discharge_instructions"`` and ``"brief_hospital_course"``.
            For ``"rouge"`` each section holds three entries, read by position
            as rouge1, rouge2 and rougeL.

    Returns:
        A dict with one entry per metric present in ``scores`` (``"rouge"``
        contributes ``"rouge1"``, ``"rouge2"`` and ``"rougel"``), each being
        the mean of the two per-section means, plus ``"overall"``, the mean
        of all those entries.
    """
    print("Computing overall score...")

    section_names = ("discharge_instructions", "brief_hospital_course")
    # Fixed order: it determines the key order of the returned dict.
    known_metrics = ("bleu", "rouge", "bertscore", "meteor", "align", "medcon")
    rouge_labels = ("rouge1", "rouge2", "rougel")

    def mean_over_sections(values_by_section):
        # Average inside each section first, then across the two sections.
        per_section = [np.mean(values_by_section[name]) for name in section_names]
        return np.mean(per_section)

    present = list(scores.keys())
    leaderboard = {}
    for metric in known_metrics:
        if metric not in present:
            continue
        if metric != "rouge":
            leaderboard[metric] = mean_over_sections(scores[metric])
            continue
        for position, label in enumerate(rouge_labels):
            variant_values = {
                name: scores["rouge"][name][position] for name in section_names
            }
            leaderboard[label] = mean_over_sections(variant_values)

    leaderboard["overall"] = np.mean(list(leaderboard.values()))

    print("Done.")
    return leaderboard

#################################################################################################
# Scoring driver: load the submission and the reference, validate the submission, score it and
# write the leaderboard to scores.json.
#
# `reference_dir` / `generated_dir` are the directories holding discharge_target.csv and
# submission.csv; `score_dir` is the directory where scores.json is saved. All three must be
# defined before this point.
#################################################################################################

text_columns = ("discharge_instructions", "brief_hospital_course")

print("Reading generated texts...")
# keep_default_na=False keeps empty cells as empty strings instead of NaN
generated = pd.read_csv(
    os.path.join(generated_dir, "submission.csv"), keep_default_na=False
)
reference = pd.read_csv(
    os.path.join(reference_dir, "discharge_target.csv"), keep_default_na=False
)

for frame in (generated, reference):
    for column in text_columns:
        # convert all elements to single-line strings: newlines become spaces
        frame[column] = frame[column].astype(str).str.replace("\n", " ")
    # convert all hadm_id to int so ids from both files are comparable
    frame["hadm_id"] = frame["hadm_id"].astype(int)

# the set of hadm_ids that must be present in the submission
reference_ids = set(reference["hadm_id"])

# filter the generated texts to only include hadm_ids from the reference
generated = generated[generated["hadm_id"].isin(reference_ids)]

# check for invalid submissions, most specific problem first: a duplicated or
# missing id would otherwise only ever surface as a generic row-count error
if generated["hadm_id"].nunique() != len(generated):
    raise ValueError(
        "Submission contains duplicate hadm_ids. Please check your submission file."
    )

# after the filter above the submission cannot contain extra ids, only miss some
missing_hadm_ids = reference_ids - set(generated["hadm_id"])
if missing_hadm_ids:
    preview = ", ".join(str(hadm_id) for hadm_id in sorted(missing_hadm_ids)[:5])
    raise ValueError(
        "Submission does not contain all hadm_ids from the test set "
        f"({len(missing_hadm_ids)} missing, e.g. {preview}). Please check your submission "
        f"file to ensure all {len(reference_ids):,} samples are present."
    )

if generated.shape[0] != reference.shape[0]:
    raise ValueError(
        "Submission does not contain the correct number of rows. Please check your submission file."
    )

# calculate_scores pairs rows by position, so both frames need the same order
generated = generated.sort_values(by="hadm_id")
reference = reference.sort_values(by="hadm_id")
print("Done.")

scores = calculate_scores(
    generated, reference, metrics=["bleu", "rouge", "bertscore", "meteor", "align", "medcon"]
)

leaderboard = compute_overall_score(scores)

with open(os.path.join(score_dir, "scores.json"), "w") as score_file:
    score_file.write(json.dumps(leaderboard))

ModuleNotFoundError: No module named 'evaluate'

In [None]:
# Example brief hospital course text, flattened to a single line.
# Newlines are replaced with a space (not removed) so that the last word of one
# line does not fuse with the first word of the next, matching how the scoring
# script flattens its text columns.
BHC = """ 
Brief Hospital Course:

The patient is a male with a history of stage IV Hodgkin's lymphoma, systemic sarcoidosis, inflammatory bowel disease, obstructive sleep apnea with CPAP, immune thrombocytopenia post-splenectomy, and prior supraventricular tachycardia. He presented with neutropenic fever, supraventricular tachycardia, and hypotension. He was admitted to the intensive care unit for management of these acute conditions.

Neutropenic Fever:
Presentation: Patient reported chills and a temperature of 102.2°F, along with a minor, nonproductive cough.
Diagnostic Workup: Labs showed an absolute neutrophil count (ANC) of 8, suggestive of severe neutropenia.
Treatment: Administered vancomycin and cefepime for broad-spectrum antibiotic coverage. Received stress dose steroids (hydrocortisone 100mg IV).
Pending Workup/Instructions: Continue monitoring WBC and ANC levels. Filgrastim 300 mcg SC was started and is to be continued daily until further evaluation in clinic.
ICD Diagnosis: Neutropenia, unspecified.
Supraventricular Tachycardia (SVT):
Presentation: Tachycardia to the 140s noted, with associated hypotension.
Diagnostic Workup: EKG confirmed AVNRT. Patient self-converted to sinus rhythm during CVL placement.
Treatment: Esmolol drip initiated, later switched to neosynephrine for blood pressure support. No ablation due to immunocompromised state.
Pending Workup/Instructions: Continue cardiovascular monitoring; follow up with cardiology for ongoing management.
ICD Diagnosis: Supraventricular tachycardia.
Hypotension:
Presentation: Documented low systolic blood pressures during episodes of SVT.
Treatment: Managed with volume resuscitation (total of 4L IV fluids in ED) and pressors.
Pending Workup/Instructions: Monitor blood pressure and adjust pressors as needed. Assess need for fluid balance and electrolyte repletion.
ICD Diagnosis: Hypotension, unspecified.""".replace('\n', ' ')

# Example discharge-instructions letter stored as a multi-line string.
# NOTE(review): presumably a generated sample meant to be compared with
# `target_discharge_instructions`; it is not referenced by the scoring code
# visible in this file - confirm before relying on or removing it.
discharge_instructions =  """ Dear [Patient's Name],

It was a pleasure to have you in our care at the hospital. You were admitted due to a high fever, rapid heart rate, and low blood pressure, symptoms that were complicated by your ongoing treatment for Hodgkin's lymphoma and other health conditions. During your stay, we conducted several important tests, including blood tests that showed a very low white blood cell count, indicating neutropenia, which can make you more susceptible to infections. We also identified episodes of rapid heartbeats through an EKG.

Your treatment included the administration of antibiotics, vancomycin and cefepime, to manage potential infections, and medications such as hydrocortisone and esmolol to stabilize your heart rate and blood pressure. We also adjusted your fluid intake and administered pressors to help manage your blood pressure.

Upon discharge, please continue taking your prescribed medications as listed in your discharge summary. Notably, we have started you on Filgrastim to help boost your white blood cells, which you should take daily until your follow-up clinic visit. Please ensure to schedule a follow-up appointment with both the Bone Marrow Transplant team and your cardiologist to monitor your progress and adjust your treatment as necessary. There are no pending tests, but it's important to monitor your temperature and symptoms at home.

Thank you for trusting us with your care. Wishing you a smooth and speedy recovery!

Sincerely,
Your Healthcare Team"""


# Example brief-hospital-course text kept verbatim, including its original
# line breaks and spacing.
# NOTE(review): presumably the reference ("target") counterpart of `BHC`, with
# `___` standing in for redacted values - confirm against the data source.
target_BHC = """  ___ male with h/o Hodgkin's lymphoma C1D17 ABVD, sarcoidosis, 
OSA on CPAP, h/o AVNRT, presented with neutropenic fever and 
SVT.

MICU Course:  
============
Pt. was initially admitted to the MICU for hemodynamic 
instability requiring pressors (levo) in the setting of presumed 
neutropenic fever and sepsis.  Pt.'s hemodynamics quickly 
improved once starting cefepime/vancomycin with fluid 
resuscitation.  He was also noted to have a downtrending 
lactate.  He was quickly weaned off of pressors and transferred 
to the floor."""


# Example discharge-instructions text kept verbatim, including its original
# line breaks, spacing and spelling.
# NOTE(review): presumably the reference ("target") counterpart of
# `discharge_instructions`, with `___` standing in for redacted values -
# confirm against the data source.
target_discharge_instructions = """Dear Mr. ___,

It has been our pleasure to be taking care of you.  You were 
admitted for neutropenic fever.  Your heart rate was initially 
high and your blood pressure was initially low.  We treated you 
with antibiotics, and your fever has since resolved.  You were 
also seen by our cardiologists, who deemed that your initial 
fast heart rate was related to the infection, and that there 
will be no changes needed for your heart medications.  You 
tolerated chemotherapy this week well.  Please follow up with 
___ clinic upcoming Thurday to further 
evaluate your occassional dizziness while standing up.  You have 
___ clinic scheduled on the same time for your 
follow-up.  Please also remember to take neupogen daily until 
next ___, and you will receive further 
instruction from the ___ clinic on ___. """
