In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import pickle
from pathlib import Path

import numpy as np
from bert_score import BERTScorer
from rouge_score.rouge_scorer import RougeScorer
from scipy.optimize import linear_sum_assignment

from discharge_summaries.schemas.mimic import BHC

In [None]:
# Input locations, all resolved relative to the kernel's current working
# directory (Path.cwd()): data lives one level up, examples live alongside.
MIMIC_III_DIR = Path.cwd().parent.joinpath(
    "data", "physionet.org", "files", "mimiciii", "1.4"
)
BHC_FPATH = MIMIC_III_DIR.joinpath("BHCS.json")

SNOMED_DIR = Path.cwd().parent.joinpath("data", "snomed")
TUNED_PHRASE_MATCHER_FPATH = SNOMED_DIR.joinpath("tuned_snomed_phrase_matcher.pkl")
EXAMPLE_DIR = Path.cwd().joinpath("example")

In [None]:
# Load the tuned SNOMED phrase matcher from disk. The `with` block guarantees
# the file handle is closed once unpickling finishes (or fails).
# NOTE(review): unpickling can execute arbitrary code; only load a matcher file
# produced by this project, never one from an untrusted source.
with TUNED_PHRASE_MATCHER_FPATH.open("rb") as matcher_file:
    snomed_phrase_matcher = pickle.load(matcher_file)

In [None]:
# Parse every record in the BHC JSON file into a BHC object. The file is
# decoded explicitly as UTF-8 so the result does not depend on the platform's
# locale default encoding.
bhcs = [
    BHC(**bhc_dict)
    for bhc_dict in json.loads(BHC_FPATH.read_text(encoding="utf-8"))
]
# Ground-truth record used for every comparison below (hard-coded index 10).
# NOTE(review): presumably example/gpt_bhc.json was generated for this same
# record - confirm before changing the index.
gt_bhc = bhcs[10]

In [None]:
# Predicted BHC to evaluate, read from the example directory. Decoded
# explicitly as UTF-8 so parsing does not depend on the locale default.
pred_bhc = BHC(
    **json.loads((EXAMPLE_DIR / "gpt_bhc.json").read_text(encoding="utf-8"))
)

In [None]:
# Text-similarity scorers shared by the comparison cells below.
# BERTScore: English, with baseline rescaling switched on.
bert_scorer = BERTScorer(
    rescale_with_baseline=True,
    lang="en",
)
# ROUGE-1, ROUGE-2 and ROUGE-L, with stemming switched on.
rouge_scorer = RougeScorer(
    rouge_types=["rouge1", "rouge2", "rougeL"],
    use_stemmer=True,
)

In [None]:
# ROUGE over the complete BHC text: the ground truth is the target and the
# predicted BHC is the text being scored.
scores = rouge_scorer.score(
    target=gt_bhc.full_text,
    prediction=pred_bhc.full_text,
)
scores

In [None]:
# BERTScore over the complete BHC text. BERTScorer.score expects candidates
# first and references second; the prediction is the candidate and the ground
# truth is the reference, so precision/recall are reported the right way round.
bert_scorer.score(cands=[pred_bhc.full_text], refs=[gt_bhc.full_text])

In [None]:
# BERTScore over the assessment-and-plan text only. The prediction is passed as
# the candidate and the ground truth as the reference, matching the
# (cands, refs) order that BERTScorer.score expects.
bert_scorer.score(
    cands=[pred_bhc.assessment_and_plan],
    refs=[gt_bhc.assessment_and_plan],
)

In [None]:
# ROUGE over the assessment-and-plan text only (ground truth as target).
rouge_scorer.score(
    target=gt_bhc.assessment_and_plan,
    prediction=pred_bhc.assessment_and_plan,
)

In [None]:
# Display the predicted assessment-and-plan text for manual inspection.
pred_bhc.assessment_and_plan

In [None]:
# Heading of each ground-truth problem section, in document order.
gt_headings = [problem.heading for problem in gt_bhc.problem_sections]
# For every heading, the set of entity labels the phrase matcher finds in it
# (presumably SNOMED CUIs, going by the variable names - confirm).
gt_headings_snomed_cuis = [
    set(entity.label_ for entity in snomed_phrase_matcher(gt_heading).ents)
    for gt_heading in gt_headings
]

In [None]:
# Heading of each predicted problem section, in document order.
pred_headings = [problem.heading for problem in pred_bhc.problem_sections]
# For every heading, the set of entity labels the phrase matcher finds in it
# (presumably SNOMED CUIs, going by the variable names - confirm).
pred_headings_snomed_cuis = [
    set(entity.label_ for entity in snomed_phrase_matcher(pred_heading).ents)
    for pred_heading in pred_headings
]

In [None]:
def jaccard_metric(gt, pred):
    """Return the Jaccard similarity of two sets: |intersection| / |union|.

    Args:
        gt: Set of ground-truth labels.
        pred: Set (or any iterable) of predicted labels.

    Returns:
        A float between 0.0 and 1.0. When both inputs are empty the union is
        empty and the ratio is undefined; 0.0 is returned in that case so two
        items with no labels at all are never counted as a match (and the
        function no longer raises ZeroDivisionError).
    """
    union = gt.union(pred)
    if not union:
        return 0.0
    return len(gt.intersection(pred)) / len(union)


# Pairwise heading similarity: one row per ground-truth heading, one column per
# predicted heading. The explicit reshape keeps the matrix 2-D even when one of
# the heading lists is empty, which linear_sum_assignment requires.
cui_match_scores = np.array(
    [
        [jaccard_metric(gt_cuis, pred_cuis) for pred_cuis in pred_headings_snomed_cuis]
        for gt_cuis in gt_headings_snomed_cuis
    ],
    dtype=float,
).reshape(len(gt_headings_snomed_cuis), len(pred_headings_snomed_cuis))
# One-to-one pairing of ground-truth and predicted headings that maximises the
# total similarity.
gt_idxs, pred_idxs = linear_sum_assignment(cui_match_scores, maximize=True)
matched_scores = cui_match_scores[gt_idxs, pred_idxs]
# The mean is taken over matched pairs only; with no pairs at all report 0.0
# instead of the NaN (plus warning) that an empty mean would give.
(matched_scores.mean() if matched_scores.size else 0.0), cui_match_scores

In [None]:
# for gt_idx, pred_idx in zip(gt_idxs, pred_idxs):
#     if cui_match_scores[gt_idx, pred_idx] != 0:
#         print(gt_headings[gt_idx], pred_headings[pred_idx])
#         print(
#             bert_scorer.score(
#                 [gt_bhc.problem_sections[gt_idx].text],
#                 [pred_bhc.problem_sections[pred_idx].text],
#             )
#         )
#         print(
#             rouge_scorer.score(
#                 gt_bhc.problem_sections[gt_idx].text,
#                 pred_bhc.problem_sections[pred_idx].text,
#             )
#         )

In [None]:
# pred_headings[0]

In [None]:
# for gt_idx, pred_idx in zip(gt_idxs, pred_idxs):
#     print(gt_headings[gt_idx], pred_headings[pred_idx])