In [1]:
from scope_gen.algorithms import create_scope_gen_pipeline
from scope_gen.baselines.clm.uncertainty import create_clm_pipeline, generate_clm
from scope_gen.algorithms.base import compute_alphas
import os
from scope_gen.mimic_cxr.paths import DATA_DIR
import pickle
import numpy as np
from scope_gen.utils import set_seed, load_configs_from_jsonl


In [2]:
# Call the project's set_seed helper with a fixed value before any pipeline is built
# (presumably seeds the random number generators so the data splitting below is
# reproducible -- confirm in scope_gen.utils).
set_seed(0)

In [3]:
# Overall level from which compute_alphas derives the values passed on as `alphas`.
alpha = 0.35
# Score identifier handed to both the SCOPE-Gen and the CLM pipeline.
score = "sum"
# Stages of the SCOPE-Gen pipeline, with one (equal) split ratio per stage.
stages = ["generation", "diversity", "quality"]
split_ratios = [1/3] * 3
# K is the number of split ratios; M=5 is passed through unchanged
# (its meaning is defined by compute_alphas).
alphas = compute_alphas(
    alpha=alpha,
    K=len(split_ratios),
    M=5,
)
# amount of samples to show
N = 1

In [4]:
# Location of the processed data file that provides the calibration set.
data_dir = os.path.join(DATA_DIR, "processed")
data_path = os.path.join(data_dir, "data.pkl")

# Number of stages minus one if 'remove_dupl' is present:
# duplicate removal does not need to be calibrated.
K = len(stages) - int('remove_dupl' in stages)

# NOTE: pickle.load can execute arbitrary code; only open trusted files here.
with open(data_path, 'rb') as pkl_file:
    data = pickle.load(pkl_file)

# Entries 600..1799 are used for calibration.
data_cal = data[600:1800]

In [28]:
# SCOPE-Gen pipeline built from the calibration data; only the 'pipeline'
# entry of the returned mapping is kept.
scope_gen_pipeline = create_scope_gen_pipeline(
    data=data_cal,
    split_ratios=split_ratios,
    alphas=alphas,
    score=score,
    data_splitting=True,
    verbose=True,
    stages=stages,
    count_adm=False,
    measure_time=False,
)['pipeline']

# delta_1 and delta_2 are taken from the quantitative experiments
delta_1 = 0.33
delta_2 = 0.025

# CLM baseline built from the same calibration data and score.
clm_pipeline = create_clm_pipeline(
    data=data_cal,
    split_ratio=0.5,
    delta_1=delta_1,
    delta_2=delta_2,
    use_lambda_1=True,
    use_lambda_2=True,
    alt_lambda_1=None,
    alt_lambda_2=None,
    reduced_max=20,
    measure_time=False,
    score=score,
    count_adm=False,
)

In [29]:
# extract processed answers: the last N entries of data, in their original order
processed_data = [data[offset] for offset in range(-N, 0)]
# add the answer indices 0..19 to every extracted entry (a fresh array per entry);
# the entries are the objects returned by data[...], so they are modified in place
for entry in processed_data:
    entry["idxs"] = np.arange(20)

In [30]:
# SCOPE-Gen prediction sets for the extracted samples
scope_gen_out = scope_gen_pipeline.generate_new(processed_data)

# CLM prediction sets: the first element of clm_pipeline["pipeline"] is passed
# on as a tuple; the result is later indexed per sample as a mask
clm_first_pipeline = tuple(clm_pipeline["pipeline"][0])
kept_mask = generate_clm(data=processed_data, clm_pipeline=clm_first_pipeline, score=score)

In [31]:
# Records of the seed-42 example file; the printing cells iterate over them
# and read each record's 'prompt'.
seed_42_path = os.path.join(DATA_DIR, "examples", "seed_42.jsonl")
obj_seed_42 = load_configs_from_jsonl(seed_42_path)

In [37]:
def convert_generated_text(text):
    r"""Cut a generated report down to its body and escape it for a LaTeX table cell.

    The text is split at "FINDINGS:"; if that marker is absent, at "IMPRESSION:".
    The segment that follows the first occurrence of the marker (up to the next
    occurrence of the same marker, if any) is kept and stripped of surrounding
    whitespace. If neither marker occurs, the text is used unchanged instead of
    failing on an unassigned variable.

    Args:
        text: The generated text as a string.

    Returns:
        The selected text with every "_" replaced by "\_" and every newline
        replaced by "\\" (a LaTeX line break).
    """
    # Fallback when no section marker is present.
    relevant_text = text
    # "FINDINGS:" takes precedence over "IMPRESSION:".
    for marker in ("FINDINGS:", "IMPRESSION:"):
        if marker in text:
            relevant_text = text.split(marker)[1].strip()
            break

    # Escape underscores so LaTeX does not treat them as subscripts.
    relevant_text = relevant_text.replace("_", "\\_")

    # Turn newlines into LaTeX line breaks.
    relevant_text = relevant_text.replace("\n", "\\\\")
    return relevant_text

In [38]:
# Print every prompt followed by one LaTeX table row per answer kept in the
# corresponding SCOPE-Gen prediction set.
print("scope_gen Answers")
for idx, line in enumerate(obj_seed_42):
    print(f"Prompt: {line['prompt']}")
    print("Generated answers:")
    kept_idxs = scope_gen_out[idx]["idxs"]
    print(len(kept_idxs))
    for rank, answer_idx in enumerate(kept_idxs, start=1):
        # seeds start counting at 42, so answer i is stored in seed_<42 + i>.jsonl
        seed_file = "seed_" + str(42 + answer_idx) + ".jsonl"
        seed_records = load_configs_from_jsonl(os.path.join(DATA_DIR, "examples", seed_file))
        answer_tex = convert_generated_text(seed_records[idx]["generated"])
        # check mark if the label of this answer is truthy, cross otherwise
        if bool(processed_data[idx]["labels"][answer_idx]):
            mark = "\\cmark"
        else:
            mark = "\\xmark"
        print(str(rank) + " & \\parbox{10cm}{" + answer_tex + "} & " + mark + " \\\\")
        print("\\midrule")

scope_gen Answers
Prompt:                                  FINAL REPORT
 EXAMINATION:  DX CHEST PORT LINE/TUBE PLCMT 3 EXAMS
 
 CLINICAL HISTORY  ___ year old man with IPH  // dobhoff placement      dobhoff
 placement
 
 COMPARISON:  ___
 
 FINDINGS:
Generated answers:
4
1 & \parbox{10cm}{The Dobhoff tube lies within the stomach.\\ \\ IMPRESSION: \\ \\ Dobhoff tube lies within the stomach.} & \xmark \\
\midrule
2 & \parbox{10cm}{The nasogastric tube appears to course below the diaphragm and out of view.\\ \\ IMPRESSION: \\ \\ The nasogastric tube courses below the diaphragm and out of view.} & \xmark \\
\midrule
3 & \parbox{10cm}{The tip of the feeding tube lies in the distal stomach.  The tip of the\\ endotracheal tube lies approximately 6 cm above the carina.  The heart is\\ borderline enlarged.  There is mild pulmonary edema.  Consolidation in the right mid\\ and lower zones is unchanged.  There is no pneumothorax.\\ \\ IMPRESSION: \\ \\ The feeding tube lies in the distal stomach.} &

In [39]:
# Print every prompt followed by one LaTeX table row per answer that the CLM
# mask keeps for that sample.
print("CLM Answers")
for idx, line in enumerate(obj_seed_42):
    print(f"Prompt: {line['prompt']}")
    print("Generated answers")
    # positions where the mask of this sample is non-zero
    idxs = np.where(kept_mask[idx])[0]
    print(len(idxs))
    for rank, answer_idx in enumerate(idxs, start=1):
        # answer i is stored in seed_<42 + i>.jsonl
        seed_file = "seed_" + str(42 + answer_idx) + ".jsonl"
        seed_records = load_configs_from_jsonl(os.path.join(DATA_DIR, "examples", seed_file))
        answer_tex = convert_generated_text(seed_records[idx]["generated"])
        # check mark if the label of this answer is truthy, cross otherwise
        if bool(processed_data[idx]["labels"][answer_idx]):
            mark = "\\cmark"
        else:
            mark = "\\xmark"
        print(str(rank) + " & \\parbox{10cm}{" + answer_tex + "} & " + mark + " \\\\")
        print("\\midrule")

CLM Answers
Prompt:                                  FINAL REPORT
 EXAMINATION:  DX CHEST PORT LINE/TUBE PLCMT 3 EXAMS
 
 CLINICAL HISTORY  ___ year old man with IPH  // dobhoff placement      dobhoff
 placement
 
 COMPARISON:  ___
 
 FINDINGS:
Generated answers
18
1 & \parbox{10cm}{The Dobhoff tube lies within the stomach.\\ \\ IMPRESSION: \\ \\ Dobhoff tube lies within the stomach.} & \xmark \\
\midrule
2 & \parbox{10cm}{Comparison to \_\_\_ 08:27 in \_\_\_, the feeding tube has been advanced 2\\ cm and tip now projects over the distal stomach.  The NG tube tip is now in the EG\\ junction near the carina.  The patient has also had surgery.  Allowing for\\ slight rotation the cardiac silhouette remains enlarged.  The mediastinum is\\ widened however this may be due to semi-erect positioning.  Interval appearance of\\ patchy opacities right greater than left.\\ \\ IMPRESSION: \\ \\ The ET tube has been advanced.  The tip now projects approximately 2.6 cm above the carina\\ and should b