In [None]:
import pandas as pd
import json
import re
import os
from oaklib.interfaces.text_annotator_interface import TextAnnotationConfiguration
from oaklib.interfaces.text_annotator_interface import TextAnnotatorInterface
from curategpt.store import get_store
from typing import List, Tuple

# Read in OpenAI key file (for curategpt grounding)
key_file_path = os.path.expanduser("~/openai.key")
# Read the key from the file and set the environment variable
with open(key_file_path, "r") as key_file:
    openai_api_key = key_file.read().strip()
os.environ["OPENAI_API_KEY"] = openai_api_key

# Compile a regex pattern to detect lines starting with "Differential Diagnosis:"
dd_re = re.compile(r"^[^A-z]*Differential Diagnosis")

# Set the directory path
response_dir = "../data/results/geminiflash_results.jsonl"
# Initialize lists to hold the data
service_answers = []
metadata = []


def get_prompts(file_path):
    all_prompts = []
    # Open the JSONL file 
    with open(file_path, 'r') as file:
        # Read each line in the file
        for line in file:
            all_prompts.append(json.loads(line))
    return all_prompts

raw_responses = get_prompts(response_dir)
responses = pd.DataFrame({
    "service_answers": raw_responses.map(lambda x: x["response"]),
    "metadata": raw_responses.map(lambda x: x["id"]),
})
print("Right number of responses: " + str(len(raw_responses) == 5213))

AttributeError: 'list' object has no attribute 'map'

In [12]:
# Function to clean and remove "Differential Diagnosis" header if present
def clean_service_answer(answer: str) -> str:
    """Remove the 'Differential Diagnosis' header if present, and clean the first line."""
    lines = answer.split('\n')
    # Filter out any line that starts with "Differential Diagnosis:"
    cleaned_lines = [line for line in lines if not dd_re.match(line)]
    return '\n'.join(cleaned_lines)

# Clean the diagnosis line by removing leading numbers, periods, asterisks, and spaces
def clean_diagnosis_line(line: str) -> str:
    """Remove leading numbers, asterisks, and unnecessary punctuation/spaces from the diagnosis."""
    line = re.sub(r'^\**\d+\.\s*', '', line)  # Remove leading numbers and periods
    line = line.strip('*')  # Remove asterisks around the text
    return line.strip()  # Strip any remaining spaces

# Split a diagnosis into its main name and synonym if present
def split_diagnosis_and_synonym(diagnosis: str) -> Tuple[str, str]:
    """Split the diagnosis into main name and synonym (if present in parentheses)."""
    match = re.match(r'^(.*)\s*\((.*)\)\s*$', diagnosis)
    if match:
        main_diagnosis, synonym = match.groups()
        return main_diagnosis.strip(), synonym.strip()
    return diagnosis, None  # Return the original diagnosis if no synonym is found

def perform_curategpt_grounding(
    diagnosis: str,
    path: str,
    collection: str,
    database_type: str = "chromadb",
    limit: int = 1,
    relevance_factor: float = 0.23,
    verbose: bool = False
) -> List[Tuple[str, str]]:
    """
    Use curategpt to perform grounding for a given diagnosis when initial attempts fail.
    
    Parameters:
    - diagnosis: The diagnosis text to ground.
    - path: The path to the database. You'll need to create an index of Mondo using curategpt in this db
    - collection: The collection to search within curategpt. Name of mondo collection in the db
    NB: You can make this collection by running curategpt thusly:
    `curategpt ontology index --index-fields label,definition,relationships -p stagedb -c ont_mondo -m openai: sqlite:obo:mondo`
    - database_type: The type of database used for grounding (e.g., chromadb, duckdb).
    - limit: The number of search results to return.
    - relevance_factor: The distance threshold for relevance filtering.
    - verbose: Whether to print verbose output for debugging.
    
    Returns:
    - List of tuples: [(Mondo ID, Label), ...]
    """
    # Initialize the database store
    db = get_store(database_type, path)
    
    # Perform the search using the provided diagnosis
    results = db.search(diagnosis, collection=collection)

    # Filter results based on relevance factor (distance)
    if relevance_factor is not None:
        results = [(obj, distance, _meta) for obj, distance, _meta in results if distance <= relevance_factor]

    # Limit the results to the specified number (limit)
    limited_results = results[:limit]

    # Extract Mondo IDs and labels
    pred_ids = []
    pred_labels = []

    for obj, distance, _meta in limited_results:
        disease_mondo_id = obj.get("original_id")  # Use the 'original_id' field for Mondo ID
        disease_label = obj.get("label")
        
        if disease_mondo_id and disease_label:
            pred_ids.append(disease_mondo_id)
            pred_labels.append(disease_label)

    # Return as a list of tuples (Mondo ID, Label)
    if len(pred_ids) == 0:
        if verbose:
            print(f"No grounded IDs found for {diagnosis}")
        return [('N/A', 'No grounding found')]

    return list(zip(pred_ids, pred_labels))


# Perform grounding on the text to MONDO ontology and return the result
def perform_oak_grounding(
    annotator: TextAnnotatorInterface,
    diagnosis: str,
    exact_match: bool = True,
    verbose: bool = False,
    include_list: List[str] = ["MONDO:"],
) -> List[Tuple[str, str]]:
    """
    Perform grounding for a diagnosis. The 'exact_match' flag controls whether exact or inexact
    (partial) matching is used. Filter results to include only CURIEs that match the 'include_list',
    and exclude results that match the 'exclude_list'.
    Remove redundant groundings from the result.
    """
    config = TextAnnotationConfiguration(matches_whole_text=exact_match)
    annotations = list(annotator.annotate_text(diagnosis, configuration=config))

    # Filter and remove duplicates, while excluding unwanted general terms
    filtered_annotations = list(
        {
            (ann.object_id, ann.object_label)
            for ann in annotations
            if any(ann.object_id.startswith(prefix) for prefix in include_list)
        }
    )
    
    if filtered_annotations:
        return filtered_annotations
    else:
        match_type = "exact" if exact_match else "inexact"
        if verbose:
            logging.warning(f"No {match_type} grounded IDs found for: {diagnosis}")
        return [('N/A', 'No grounding found')]

# Now, integrate curategpt into your ground_diagnosis_text_to_mondo function
def ground_diagnosis_text_to_mondo(
    annotator: TextAnnotatorInterface,
    differential_diagnosis: str,
    verbose: bool = False,
    include_list: List[str] = ["MONDO:"],
    use_ontogpt_grounding: bool = True,
    curategpt_path: str = "../stagedb/",
    curategpt_collection: str = "ont_mondo",
    curategpt_database_type: str = "chromadb"
) -> List[Tuple[str, List[Tuple[str, str]]]]:
    results = []
    
    # Split the input into lines and process each one
    for line in differential_diagnosis.splitlines():
        clean_line = clean_diagnosis_line(line)
        
        # Skip header lines like "**Differential diagnosis:**"
        if not clean_line or "Differential diagnosis" in clean_line.lower():
            continue
        
        # Try grounding the full line first (exact match)
        grounded = perform_oak_grounding(annotator, clean_line, exact_match=True, verbose=verbose, include_list=include_list)
        
        # Try grounding with curategpt if no grounding is found
        if use_ontogpt_grounding and grounded == [('N/A', 'No grounding found')]:
            grounded = perform_curategpt_grounding(
                diagnosis=clean_line,
                path=curategpt_path,
                collection=curategpt_collection,
                database_type=curategpt_database_type,
                verbose=verbose
            )
        
        # If still no grounding is found, log the final failure
        if grounded == [('N/A', 'No grounding found')]:
            if verbose:
                print(f"Final grounding failed for: {clean_line}")
        
        # Append the grounded results (even if no grounding was found)
        results.append((clean_line, grounded))

    return results

In [13]:
# Get the OAK annotator for MONDO
from oaklib import get_adapter
# Set up OAK SQLite implementation for MONDO
annotator = get_adapter("sqlite:obo:mondo")

                                                                              

In [19]:
##
## RUN SOME TESTS
##

from warnings import warn

# helper function to check if a grounding is correct
def assert_correct_result(result, expected_result):
    # Assert that the grounded result matches the expected output
    assert len(result) == len(expected_result), f"Length of grounding result:\n{result}\n does not match length of expected result:\n{expected_result}\n"
    
    for res_item, expected_item in zip(result, expected_result):
        # First, assert that the diagnosis name matches exactly
       
        if res_item[0] != expected_item[0]:
            print(res_item[0], expected_item[0])
            warn(f"Diagnosis mismatch: {res_item[0]} != {expected_item[0]}")
        
        # Then, assert that the grounding list matches, ignoring order
        assert set(res_item[1]) == set(expected_item[1]), f"Grounding mismatch for {res_item[0]}"
    

# Example grounding with OAK annotation - testing on a sample input
differential_diagnosis_text = """
**Differential Diagnosis:**
1. Branchiooculofacial syndrome
2. Unicorn syndrome
3. Cystic fibrosis
4. Velocardiofacial syndrome
**5. ATP6V0A4-related distal renal tubular acidosis**
"""

# Cleaning and grounding the sample differential diagnosis text
cleaned_text = clean_service_answer(differential_diagnosis_text)
# Assert that the cleaning process returns non-empty text
assert cleaned_text != "", "Cleaning failed: the cleaned text is empty."

# Define the expected result for the sample input
expected_result = [
    ('Branchiooculofacial syndrome', [('MONDO:0007235', 'branchiooculofacial syndrome')]), 
    ('Unicorn syndrome', [('N/A', 'No grounding found')]), 
    ('Cystic fibrosis', [('MONDO:0009061', 'cystic fibrosis')]), 
    ('Velocardiofacial syndrome', [('MONDO:0008644', 'velocardiofacial syndrome'), ('MONDO:0008564', 'DiGeorge syndrome'), ('MONDO:0018923', '22q11.2 deletion syndrome')]), 
    ('ATP6V0A4-related distal renal tubular acidosis', [('MONDO:0012700', 'renal tubular acidosis, distal, 4, with hemolytic anemia')])
]

# Ground the text to MONDO
result = ground_diagnosis_text_to_mondo(annotator, cleaned_text, verbose=False)

assert_correct_result(result, expected_result)

In [20]:
# Another test
# Make sure "I'm sorry, but I cannot generate a differential diagnosis with the given information" 
# does not get grounded
result = ground_diagnosis_text_to_mondo(annotator, "I'm sorry, but I cannot generate a differential diagnosis with the given information", verbose=False)
assert_correct_result(result,
                      [("I'm sorry, but I cannot generate a differential diagnosis with the given information",
                        [('N/A', 'No grounding found')])]
                      )

In [None]:
# Apply the cleaning and grounding functions directly to the 'service_answer' column. Might take 45m or so to run.
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

# Now use progress_apply
responses['grounded_diagnosis'] = responses['response'].progress_apply(
    lambda x: ground_diagnosis_text_to_mondo(annotator, clean_service_answer(x), verbose=False)
)

# Save the DataFrame with the new 'grounded_diagnosis' column to a CSV file
output_file = "../supplemental_data/meditron-70b-response/meditron_preview_response_grounded.csv"
#o1_preview_responses.to_csv(output_file, index=False)

# Display a sample of the updated DataFrame
o1_preview_responses

AttributeError: 'list' object has no attribute 'progress_apply'

In [88]:
# Load the DataFrame with the grounded diagnosis text (not using o1_responses from above to avoid re-running the previous cell)
o1_preview_responses = pd.read_csv("../supplemental_data/meditron-70b-response/meditron_preview_response_grounded.csv")
o1_preview_responses

Unnamed: 0,service_answer,metadata,grounded_diagnosis
0,\n1. Branchiooculofacial syndrome\n2. C,prompt0,"[('Branchiooculofacial syndrome', [('MONDO:000..."
1,\n1. VACTERL association\n2. CHARGE syndrome\n,prompt1,"[('VACTERL association', [('MONDO:0008642', 'V..."
2,\n1. Netherton syndrome\n2. Ichthyosis vulgar,prompt2,"[('Netherton syndrome', [('MONDO:0009735', 'Ne..."
3,\n1. Pfeiffer syndrome\n2. Crouzon syndrome,prompt3,"[('Pfeiffer syndrome', [('MONDO:0007043', 'Pfe..."
4,\n1. Branchiooculofacial syndrome\n2. C,prompt4,"[('Branchiooculofacial syndrome', [('MONDO:000..."
...,...,...,...
5208,\n1. Branchiooculofacial syndrome\n,prompt5208,"[('Branchiooculofacial syndrome', [('MONDO:000..."
5209,\n1. GM1 gangliosidosis\n2,prompt5209,"[('GM1 gangliosidosis', [('MONDO:0018149', 'GM..."
5210,\n1. Mitochondrial DNA depletion synd,prompt5210,"[('Mitochondrial DNA depletion synd', [('MONDO..."
5211,\n1. Cohen syndrome\n2. Mental ret,prompt5211,"[('Cohen syndrome', [('MONDO:0008999', 'Cohen ..."


In [13]:
# calculate what fraction of groundable items were grounded to Mondo
import ast

# Initialize counters
total_groundable_diagnoses = 0
grounded_diagnoses_count = 0

# Iterate through the rows, skip rows where service_answer starts with "I'm sorry"
for index, row in o1_preview_responses.iterrows():
    if row['service_answer'].startswith("I'm sorry"):
        continue
    
    grounded_diagnoses_str = row['grounded_diagnosis']
    
    # Ensure grounded_diagnosis is deserialized from a string to a list
    try:
        grounded_diagnoses = ast.literal_eval(grounded_diagnoses_str)
    except (ValueError, SyntaxError) as e:
        print(f"Error parsing grounded diagnosis for index {index}: {e}")
        continue
    
    # Count all diagnoses and the ones that have a valid Mondo grounding
    for grounding in grounded_diagnoses:
        total_groundable_diagnoses += 1
        if grounding[1] and grounding[1][0][0] != 'N/A':
            grounded_diagnoses_count += 1

# Calculate the fraction of all diagnoses that have a valid Mondo ID
fraction_groundable_responses_grounded = grounded_diagnoses_count / total_groundable_diagnoses
print(f"Fraction of groundable service answers grounded: {round(fraction_groundable_responses_grounded, 4)}")

Fraction of groundable service answers grounded: 0.5929


In [94]:
import random

# Initialize a list to store items that were not grounded
ungrounded_items = []

# Iterate through the rows, skipping rows where service_answer starts with "I'm sorry"
for index, row in o1_preview_responses.iterrows():
    if row['service_answer'].startswith("I'm sorry"):
        continue
    
    grounded_diagnoses_str = row['grounded_diagnosis']
    
    # Ensure grounded_diagnosis is deserialized from a string to a list
    try:
        grounded_diagnoses = ast.literal_eval(grounded_diagnoses_str)
    except (ValueError, SyntaxError) as e:
        print(f"Error parsing grounded diagnosis for index {index}: {e}")
        continue
    
    # Collect items that do not have valid Mondo grounding
    for grounding in grounded_diagnoses:
        if not grounding[1] or grounding[1][0][0] == 'N/A':
            ungrounded_items.append(grounding[0])

# Display the list of all items that were not grounded
# randomly sample 20 items and show the total count
print("In total, there are", len(ungrounded_items), "items that were not grounded.")
print("Here is a random sample of 200 items:")
# set a seed for reproducibility
random.seed(42)
print("\n".join(random.sample(ungrounded_items, 200)))

# write out to file 
ungrounded_items = [item for item in ungrounded_items if "differential diagnosis" not in item.lower()]
ungrounded_items
with open('../data/ungrounded_items.txt', 'w') as f:
    f.write("\n".join(ungrounded_items))

In total, there are 3785 items that were not grounded.
Here is a random sample of 200 items:
C
C
C
Neurofibrom
C
POLG-related disorders
Microphthalmia, coloboma, and catar
Col
C
C
2
C
C
C
C
2
2
3
M
C
C
PCDH19-related epilepsy
C
Dyggve-Melchior-Clausen synd
Aicardi-G
Cut
2
C
C
E
Al
Andersen-Tawil
C
C
Mitochondrial complex
Thromboc
Leukoencephalopathy with brainstem and
Oral
C
Brug
Rigid spine syndrome 2, autosom
C
Branchiooculofa
Mitochond
X-linked dilated
C
C
C
C
C
C
C
C
C
C
3
Progressive
C
Al
C
FG
Sp
C
X-linked l
C
C
C
Thromb
C
PPDX syndrome
C
C
2
Alobar holoprosencephaly with median cleft lip
Oculocutaneous
C
Atopic der
Medium-chain acyl-CoA dehydro
Primary
Primary microcephaly, intellectual disability, and slop
Xeroderma
C
2
Loeys-Diet
C
Camptodacty
C
2
Dy
2
C
C
Brug
Sp
C
C
C
Arrhythmogenic
C
C
C
C
Loeys-Diet
C
Branch
C
Rub
C
Microphthalmia, coloboma, and catar
C
C
Thromb
C
C
Macrodontia with short stature syndrome
C
Mitochondrial en
C
C
C
C
C
C
C
2
C
Sp
Mitochondrial complex
C
Leuk

In [None]:
import csv

# Initialize an empty dictionary to store the result
correct_answers_dict = {}

# Define the file path
file_path = '../data/all/prompts/correct_results.tsv'

# Read the TSV file and populate the dictionary
with open(file_path, 'r', newline='') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    for row in reader:
        # Assign each column to the corresponding variable
        correct_disease_name = row[0]
        correct_ID = row[1]
        prompt_file_name = row[2]
        
        # Populate the dictionary
        correct_answers_dict[prompt_file_name] = (correct_ID, correct_disease_name)

correct_answers_dict

{'PMID_36996813_Individual_KCHYD24_1_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_1_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_13_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_7_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_12_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_6_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorder with intracranial hemorrhage, seizures, and spasticity'),
 'PMID_36996813_Individual_5_en-prompt.txt': ('OMIM:620371',
  'Neurodevelopmental disorde

In [None]:
from tqdm import tqdm
from malco.process.mondo_score_utils import score_grounded_result
import warnings
import csv
import ast
from oaklib import get_adapter
from pathlib import Path
import logging

dont_nuke_existing_output = False

# Create the directory if it doesn't exist; if it does, raise an error
output_dir = Path("../outputdir_all_2024_07_04/meditron_70b_preview_disease_results")
if output_dir.exists():
    if dont_nuke_existing_output:
        raise FileExistsError(f"Directory {output_dir} already exists. Please remove it first.")
    else:
        warnings.warn(f"Directory {output_dir} already exists. Existing files may be overwritten.")
else:
    output_dir.mkdir(parents=True)

# Function to write results to a file
def write_result_to_file(file_path, results):
    with open(file_path, mode='w', newline='') as file:
        writer = csv.writer(file, delimiter='\t')
        # Write header
        writer.writerow(["rank", "disease_name", "disease_identifier", "correct_ID", "grounded_score", "is_correct"])
        # Write each result
        for result in results:
            writer.writerow(result)

# Initialize Mondo adapter
mondo = get_adapter("sqlite:obo:mondo")

# Sequential processing of each row
for index, row in tqdm(o1_preview_responses.iterrows(), total=len(o1_preview_responses)):
    grounded_diagnoses_str = row['grounded_diagnosis']
    
    # Ensure grounded_diagnosis is deserialized from a string to a list
    try:
        grounded_diagnoses = ast.literal_eval(grounded_diagnoses_str)
    except (ValueError, SyntaxError) as e:
        print(f"Error parsing grounded diagnosis for index {index}: {e}")
        continue
    
    metadata = hash_to_name[prompt_id_hash[row['metadata']]]  # Assuming this field exists in o1_responses
    correct_disease = correct_answers_dict.get(metadata)  # Get correct ID from the dict
    
    if not correct_disease:
        logging.warning(f"No correct ID found for metadata: {metadata}")
        continue  # Skip rows with no correct ID
    
    results = []
    
    # Loop through each grounded diagnosis and score them
    for rank, (disease_name, grounded_list) in enumerate(grounded_diagnoses, start=1):
        for grounded_id, _ in grounded_list:  # this is a list because there may be multiple groundings
            grounded_score = score_grounded_result(grounded_id, correct_disease[0], mondo)
            is_correct = grounded_score > 0  # Score > 0 means either exact or subclass match
            
            # Create a result row
            result_row = [rank, disease_name, grounded_id, correct_disease, grounded_score, is_correct]
            results.append(result_row)
    
    # Define the output file path
    output_file = output_dir / f"{metadata}.tsv"
    
    # Write results to file
    write_result_to_file(output_file, results)

print(f"Finished writing scored results to {output_dir}")

100%|██████████| 5213/5213 [17:38<00:00,  4.93it/s]

Finished writing scored results to ../outputdir_all_2024_07_04/meditron_70b_preview_disease_results





In [93]:
import os
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt

def compute_summary_statistics(input_dir, output_file, output_plot):
    # Initialize the counter for each rank
    rank_counter = Counter()

    # Iterate through all files in the directory ending with .tsv
    for filename in os.listdir(input_dir):
        if filename.endswith('.tsv'):
            filepath = os.path.join(input_dir, filename)
            # Read the TSV file
            df = pd.read_csv(filepath, sep='\t')

            # Find the first occurrence of the correct diagnosis
            correct_rank = df[df['is_correct'] == True].index.min() + 1 if not df[df['is_correct'] == True].empty else None

            # Increment the appropriate counter based on the rank or nf if not found
            if correct_rank is not None and 1 <= correct_rank <= 10:
                rank_counter[f'n{correct_rank}'] += 1
            else:
                rank_counter['nf'] += 1

    # Get the total number of records processed
    total_files = sum(rank_counter.values())

    # Prepare the row to be written to the output file (without the 'lang' column)
    output_row = [
        rank_counter.get('n1', 0),
        rank_counter.get('n2', 0),
        rank_counter.get('n3', 0),
        rank_counter.get('n4', 0),
        rank_counter.get('n5', 0),
        rank_counter.get('n6', 0),
        rank_counter.get('n7', 0),
        rank_counter.get('n8', 0),
        rank_counter.get('n9', 0),
        rank_counter.get('n10', 0),
        rank_counter.get('n10', 0) / total_files if total_files else 0,  # n10p: proportion of n10 hits
        rank_counter.get('nf', 0)
    ]

    # Write the results to the output file (without 'lang' column)
    with open(output_file, 'w') as f:
        f.write('n1\tn2\tn3\tn4\tn5\tn6\tn7\tn8\tn9\tn10\tn10p\tnf\n')
        f.write('\t'.join(map(str, output_row)) + '\n')

    print(f"Summary statistics written to {output_file}")

    # Generate the plot
    hits = ['Top-1', 'Top-3', 'Top-10']
    percentages = [
        rank_counter.get('n1', 0) / total_files * 100 if total_files else 0,
        sum(rank_counter.get(f'n{i}', 0) for i in range(1, 4)) / total_files * 100 if total_files else 0,
        sum(rank_counter.get(f'n{i}', 0) for i in range(1, 11)) / total_files * 100 if total_files else 0,
    ]

    # Plotting
    plt.figure(figsize=(10, 6))
    plt.bar(hits, percentages, color=['blue', 'green', 'orange'])
    plt.xlabel('Hits')
    plt.ylabel('Percent of cases')
    plt.title('Top-k accuracy of correct diagnoses')
    plt.ylim(0, 100)  # Adjust this as needed
    plt.savefig(output_plot)
    plt.close()

    print(f"Plot saved to {output_plot}")

# Example usage in a Jupyter notebook:
input_dir = "../outputdir_all_2024_07_04/meditron_70b_preview_disease_results"
output_file = "../outputdir_all_2024_07_04/plots/topn_result_meditron_70b.tsv"
output_plot = "../outputdir_all_2024_07_04/plots/topn_result_meditron_70b.png"

# Ensure the output directory exists
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Call the function
compute_summary_statistics(input_dir, output_file, output_plot)

Summary statistics written to ../outputdir_all_2024_07_04/plots/topn_result_meditron_70b.tsv
Plot saved to ../outputdir_all_2024_07_04/plots/topn_result_meditron_70b.png
