This notebook estimates likelihood ratios (LRs) for clinical features (e.g., information from the history of present illness [HPI] or exam findings) whose LRs have not yet been reported in the literature.

In [3]:
import pandas as pd
import numpy as np
from markitdown import MarkItDown
import llm
from openai import OpenAI
from pydantic import BaseModel
import os
from typing import List, Optional, Literal
from tabulate import tabulate
from IPython.display import display
from dotenv import load_dotenv

load_dotenv()  # looks for a .env file in the current dir by default
# Do not print the API key for debugging: notebook outputs are saved with the file.

# Directory holding the assessment workbooks. It can be overridden per machine
# with the DX_ASSESSMENT_DIR environment variable (read after load_dotenv(), so
# it may also be set in .env); the default is the original hard-coded location.
ASSESSMENT_DIR = os.environ.get(
    "DX_ASSESSMENT_DIR",
    r'/Users/reblocke/Research/dx_chat_entropy/Assessments/',
)
# Template workbook: one sheet per diagnosis (file name kept exactly as on disk).
ASSESSMENT_TEMPLATE = os.path.join(ASSESSMENT_DIR, r'asssessment_template_new.xlsx')


In [4]:
# Processing of the specification for what assessments we want the LLM to look for

def process_sheet(sheet_data):
    """
    Collect the (Information, LR) pairs found in one assessment sheet.

    The 'Y/N' column is discarded when present. Rows whose 'Information'
    cell is empty/NaN are skipped; a missing or NaN 'LR' is reported as None.

    Returns a list of tuples (information_str, lr_value) in row order.
    """
    # errors="ignore" makes this a no-op for sheets without a Y/N column
    trimmed = sheet_data.drop(columns=["Y/N"], errors="ignore")

    pairs = []
    for _, record in trimmed.iterrows():
        finding = record.get("Information", None)
        # Guard clause: rows with no information text carry nothing to record
        if pd.isnull(finding):
            continue

        ratio = record.get("LR", None)
        pairs.append((finding, None if pd.isnull(ratio) else ratio))

    return pairs

# Build diagnosis_info: {sheet name -> [(information, LR or None), ...]} from
# every non-empty sheet of the template workbook.
diagnosis_info = {}
with pd.ExcelFile(ASSESSMENT_TEMPLATE) as spreadsheet_data:
    for sheet_name in spreadsheet_data.sheet_names:
        try:
            # Parse from the already-open workbook; calling pd.read_excel on the
            # path would re-open and re-read the file once per sheet.
            sheet_data = spreadsheet_data.parse(sheet_name=sheet_name)

            if sheet_data.empty:
                print(f"Skipping empty sheet: {sheet_name}")
                continue

            # Process the sheet to get [(info, LR), ...]
            diagnosis_info[sheet_name] = process_sheet(sheet_data)
        except Exception as e:
            # Best effort: report the failing sheet and keep loading the others.
            print(f"Error processing sheet '{sheet_name}': {e}")

# Print out the collected data
for diagnosis, info_pairs in diagnosis_info.items():
    print(f"Diagnosis: {diagnosis}")
    for info_val, lr_val in info_pairs:
        print(f"  Information: {info_val}, LR: {lr_val}")


Diagnosis: Cardiac
  Information: Do you have any PMHx? (counts as 2 independent minor features), LR: None
  Information: Pain not worse with exertion (requires they clarify exercise 1hr after meal), LR: 0.8
  Information: no tobacco, LR: 0.96
  Information: no associated shortness of breath, LR: 0.89
  Information: no radiation to the neck, arm, or jaw? , LR: 0.9
  Information: positional chest pain (worse when laying down), LR: 3.3333333333333335
  Information: What were you doing when the chest pain started? (eating), LR: None
  Information: Alternative cause of esoph dysphagia becomes obvious(food gets stuck or relieved by regurgitation of food), LR: 0.75
  Information: no prior CAD, LR: 0.75
  Information: no PAD, LR: 0.96
  Information: no HLD, LR: 0.85
  Information: no prior MI, LR: 0.88
  Information: no DM2, LR: 0.9
  Information: no obesity, LR: 0.99
  Information: no history of stroke, LR: 0.97
  Information: no diaphoresis, LR: 0.91
  Information: Pain worse with exertion 

In [5]:
%%time
# ESTIMATE LRS FOR ALL THAT HAVE UNKNOWN LRS
# TODO: Note, in the real workflow - should do this using o1 and only do it once, rather than over and over.

class LRResponse(BaseModel):
    """
    A structured schema ensuring the model returns exactly one of the five LR labels.
    """
    # NOTE(review): the docstring above is left untouched on purpose; Pydantic
    # may emit it as the schema description sent with the request — confirm.

    # The only field: the qualitative likelihood-ratio category.
    label: Literal[
        "STRONG NEGATIVE",
        "WEAK NEGATIVE",
        "NEUTRAL",
        "WEAK POSITIVE",
        "STRONG POSITIVE",
    ]


def estimate_lr(diagnosis, info_val, client, model="gpt-4o-mini"):
    """
    Ask the LLM to classify how strongly a finding shifts the odds of a diagnosis.

    Args:
        diagnosis: Target condition (in this notebook, the sheet name, e.g. "Cardiac").
        info_val: The finding / piece of information to classify.
        client: OpenAI client exposing ``beta.chat.completions.parse``.
        model: Chat model name; defaults to the previously hard-coded "gpt-4o-mini".

    Returns:
        One of the five LR categories (STRONG NEGATIVE, WEAK NEGATIVE,
        NEUTRAL, WEAK POSITIVE, STRONG POSITIVE), as constrained by the
        LRResponse schema passed as ``response_format``.

    Raises:
        ValueError: if the completion carries no parsed LRResponse
            (e.g. the model refused), instead of an opaque AttributeError.
    """

    # Prompt fixes relative to the earlier version:
    #   * the last scale bullet (odds below 0.72x) now says STRONG NEGATIVE, not STRONG POSITIVE
    #   * the "pain worse with exertion" example is now STRONG POSITIVE, matching its own reasoning
    #   * "WEAK absence against" -> "WEAK evidence against"
    # NOTE(review): the first two worked examples are labelled STRONG NEGATIVE /
    # WEAK NEGATIVE, while the template LRs printed earlier in this notebook for
    # the same findings (0.8 and 0.96) fall in the WEAK NEGATIVE / NEUTRAL bands
    # of the scale below — confirm which labels are intended.
    lr_prompt = """You are an expert diagnostician who is explaining to a trainee which pieces of information they should pay attention to during the diagnostic process. Your task is to summarize how strong of evidence the presence or absence of a particular new finding is for whether a patient has a disease. For example, if a patient has chest pain and the EKG show ST segment elevations, this is STRONG evidence that the chest pain is due to a heart attack. If the patient has t-wave inversions, this is WEAKER evidence in favor - because t-wave changes are not as specific for cardiac causes of chest pain. If they have known heartburn, this is WEAK evidence against (because it’s an explanation, but it IS possible to have a history of heartburn but have a heart attack). Lastly, if they are a young female without an inherited condition, this is STRONG evidence against a cardiac cause because that demographic almost never has heart attacks. Lastly, if the piece of information is unhelpful, it would be called neutral. For example, if the patient has blue eyes irrelevant to the cause of chest pain, thus it would be NEUTRAL. 

    I’d like you to follow the following steps:
        1.	Consider, what does the finding mean about what is going on with the patient?
        2.	does the presence of the new information make the disease more or less likely? Or no difference?
        3.	Does the finding make another cause of the same symptom more common? If so, then by definition it makes the target condition a less likely explanation.
        4.	Once you’ve decided whether the finding makes the diagnosis more or less likely, use the following scale to come up with a response:

        •	If knowing the piece of information makes the odds of the diagnosis more than 1.95x higher than it was before, it is a STRONG POSITIVE finding
        •	If knowing the piece of information makes the odds of the diagnosis 1.18x to 1.95x higher than it was before, it is a WEAK POSITIVE finding
        •	If knowing the piece of information makes changes the odds only 0.92x to 1.18x as likely as it was before, then it is a NEUTRAL finding
        •	If knowing the piece of information makes the odds of the diagnosis 0.72x to 0.92x times as likely as it was before, then it is a WEAK NEGATIVE finding.
        •	If knowing the piece of information makes the odds of the diagnosis less than 0.72x as likely as it was before, it is a STRONG NEGATIVE finding

    As another example, say I’m wondering whether a patient with GI bleeding has a lower GI bleed (below the ligament of Treitz) or an upper GI bleed. The presence of clots in the blood is a very strong predictor of lower GI bleeding, because bleeding from the stomach cannot form clots due to the stomach acid. You should use all physiologic clues to whether a piece of information is a STRONG, WEAK, or NEUTRAL predictor. 

    You will receive inputs in the following format; Target condition: <Condition, e.g. Cardiac chest pain>. Finding: <piece of information, e.g. ‘No radiation to the neck, arm, or jaw’>.

    You must respond with EXACTLY ONE of the following categories (no extra text):
    STRONG POSITIVE, WEAK POSITIVE, NEUTRAL, WEAK NEGATIVE, STRONG NEGATIVE.

    You must respond with EXACTLY one of these categories in valid JSON
    Your output must match the Pydantic schema: { 'label': '<one of the five strings>' }

    Here are some examples:
    Prompt = Target condition: Cardiac Chest Pain. Finding: Pain not worse with exertion (requires they clarify exercise 1hr after meal).
    You would reason that because cardiac chest pain is usually worse with exertion because exertion worsens cardiac demand for oxygen, and thus worsens ischemia.
    Response = {
        "label": "STRONG NEGATIVE"
    }

    Prompt =  Target condition: Cardiac Chest Pain. Finding: No tobacco.
    You would reason that because being someone who smokes increases your risk of coronary artery disease, and thus being a never smoker means you’re at less risk… but many people who have heart attacks still smoke, so it’s only a weak predictor. 
    Response = {
        "label": "WEAK NEGATIVE"
    }

    Prompt = Target condition: Cardaic Chest Pain. Finding = enjoys playing chess.
    You would reason that because enjoying chest has no relationship to having a heart attack.
    Response = {
        "label": "NEUTRAL"
    }

    Prompt = Target condition: Cardiac Chest Pain. Finding = pain located behind the sternum
    You would reason that because cardiac chest pain is often experienced behind the sternum (thus, more likely), but so are many other causes of chest pain - like GERD.
    Response = {
        "label": "WEAK POSITIVE"
    }

    Prompt = Target condition: Cardiac Chest Pain. Finding = pain worse with exertion.
    You would reason that because the increased myocardial oxygen consumption worsens the pain if oxygen delivery to the myocardium is the cause, as it is in heart attacks.
    Response = {
        "label": "STRONG POSITIVE"
    }

    OK: here’s the prompt…. """

    # Create your conversation messages. The user turn follows the input format
    # the system prompt announces ("Target condition: <...>. Finding: <...>").
    messages = [
        {"role": "system", "content": lr_prompt},
        {
            "role": "user",
            "content": f"Target condition: {diagnosis}. Finding: {info_val}"
        }
    ]

    # Make the structured call to the model
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=messages,
        response_format=LRResponse,  # Our Pydantic model
    )

    # Extract the parsed LRResponse from the completion
    message = completion.choices[0].message
    lr_response = message.parsed
    if lr_response is None:
        # No parsed payload: fail loudly so a missing label is never written
        # to the output as if it were an estimate.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"No structured LR label returned for finding {info_val!r} "
            f"(condition {diagnosis!r}); refusal: {refusal!r}"
        )
    return lr_response.label


# One shared client; the key is read from the environment (KeyError if unset).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Fill in every missing LR in place: entries whose LR is None are replaced by
# the label returned from estimate_lr; entries with an LR are left alone.
for diagnosis in diagnosis_info:
    info_pairs = diagnosis_info[diagnosis]  # list of (info_val, lr_val) tuples
    for idx in range(len(info_pairs)):
        info_val, lr_val = info_pairs[idx]
        if lr_val is not None:
            continue  # already has an LR
        estimated_label = estimate_lr(diagnosis, info_val, client)
        info_pairs[idx] = (info_val, estimated_label)

CPU times: user 295 ms, sys: 17.7 ms, total: 312 ms
Wall time: 24.9 s


In [7]:
# Destination workbook: one sheet per diagnosis with "Information" and "LR" columns.
output_path = os.path.join(ASSESSMENT_DIR, "completed_lrs.xlsx")

with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    for diagnosis, info_pairs in diagnosis_info.items():
        # Excel limits sheet names to 31 characters, so truncate the diagnosis
        sheet_name = str(diagnosis)[:31]

        # One record per (info, lr) tuple, in the original order
        df = pd.DataFrame(
            [{"Information": info_val, "LR": lr_val} for info_val, lr_val in info_pairs],
            columns=["Information", "LR"],
        )
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Excel file saved to: {output_path}")

Excel file saved to: /Users/reblocke/Research/dx_chat_entropy/Assessments/completed_lrs.xlsx
