# A Programmatic Evaluation of Large Language Models for Clinical Decision Support in Heart Failure and Cardiomyopathy Management


---


In [40]:
import os
import pandas as pd
from dotenv import load_dotenv
from chatlas import ChatOpenAI, ChatGoogle, ChatPerplexity, ChatOllama, ChatGithub
from tqdm import tqdm

In [41]:
# Notebook configuration
# Every question is asked this many times per model so that the consistency
# and variability of the answers can be assessed.
n_queries: int = 3

## Load API Keys


In [42]:
# Read the .env file into the process environment, then keep each credential
# in its own module-level variable (None when the variable is not set).
load_dotenv()
openai_api_key, google_api_key, perplexity_api_key, github_pat = (
    os.environ.get(variable)
    for variable in (
        "OPENAI_API_KEY",
        "GOOGLE_API_KEY",
        "PERPLEXITY_API_KEY",
        "GITHUB_PAT",
    )
)

## Load Questions from CSV


In [43]:
# Load the question bank and keep only the rows flagged with select == 'yes'.
questions_df = (
    pd.read_csv("./data/sample_questions.csv")
    .loc[lambda frame: frame['select'] == 'yes']
)
questions_df

Unnamed: 0,index,category,question,select
0,1,Heart Failure,What are the common symptoms of heart failure?,yes
1,2,Heart Failure,What medications are typically prescribed for ...,yes
2,3,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,yes
3,4,Cardiomyopathy,How is cardiomyopathy diagnosed?,yes
4,5,Heart Failure,What lifestyle changes can help manage heart f...,yes


In [44]:
# Repeat each question n_queries times so that response consistency can be
# measured across repetitions.
#
# The expansion is guarded on the presence of 'query_id': this cell reassigns
# questions_df from itself, so without the guard a second execution would
# repeat the already-repeated rows (n_queries ** 2 rows per question) and
# produce query_id values larger than n_queries. To apply a different
# n_queries, re-run the cell that loads questions_df first.
if 'query_id' not in questions_df.columns:
    questions_df = questions_df.loc[questions_df.index.repeat(
        n_queries)].reset_index(drop=True)

    # Add query_id column (1..n_queries) to differentiate between repetitions
    questions_df['query_id'] = questions_df.groupby('index').cumcount() + 1

    # Reorder columns to place 'query_id' directly after 'index', wherever
    # 'index' happens to sit in the frame
    cols = questions_df.columns.tolist()
    cols.remove('query_id')
    cols.insert(cols.index('index') + 1, 'query_id')
    questions_df = questions_df[cols]
questions_df

Unnamed: 0,index,query_id,category,question,select
0,1,1,Heart Failure,What are the common symptoms of heart failure?,yes
1,1,2,Heart Failure,What are the common symptoms of heart failure?,yes
2,1,3,Heart Failure,What are the common symptoms of heart failure?,yes
3,2,1,Heart Failure,What medications are typically prescribed for ...,yes
4,2,2,Heart Failure,What medications are typically prescribed for ...,yes
5,2,3,Heart Failure,What medications are typically prescribed for ...,yes
6,3,1,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,yes
7,3,2,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,yes
8,3,3,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,yes
9,4,1,Cardiomyopathy,How is cardiomyopathy diagnosed?,yes


## Define Functions to Query Model APIs


In [45]:
def initialize_model(model_name):
    """
    Build the chatlas chat client that corresponds to ``model_name``.

    Supported names are 'openai', 'google', 'perplexity', 'github' and
    'ollama'. Every provider except 'ollama' takes its API key from an
    environment variable.

    Raises:
        EnvironmentError: the provider's key variable is unset or empty.
        ValueError: ``model_name`` is not one of the supported names.
    """
    # (provider name, key environment variable or None, constructor).
    # Constructors are wrapped in lambdas so that a chatlas class is only
    # looked up once its provider has actually been selected.
    provider_table = (
        # https://platform.openai.com/docs/models
        ('openai', 'OPENAI_API_KEY',
         lambda **options: ChatOpenAI(model="gpt-4o", **options)),
        # https://aistudio.google.com/
        ('google', 'GOOGLE_API_KEY',
         lambda **options: ChatGoogle(model="gemini-2.5-pro-preview-05-06", **options)),
        # ["sonar", "sonar-pro", "r1-1776"], https://docs.perplexity.ai/models/model-cards
        ('perplexity', 'PERPLEXITY_API_KEY',
         lambda **options: ChatPerplexity(model="sonar", **options)),
        # GitHub-hosted model, authenticated with the GITHUB_PAT variable
        ('github', 'GITHUB_PAT',
         lambda **options: ChatGithub(model="gpt-4o", **options)),
        # Ollama is the only provider that takes no API key
        ('ollama', None,
         lambda **options: ChatOllama(model="llama3.2", **options)),
    )

    matching = [entry for entry in provider_table if model_name == entry[0]]
    if not matching:
        raise ValueError(f"Unsupported model: {model_name}")
    _, key_variable, build_client = matching[0]

    client_options = {'system_prompt': "You are a friendly but terse assistant."}
    if key_variable is not None:
        secret = os.getenv(key_variable)
        if not secret:
            raise EnvironmentError(f"Missing {key_variable}")
        client_options['api_key'] = secret

    return build_client(**client_options)

In [51]:

def query_models_chatlas(questions_df, models_to_run=('openai', 'google', 'perplexity', 'github', 'ollama')):
    """
    Query selected models using Chatlas and return a DataFrame with responses.

    Each question row is sent to every requested model as an independent,
    single-turn conversation: the chat history of a model instance is cleared
    before every query. Without that reset the chat object would carry earlier
    questions and answers into later requests, so repeated queries of the same
    question would not be independent samples.

    Args:
        questions_df (pd.DataFrame): Must have 'index', 'query_id', 'category'
            and 'question' columns.
        models_to_run (sequence of str): Models to query. Options: 'openai',
            'google', 'perplexity', 'github', 'ollama'.

    Returns:
        pd.DataFrame: One row per input row with the 'index', 'query_id',
        'category' and 'question' values plus one '<model>_response' column
        per requested model. A model that could not be initialized yields
        "Initialization failed"; a failed query yields "Error: <message>".
    """
    # Snapshot the argument so any iterable (list, tuple, generator) can be
    # traversed once for initialization and again for every question row.
    models_to_run = list(models_to_run)

    # Initialize selected models; a failure is reported and remembered as None
    # so that the remaining models can still be queried.
    model_instances = {}
    for model in models_to_run:
        try:
            model_instances[model] = initialize_model(model)
        except Exception as e:
            print(f"[Warning] Failed to initialize {model}: {e}")
            model_instances[model] = None

    responses = []

    # Query each model
    for _, row in tqdm(questions_df.iterrows(), total=len(questions_df), desc="Querying models"):
        question, category = row['question'], row['category']
        response_entry = {
            'index': row['index'],
            'query_id': row['query_id'],
            'category': category,
            'question': question
        }

        for model_key in models_to_run:
            column = f'{model_key}_response'
            instance = model_instances.get(model_key)

            if instance is None:
                response_entry[column] = "Initialization failed"
                continue

            try:
                # Drop the turns left over from the previous query so this
                # question is answered without any earlier Q&A in context.
                instance.set_turns([])
                response = instance.chat(question, echo="none")
                response_entry[column] = response.content
            except Exception as e:
                # Best effort: record the failure in the cell and keep going.
                response_entry[column] = f"Error: {str(e)}"

        responses.append(response_entry)

    return pd.DataFrame(responses)

## Run Queries and Save Responses


In [53]:
# Only the local Ollama model is queried in this run. Supported keys:
# ['openai', 'google', 'perplexity', 'github', 'ollama']
responses_df = query_models_chatlas(
    questions_df=questions_df,
    models_to_run=['ollama'],
)

Querying models: 100%|██████████| 15/15 [00:56<00:00,  3.79s/it]


In [54]:
# Display the collected responses: one row per question repetition and one
# '<model>_response' column per queried model.
responses_df

Unnamed: 0,index,query_id,category,question,ollama_response
0,1,1,Heart Failure,What are the common symptoms of heart failure?,"Swollen legs, ankles, and feet (edema). Shortn..."
1,1,2,Heart Failure,What are the common symptoms of heart failure?,Common symptoms include:\n\n1. Shortness of br...
2,1,3,Heart Failure,What are the common symptoms of heart failure?,Common symptoms include:\n\n1. Shortness of br...
3,2,1,Heart Failure,What medications are typically prescribed for ...,Typically prescribed medications for heart fai...
4,2,2,Heart Failure,What medications are typically prescribed for ...,Commonly prescribed medications for heart fail...
5,2,3,Heart Failure,What medications are typically prescribed for ...,Typically prescribed medications include:\n\n1...
6,3,1,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,Common genetic causes of cardiomyopathy includ...
7,3,2,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,Some common genetic causes of cardiomyopathy i...
8,3,3,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,Genetic causes of cardiomyopathy include:\n\n1...
9,4,1,Cardiomyopathy,How is cardiomyopathy diagnosed?,Diagnosis of cardiomyopathy typically involves...
