# A Programmatic Evaluation of Large Language Models for Clinical Decision Support in Heart Failure and Cardiomyopathy Management

***

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from chatlas import ChatOpenAI, ChatGoogle, ChatPerplexity, ChatOllama
from tqdm import tqdm

## Load API Keys

In [2]:
# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
# initialize_model() looks the Perplexity key up as PERPLEXITY_API_KEY, so
# prefer that name and fall back to the shorter PERPLEX_API_KEY spelling.
perplex_api_key = os.getenv("PERPLEXITY_API_KEY") or os.getenv("PERPLEX_API_KEY")
github_pat = os.getenv("GITHUB_PAT")

## Load Questions from CSV

In [3]:
# Read the question bank and keep only the rows whose 'select' column is 'yes'.
questions_df = pd.read_csv("./data/sample_questions.csv").loc[
    lambda frame: frame['select'].eq('yes')
]
questions_df

Unnamed: 0,index,category,question,select
0,1,Heart Failure,What are the common symptoms of heart failure?,yes
1,2,Heart Failure,What medications are typically prescribed for ...,yes
2,3,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,yes
3,4,Cardiomyopathy,How is cardiomyopathy diagnosed?,yes
4,5,Heart Failure,What lifestyle changes can help manage heart f...,yes


## Define Functions to Query APIs

In [4]:
def _require_api_key(*env_names):
    """Return the first non-empty value among env_names, else raise EnvironmentError."""
    for env_name in env_names:
        value = os.getenv(env_name)
        if value:
            return value
    raise EnvironmentError("Missing " + " or ".join(env_names))


def initialize_model(model_name):
    """
    Create the chatlas chat client for one supported provider.

    Every client is configured with the same system prompt. The hosted
    providers ('openai', 'google', 'perplexity') need an API key in the
    environment; 'ollama' is created without one.

    Args:
        model_name (str): One of 'openai', 'google', 'perplexity', 'ollama'.

    Returns:
        The chat instance built by ChatOpenAI, ChatGoogle, ChatPerplexity
        or ChatOllama.

    Raises:
        EnvironmentError: If the API key for the requested provider is unset
            or empty.
        ValueError: If model_name is not one of the supported names.
    """
    system_prompt = "You are a friendly but terse assistant."

    # The key is resolved before the client class is touched so that a missing
    # key is always reported as EnvironmentError.
    if model_name == 'openai':
        api_key = _require_api_key('OPENAI_API_KEY')
        return ChatOpenAI(model="gpt-4o", system_prompt=system_prompt, api_key=api_key)

    if model_name == 'google':
        api_key = _require_api_key('GOOGLE_API_KEY')
        # NOTE(review): confirm "gemini-pro" is still an accepted model id.
        return ChatGoogle(model="gemini-pro", system_prompt=system_prompt, api_key=api_key)

    if model_name == 'perplexity':
        # The key-loading cell of this notebook reads PERPLEX_API_KEY, so accept
        # that spelling as a fallback instead of reporting the key as missing.
        api_key = _require_api_key('PERPLEXITY_API_KEY', 'PERPLEX_API_KEY')
        # NOTE(review): confirm "online" is an accepted Perplexity model id.
        return ChatPerplexity(model="online", system_prompt=system_prompt, api_key=api_key)

    if model_name == 'ollama':
        return ChatOllama(model="llama3.2", system_prompt=system_prompt)

    raise ValueError(f"Unsupported model: {model_name}")

In [5]:

def _reset_conversation(chat):
    """Drop earlier turns from a chatlas chat while keeping its system prompt."""
    prompt = chat.system_prompt
    chat.set_turns([])
    # Re-apply the prompt explicitly in case clearing the turns removed it.
    chat.system_prompt = prompt


def query_models_chatlas(questions_df, models_to_run=None):
    """
    Query selected models using Chatlas and return a DataFrame with responses.

    Each model is initialized once. Before every question the model's
    conversation history is cleared, so each answer depends only on the
    system prompt and that single question, not on earlier questions.

    Args:
        questions_df (pd.DataFrame): Must have 'index', 'category', 'question'.
        models_to_run (list | None): Models to query. Options: 'openai',
            'google', 'perplexity', 'ollama'. None (the default) runs all four.

    Returns:
        pd.DataFrame: One row per question with 'index', 'category',
        'question' and one '<model>_response' column per requested model.
        A model that could not be initialized yields "Initialization failed";
        a failed query yields "Error: <message>".
    """
    # Build the default per call instead of sharing one mutable list.
    if models_to_run is None:
        models_to_run = ['openai', 'google', 'perplexity', 'ollama']
    else:
        models_to_run = list(models_to_run)

    # Initialize selected models; a failure is reported and recorded as None so
    # the remaining models can still run.
    model_instances = {}
    for model in models_to_run:
        try:
            model_instances[model] = initialize_model(model)
        except Exception as e:
            print(f"[Warning] Failed to initialize {model}: {e}")
            model_instances[model] = None

    responses = []
    for _, row in tqdm(questions_df.iterrows(), total=len(questions_df), desc="Querying models"):
        question = row['question']
        response_entry = {
            'index': row['index'],
            'category': row['category'],
            'question': question,
        }

        for model_key in models_to_run:
            column = f'{model_key}_response'
            instance = model_instances.get(model_key)
            if instance is None:
                response_entry[column] = "Initialization failed"
                continue
            try:
                # Start from a clean history so questions are evaluated
                # independently of one another.
                _reset_conversation(instance)
                response = instance.chat(question, echo="none")
                response_entry[column] = response.content
            except Exception as e:
                # Best effort: keep going and store the error text in the cell.
                response_entry[column] = f"Error: {str(e)}"

        responses.append(response_entry)

    return pd.DataFrame(responses)

## Run Queries and Save Responses

In [6]:
# Keep the result in a variable so the responses can be saved or analysed in
# later cells instead of only being displayed.
responses_df = query_models_chatlas(questions_df=questions_df, models_to_run=['ollama'])
responses_df

Querying models: 100%|██████████| 5/5 [00:19<00:00,  3.89s/it]



Unnamed: 0,index,category,question,ollama_response
0,1,Heart Failure,What are the common symptoms of heart failure?,"Shortness of breath, fatigue, swollen legs and..."
1,2,Heart Failure,What medications are typically prescribed for ...,Commonly used medications include:\n\n1. Diure...
2,3,Cardiomyopathy,What are the genetic causes of cardiomyopathy?,Some common genetic causes of cardiomyopathy i...
3,4,Cardiomyopathy,How is cardiomyopathy diagnosed?,Diagnosis typically involves:\n\n1. Medical hi...
4,5,Heart Failure,What lifestyle changes can help manage heart f...,Lifestyle modifications that can help:\n\n1. E...
