In [None]:
import pandas as pd

In [None]:
# Set OpenAI API Key
#
# Points the `openai` module at the Azure endpoint for `deployment_name` and sets
# the API key. The key is resolved in this order:
#   1. the AZURE_OPENAI_API_KEY environment variable, if set and non-empty;
#   2. the first field of the first line of a key file, whose location can be
#      overridden with AZURE_OPENAI_KEY_FILE and otherwise defaults to the
#      original hard-coded path (so existing setups keep working unchanged).
import os

import openai

deployment_name = "fm-endo"
openai.api_base = f"https://{deployment_name}.openai.azure.com/"

_key_file = os.environ.get(
    "AZURE_OPENAI_KEY_FILE",
    "/home/shj622/rajpurkarlab/home/shj622/craft-md-v2/keys/azure_fmendo.txt",
)
# `or` short-circuits, so the key file is only read when the environment
# variable is missing or empty.
openai.api_key = (
    os.environ.get("AZURE_OPENAI_API_KEY")
    or pd.read_csv(_key_file, header=None).iloc[0, 0]
)

In [None]:
from src.utils import get_choices

## Generate clinical LLM agent responses for vignette and conversational formats (multi-turn, single-turn and summarized)

### OpenAI Models (GPT-3.5, GPT-4)

In [None]:
from src.craftmd import craftmd_gpt

In [None]:
# Run CRAFT-MD on the first case of the USMLE dermatology dataset with an OpenAI model.
model_name = "gpt4_1106"
path_dir = f"./results/{model_name}"
dataset = pd.read_csv("./data/usmle_derm_dataset.csv", index_col=0)

# One tuple per dataset row: (case_id, case_vignette, category, get_choices(...)).
cases = [
    tuple(dataset.loc[row, field] for field in ("case_id", "case_vignette", "category"))
    + (get_choices(dataset, row),)
    for row in dataset.index
]

# Only the first case is passed to the model in this example.
case = cases[0]
craftmd_gpt(case, path_dir, model_name)

### Open-source Models (Mistral-v1, Mistral-v2, LLaMA2-7b)

These models require GPU resources. We performed all evaluations on a Quadro RTX 8000 (48 GB).

In [None]:
from src.craftmd import craftmd_opensource
from src.models import get_model_and_tokenizer

In [None]:
# To download the open-source models (if they are not already available in your environment), uncomment the lines below and insert your Hugging Face token:
# from huggingface_hub import login
# login(token = "insert_huggingface_token")

In [None]:
# Run CRAFT-MD on the first case of the USMLE dermatology dataset with an open-source model.
model_name = "mistral-v2"
path_dir = f"./results/{model_name}"
dataset = pd.read_csv("./data/usmle_derm_dataset.csv", index_col=0)

# One tuple per dataset row: (case_id, case_vignette, category, get_choices(...)).
cases = [
    tuple(dataset.loc[row, field] for field in ("case_id", "case_vignette", "category"))
    + (get_choices(dataset, row),)
    for row in dataset.index
]

# Only the first case is passed to the model in this example.
case = cases[0]

model, tokenizer = get_model_and_tokenizer(model_name)

# When the tokenizer has no padding token id, reuse its end-of-sequence token id.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

craftmd_opensource(case, path_dir, model, tokenizer)

### Multimodal LLM (GPT-4V)

In [None]:
from src.craftmd import craftmd_multimodal

In [None]:
# Run the multimodal CRAFT-MD pipeline on the first case of the nejmai dataset.
model_name = "gpt4v"
dataset = pd.read_csv("./data/nejmai_dataset.csv", index_col=0)

# One tuple per dataset row: (case_id, case_vignette, get_choices(...)).
# Unlike the text-only cells, no "category" field is included here.
cases = [(dataset.loc[idx, "case_id"],
          dataset.loc[idx, "case_vignette"],
          get_choices(dataset, idx)) for idx in dataset.index]

path_dir = f"./results/{model_name}"
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
img_dir = "../../craft-md-v2/data/nejm/imgs/"

# NOTE(review): the last argument is `deployment_name` (set in the API-key cell),
# not `model_name` — confirm craftmd_multimodal expects the Azure deployment name.
craftmd_multimodal(cases[0], img_dir, path_dir, deployment_name)

# Evaluate using Grader-AI agent

The code below works for both open-source and GPT models.

In [None]:
from src.graderai_eval import graderai_evaluation

In [None]:
# Run the Grader-AI evaluation for "case_0" over the four FRQ experiment settings,
# passing the dataset and the results directory of the chosen model.
model_name = "gpt4_1106"
path_dir = f"./results/{model_name}"

experiment_names = [
    "vignette_frq",
    "multiturn_frq",
    "singleturn_frq",
    "summarized_frq",
]

dataset = pd.read_csv("./data/usmle_derm_dataset.csv", index_col=0)

graderai_evaluation("case_0", dataset, path_dir, experiment_names)