### Loading required libraries

In [None]:
import pandas as pd
import os
pd.set_option('display.max_colwidth', 5000)
from dotenv import load_dotenv
import json
from src.utils import (
    load_preprocess_data,
    run_prediction,
    extract_disease_names,
    select_case_components_based_on_id)
load_dotenv()
from prompts import single_shot_disease_only_prompt,with_reasons_prompt,compare_others_prompt,self_refinement_prompt,compare_others_prompt_without_mine

### Importing the dataset

In [None]:
# Load the case file and normalise the differential-diagnosis column with the
# project helpers imported at the top of the notebook.
filePath="dataset/clinicallab/data_en.json"
df=load_preprocess_data(filePath)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(extract_disease_names)

# Keep only the cases that list more than one differential diagnosis.
# `.copy()` gives an independent frame: assigning a column on a boolean-mask
# slice of `df` is chained assignment and triggers SettingWithCopyWarning.
has_several_candidates = df['differential_diagnosis'].apply(lambda x: len(x) > 1)
filtered_df = df[has_several_candidates].copy()

# Append the principal diagnosis (when present) to each candidate list; adding
# two object columns of lists concatenates the lists row by row.
filtered_df['differential_diagnosis'] = filtered_df['differential_diagnosis'] + filtered_df['principal_diagnosis'].apply(lambda x: [x] if x else [])
df=filtered_df
print(len(df))

In [None]:
# 729/3

### Filtering the dataset

In [None]:
# from src.utils import (
#     run_prediction,
# )
# from prompts import single_shot_disease_only_prompt,with_reasons_prompt,compare_others_prompt,self_refinement_prompt
# type="with_reasons"
# prompt=with_reasons_prompt
# type="single_shot"
# prompt=single_shot_disease_only_prompt
# # type="open_ended"
# # # prompt=open_ended__top4_prompt# 
# run_prediction(df,prompt,departments,models=models,type=type,skip=5)

In [None]:

import os
import json
from src.utils import evaluate_department_results

# Result sub-folders (under results/) that get an evaluation summary.
types=["single_shot","with_reasons","check_others_input"]

# Summaries are written to results/analysis; create it up front so the dump
# below does not fail when the folder is missing.
os.makedirs("results/analysis", exist_ok=True)

# NOTE(review): `type` shadows the builtin for the rest of the kernel session.
# The name is kept because other cells assign and read a global called `type`.
for type in types:
    folder=os.path.join("results",type)
    print("type is",type)
    all_results={}
    for file in os.listdir(folder):
        # The department name is the part of the file name before the first "_".
        department_name=file.split("_")[0]
        print(department_name)
        if type not in file:
            continue
        file_path=os.path.join(folder,file)
        # Use a dedicated handle name so the loop variable `file` (the file
        # name) is not rebound to the open file object.
        with open(file_path, "r") as result_file:
            data = json.load(result_file)
        results=evaluate_department_results(data)
        all_results[department_name]=results

    with open(f"results/analysis/{type}.json", "w") as outfile:
        json.dump(all_results, outfile)

In [None]:
import json
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.chat_models import ChatOpenAI

In [None]:
# Case sections handed to select_case_components_based_on_id when the clinical
# case is assembled for a prompt.
# NOTE(review): "Auxillary" is spelled as in the original; presumably it has to
# match the dataset's own key, so it is left untouched -- confirm.
required_fields = [
    "Patient basic information",
    "Chief complaint",
    "Medical history",
    "Physical examination",
    "Laboratory examination",
    "Imageological examination",
    "Auxillary examination",
    "Pathological examination",
]

# Departments the experiment cells iterate over; each name is also used to
# build the per-department result file names.
departments = [
    "nephrology department",
    "gynecology department",
    "endocrinology department",
    "neurology department",
    "pediatrics department",
    "cardiac surgical department",
    "gastrointestinal surgical department",
    "respiratory medicine department",
    "gastroenterology department",
    "urinary surgical department",
    "hepatobiliary and pancreas surgical department",
    "hematology department",
]

In [None]:
def get_diagnosis_by_other_models_by_id(id,department_results,models):
    """Collect the stored predictions of several models for one case.

    Args:
        id: Key of the case inside ``department_results``.
        department_results: Mapping ``case id -> {"predictions": {model: text}}``.
        models: Model names whose predictions should be returned, in order.

    Returns:
        A list with one string per entry of ``models``; newlines and the
        Markdown code-fence markers are removed from each prediction.

    Raises:
        KeyError: If the case id or one of the models is missing.
    """
    # Order matters: "```json" has to go before the bare "```" fence.
    unwanted_fragments = ("\n", "```json", "```")

    def _strip_fragments(text):
        for fragment in unwanted_fragments:
            text = text.replace(fragment, "")
        return text

    stored_predictions = department_results[id]["predictions"] if models else {}
    return [_strip_fragments(stored_predictions[name]) for name in models]

def _build_prompt_variables(medical_history, diseases, department, doctor_diagnosis):
    """Assemble the template variables shared by both compare_others_* backends.

    One diagnosis is exposed as ``diagnosis``; two or three are exposed as
    ``doctor_1_diagnosis`` .. ``doctor_3_diagnosis``.

    Raises:
        ValueError: If the number of diagnoses is not 1, 2 or 3.
    """
    count = len(doctor_diagnosis)
    if not 1 <= count <= 3:
        raise ValueError(
            f"doctor_diagnosis must contain 1 to 3 entries, got {count}"
        )

    variables = {
        "department": department,
        "diseases": diseases,
        # The case dictionary is embedded in the prompt as a JSON string.
        "medical_history": json.dumps(medical_history),
    }
    if count == 1:
        variables["diagnosis"] = doctor_diagnosis[0]
    else:
        for position, diagnosis in enumerate(doctor_diagnosis, start=1):
            variables[f"doctor_{position}_diagnosis"] = diagnosis
    return variables


def compare_others_ollama(prompt,medical_history, modelname, diseases, department,doctor_diagnosis=None):
    """Run a comparison/refinement prompt against a model served by Ollama.

    Args:
        prompt: System prompt template; its placeholders must match the
            variables produced for the given number of diagnoses.
        medical_history: Case data, serialised to JSON for the prompt.
        modelname: Name of the Ollama model.
        diseases: Candidate diseases inserted into the prompt.
        department: Department name inserted into the prompt.
        doctor_diagnosis: One to three earlier diagnoses to show the model.

    Returns:
        Whatever the prompt-to-model chain returns for this input.

    Raises:
        ValueError: If ``doctor_diagnosis`` does not hold 1 to 3 entries
            (the previous version silently returned ``None`` in that case).
    """
    # Validate before building the model so a bad call fails fast.
    variables = _build_prompt_variables(
        medical_history, diseases, department, doctor_diagnosis or []
    )

    model = OllamaLLM(model=modelname,temperature=0.1,num_predict=1500,num_ctx=4096)
    system_message_prompt = SystemMessagePromptTemplate.from_template(prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    chain = chat_prompt | model
    return chain.invoke(variables)


def compare_others_gpt(prompt,medical_history, model, diseases, department,doctor_diagnosis=None):
    """Run a comparison/refinement prompt against an OpenAI chat model.

    Takes the same arguments as ``compare_others_ollama`` (``model`` is the
    OpenAI model name).

    Returns:
        The ``content`` attribute of the chat model's reply.

    Raises:
        ValueError: If ``doctor_diagnosis`` does not hold 1 to 3 entries
            (the previous version failed with an unbound ``messages``).
    """
    variables = _build_prompt_variables(
        medical_history, diseases, department, doctor_diagnosis or []
    )

    chat = ChatOpenAI(model_name=model, temperature=0.1,max_tokens=1500)
    system_message_prompt = SystemMessagePromptTemplate.from_template(prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    messages = chat_prompt.format_prompt(**variables).to_messages()
    # `.invoke` replaces the deprecated direct call `chat(messages)`.
    response = chat.invoke(messages)
    return response.content

### Check others including mine

In [None]:
# Models whose earlier "with_reasons" predictions are shown to the reviewer,
# and the models that act as reviewer.
all_models= ["gpt-4o","llama3.1","gemma2"]
all_models_to_check=["gpt-4o","llama3.1","gemma2"]
# NOTE(review): `type` shadows the builtin; the name is kept because it is
# used as a notebook-level variable elsewhere in this file.
type="check_others_input"
prompt_type=compare_others_prompt

# Create the output folder up front (the other experiment cells do the same)
# so the dump at the end of each department cannot fail on a missing folder.
output_folder=f"results/{type}"
os.makedirs(output_folder, exist_ok=True)

for department in departments:
    print(department)
    results={}
    with open(f"results4/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        # Case ids are string keys in the JSON file; the dataframe lookup gets an int.
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        # Every reviewer sees the diagnoses of all models, its own included.
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for model in all_models_to_check:
            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    with open(f"results/{type}/{department}_{type}.json", "w") as outfile:
        json.dump(results, outfile)

### Self refinement

In [None]:
# `all_models` selects the stored predictions to load and `all_models_to_check`
# the models that refine them; position i in one list must correspond to
# position i in the other, because each model is shown only its own answer.
all_models= ["gpt-4o"]
all_models_to_check=["gpt-4o"]
# NOTE(review): `type` shadows the builtin; the name is kept because it is
# used as a notebook-level variable elsewhere in this file.
type="self_refinement"
prompt_type=self_refinement_prompt
for department in departments:
    print(department)
    results={}
    with open(f"results-all-models/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for i,model in enumerate(all_models_to_check):
            # Use a separate name: overwriting `doctor_diagnosis` here would
            # shrink it to one element and break the lookup for the next model.
            own_diagnosis=[doctor_diagnosis[i]]
            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,own_diagnosis)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,own_diagnosis)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    # The folder is named after the last model in `all_models_to_check`. That
    # is the value the leaked loop variable `model` used to supply, but reading
    # it from the list also works when a department has no cases.
    output_model=all_models_to_check[-1]
    path = f"results-self-refinement/{type}/{output_model}/{department}_{type}.json"
    folder_path = os.path.dirname(path)
    os.makedirs(folder_path, exist_ok=True)
    with open(path, "w") as outfile:
        json.dump(results, outfile)

### Check others without mine

In [None]:
# `all_models` selects the stored predictions to load and `all_models_to_check`
# the reviewing models; position i in one list must correspond to position i in
# the other, because each reviewer has its own answer removed by index.
all_models= ["gpt-4o","llama3.1","gemma2"]
all_models_to_check=["gpt-4o","llama3.1","gemma2"]
# NOTE(review): `type` shadows the builtin; the name is kept because it is
# used as a notebook-level variable elsewhere in this file.
type="check_others_input_without_mine"
prompt_type=compare_others_prompt_without_mine
for department in departments:
    print(department)
    results={}
    with open(f"results4/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for i,model in enumerate(all_models_to_check):
            # NOTE(review): gpt-4o is listed but skipped here; presumably its
            # run was produced separately -- confirm before relying on it.
            if model=="gpt-4o":
                continue
            # Diagnoses of every model except the reviewer at position i.
            doctor_diagnosis1=[diagnosis for position,diagnosis in enumerate(doctor_diagnosis) if position!=i]

            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis1)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis1)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    # The folder is named after the last model in `all_models_to_check`. That
    # is the value the leaked loop variable `model` used to supply, but reading
    # it from the list also works when a department has no cases. The file
    # itself holds the predictions of every reviewer that was not skipped.
    output_model=all_models_to_check[-1]
    path = f"results5/{type}/{output_model}/{department}_{type}.json"
    folder_path = os.path.dirname(path)
    os.makedirs(folder_path, exist_ok=True)
    with open(path, "w") as outfile:
        json.dump(results, outfile)