Loading required libraries

In [None]:
import json
import os

import pandas as pd
from dotenv import load_dotenv

from src.pdf import create_pdf
from src.gpt import evaluate_gpt
from src.utils import (
    load_preprocess_data,
    run_prediction,
    evaluate_department_results,
    extract_disease_names,
    extract_lab_data,
    select_case_components_based_on_id)

pd.set_option('display.max_colwidth', 5000)
load_dotenv()
from prompts import single_shot_disease_only_prompt,open_ended__top4_prompt,with_reasons_prompt,compare_others_prompt

Importing the dataset

In [None]:
# Load the case file, reduce each `differential_diagnosis` entry with
# `extract_disease_names`, keep only the cases that have more than one
# differential diagnosis, and append the principal diagnosis to each kept
# case's differential list.
filePath="dataset/clinicallab/data_en.json"
df=load_preprocess_data(filePath)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(extract_disease_names)
# `.copy()` gives an independent frame, so the column assignment below is not a
# chained assignment on a slice of `df` (which raises SettingWithCopyWarning).
filtered_df = df[df['differential_diagnosis'].apply(lambda x: len(x) > 1)].copy()
# Element-wise concatenation: a one-item list holding the principal diagnosis is
# appended when it is truthy, an empty list otherwise.
# NOTE(review): assumes `extract_disease_names` returns a list per row - confirm.
filtered_df['differential_diagnosis'] = filtered_df['differential_diagnosis'] + filtered_df['principal_diagnosis'].apply(lambda x: [x] if x else [])
df=filtered_df
print(len(df))

In [None]:
# 729/3

Filtering the dataset

In [None]:


# type="with_reasons"
# prompt=with_reasons_prompt
# type="single_shot"
# prompt=single_shot_disease_only_prompt
# # type="open_ended"
# # # prompt=open_ended__top4_prompt# 
# run_prediction(df,prompt,departments,models=models,type=type,skip=5)

In [1]:

import os
import json
from src.utils import evaluate_department_results
# For every run type, evaluate each department's saved predictions with
# `evaluate_department_results` and write one summary JSON per run type.
types=["single_shot","with_reasons","check_others_input"]

# Create the output folder up front; without it the final `open(..., "w")`
# raises FileNotFoundError on a fresh checkout.
analysis_directory = os.path.join("results", "analysis")
os.makedirs(analysis_directory, exist_ok=True)

# The loop variable is `result_type`, not `type`, so the builtin `type` stays
# usable in the rest of the notebook.
for result_type in types:
    folder=os.path.join("results",result_type)
    print("type is",result_type)
    all_results={}
    for file_name in os.listdir(folder):
        # The department is the part of the file name before the first "_".
        department_name=file_name.split("_")[0]
        print(department_name)
        # Only files whose name contains the run type are evaluated.
        if result_type not in file_name:
            continue
        # A separate handle name keeps `file_name` a string for the whole iteration.
        with open(os.path.join(folder,file_name), "r") as results_file:
            data = json.load(results_file)
        all_results[department_name]=evaluate_department_results(data)

    with open(os.path.join(analysis_directory, f"{result_type}.json"), "w") as outfile:
        json.dump(all_results, outfile)

type is single_shot
gynecology department
['gpt-4o', 'llama3.1', 'gemma2']
ovarian cyst ovarian cyst true 100.0
ovarian cyst ectopic pregnancy false 41.37931034482759
ovarian cyst ovarian cyst true 100.0
ovarian cyst ovarian cyst true 100.0
ovarian cyst ovarian tumor torsion false 54.54545454545454
ovarian cyst ovarian cyst true 100.0
ovarian cyst ovarian tumor torsion false 54.54545454545454
ovarian cyst ovarian tumor torsion false 54.54545454545454
ovarian cyst ovarian cyst true 100.0
endometrial polyp endometrial polyp true 100.0
endometrial polyp endometrial polyp true 100.0
endometrial polyp endometrial polyp true 100.0
endometrial polyp endometrial polyp true 100.0
endometrial polyp endometrial polyp true 100.0
endometrial polyp endometrial lesions false 72.22222222222221
endometrial cancer endocervical carcinoma false 65.0
endometrial cancer endometrial cancer. true 97.2972972972973
endometrial cancer endocervical carcinoma false 65.0
endometrial cancer endometrial lesions false

In [None]:
def get_diagnosis_by_other_models_by_id(id,department_results,models):
    """Return each model's stored prediction for one case, stripped of newlines and code fences.

    Args:
        id: Key of the case inside ``department_results``.
        department_results: Mapping of case id to a record whose
            ``"predictions"`` entry maps model name to prediction text.
        models: Model names to read; the result follows this order.

    Returns:
        A list with one cleaned prediction string per entry of ``models``.
    """
    def _strip_markup(text):
        # "```json" has to be removed before the bare "```" fence, otherwise a
        # stray "json" would be left behind.
        for marker in ("\n", "```json", "```"):
            text = text.replace(marker, "")
        return text

    return [_strip_markup(department_results[id]["predictions"][name]) for name in models]


from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
import json
def compare_others_ollama(prompt,medical_history, modelname, diseases, department,doctor_diagnosis=[]):
    """Run the comparison prompt through an Ollama model.

    Args:
        prompt: Template text; it is turned into a single system message.
        medical_history: Case summary; serialised with ``json.dumps`` before
            being substituted into the template.
        modelname: Name passed to ``OllamaLLM`` as ``model``.
        diseases: Value substituted for the ``diseases`` template variable.
        department: Value substituted for the ``department`` template variable.
        doctor_diagnosis: Sequence whose items 0, 1 and 2 fill the
            ``doctor_1_diagnosis`` .. ``doctor_3_diagnosis`` variables. The
            default empty list raises ``IndexError``, so callers must pass at
            least three entries.

    Returns:
        Whatever invoking the ``prompt | model`` chain returns.
    """
    llm = OllamaLLM(model=modelname,temperature=0.1,num_predict=1500,num_ctx=4096)
    template_message = SystemMessagePromptTemplate.from_template(prompt)
    pipeline = ChatPromptTemplate.from_messages([template_message]) | llm

    template_values = {
        "department": department,
        "diseases": diseases,
        "medical_history": json.dumps(medical_history),
    }
    # Doctors are numbered from 1 in the template, from 0 in the list.
    for position in (1, 2, 3):
        template_values[f"doctor_{position}_diagnosis"] = doctor_diagnosis[position - 1]

    return pipeline.invoke(template_values)

import json
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate

def compare_others_gpt(prompt,medical_history, model, diseases, department,doctor_diagnosis=[]):
    """Run the comparison prompt through an OpenAI chat model and return the reply text.

    Args:
        prompt: Template text; it is turned into a single system message.
        medical_history: Case summary; serialised with ``json.dumps`` before
            being substituted into the template.
        model: Name passed to ``ChatOpenAI`` as ``model_name``.
        diseases: Value substituted for the ``diseases`` template variable.
        department: Value substituted for the ``department`` template variable.
        doctor_diagnosis: Sequence whose items 0, 1 and 2 fill the
            ``doctor_1_diagnosis`` .. ``doctor_3_diagnosis`` variables. The
            default empty list raises ``IndexError``, so callers must pass at
            least three entries.

    Returns:
        The ``content`` attribute of the model's response message.
    """
    chat = ChatOpenAI(model_name=model, temperature=0.1,max_tokens=1500)
    system_message_prompt = SystemMessagePromptTemplate.from_template(prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])

    # Fill the template; this happens before any request is sent, so a short
    # `doctor_diagnosis` list fails here without calling the API.
    messages = chat_prompt.format_prompt(
        department=department,
        diseases=diseases,
        medical_history=json.dumps(medical_history),
        doctor_1_diagnosis=doctor_diagnosis[0],
        doctor_2_diagnosis=doctor_diagnosis[1],
        doctor_3_diagnosis=doctor_diagnosis[2],
    ).to_messages()

    # `.invoke` replaces the deprecated direct call `chat(messages)` and matches
    # how compare_others_ollama drives its model.
    response = chat.invoke(messages)
    return response.content

In [None]:
# Prompt template for the "check_others_input" run. Its placeholders are
# {department}, {medical_history}, {diseases} and {doctor_1_diagnosis} ..
# {doctor_3_diagnosis}; compare_others_gpt and compare_others_ollama supply a
# value for each of them.
# NOTE: this assignment rebinds `compare_others_prompt`, replacing the object of
# the same name imported from `prompts` in the first cell.
# NOTE: the "# " at the start of the three "Doctor N" lines is inside the string
# literal, so it is sent to the model as text; it is not a Python comment.
compare_others_prompt="""You are an experienced doctor from {department}, and you will be provided with the following information about a patient:

Clinical case summary, including past medical history, physical examination findings, laboratory tests, imaging reports, and pathological examination results.
A list of possible diseases.
Diagnoses and reasoning from three other doctors.
Your task is to:

Critically evaluate the diagnoses provided by each doctor.
Cross-check their inputs with the patient’s case summary and your expert knowledge.
Determine the most accurate diagnosis based on the available evidence using your expert knowledge and use the diagnosis provided from other doctors.
Acknowledge potential mistakes in the other doctors’ assessments while integrating valid points into your final diagnosis.

Output your response as follows:
final_diagnosis: Name of the most likely disease from the provided list.
reasons: A detailed explanation based on patient history, physical examination, lab reports, imaging, and other relevant findings that support your final diagnosis.

Clinical case summary: {medical_history}
List of possible diseases: {diseases}
# Doctor 1’s diagnosis and reasoning: {doctor_1_diagnosis}
# Doctor 2’s diagnosis and reasoning: {doctor_2_diagnosis}
# Doctor 3’s diagnosis and reasoning: {doctor_3_diagnosis}"""



# compare_others_prompt="""You are an experienced doctor from {department}, and you will be provided with the following information about a patient:

# Clinical case summary, including past medical history, physical examination findings, laboratory tests, imaging reports, and pathological examination results.
# Diagnoses and reasoning from three other doctors.
# Your task is to Determine the most accurate diagnosis based on the available evidence using your expert knowledge and use the diagnosis provided from other doctors:
# Output your response as follows:

# final_diagnosis: Name of the most likely disease from the provided set.
# reasons: A detailed explanation based on patient history, physical examination, lab reports, imaging, and other relevant findings that support your final diagnosis.

# Clinical case summary: {medical_history}

# # Doctor 1’s diagnosis and reasoning: {doctor_1_diagnosis}
# # Doctor 2’s diagnosis and reasoning: {doctor_2_diagnosis}
# # Doctor 3’s diagnosis and reasoning: {doctor_3_diagnosis}"""

In [None]:
# Second-opinion run: for every department, feed each case plus the three
# models' earlier "with_reasons" answers back to every model in
# `all_models_to_check`, and save the new answers per department.

# Case-summary sections requested from `select_case_components_based_on_id`.
required_fields=[ "Patient basic information",
                 "Chief complaint",
                 "Medical history",
                 "Physical examination",
                 "Laboratory examination",
                 "Imageological examination",
                 "Auxillary examination",
                 "Pathological examination"
]
departments=["nephrology department",
            "gynecology department",
            "endocrinology department",
            "neurology department",
             "pediatrics department",
             "cardiac surgical department",
             "gastrointestinal surgical department",
             "respiratory medicine department",
             "gastroenterology department",
             "urinary surgical department",
             "hepatobiliary and pancreas surgical department",
             "hematology department"
            ]
# Models whose earlier answers are quoted as "doctor 1..3" in the prompt.
all_models= ["gpt-4o","llama3.1","gemma2"]
# Models asked for the new, final diagnosis.
all_models_to_check=["gpt-4o","llama3.1","gemma2"]
# Named `run_type` rather than `type` so the builtin `type` is not shadowed for
# the rest of the kernel session.
run_type="check_others_input"

# The compare_others_* helpers read doctor_diagnosis[0..2]; fail before any
# model call if fewer than three source models are configured.
assert len(all_models) >= 3, "compare_others_* need diagnoses from three models"

# Create the output folder first: discovering it is missing only at write time
# would discard a whole department's model responses.
output_directory=os.path.join("results",run_type)
os.makedirs(output_directory, exist_ok=True)

for department in departments:
    print(department)
    results={}
    with open(f"results/with_reasons/{department}_with_reasons.json", "r") as infile:
        department_results = json.load(infile)

    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        # Case ids are JSON object keys (strings); the lookup is given an int.
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for model in all_models_to_check:
            # Model names containing "gpt" go to the OpenAI helper, all others to Ollama.
            if "gpt" in model:
                output=compare_others_gpt(compare_others_prompt,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            else:
                output=compare_others_ollama(compare_others_prompt,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    with open(os.path.join(output_directory, f"{department}_{run_type}.json"), "w") as outfile:
        json.dump(results, outfile)
    