## Load required libraries

In [None]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
from dotenv import load_dotenv
import json
from src.pdf import create_pdf
from src.gpt import evaluate_gpt
from src.utils import (
    load_preprocess_data,
    run_prediction,
    evaluate_department_results,
    extract_disease_names)
load_dotenv()
from prompts import single_shot_disease_only_prompt,open_ended__top4_prompt,with_reasons_prompt,compare_others_prompt

## Import the dataset

In [None]:
# Load the ClinicalLab cases through the project helper.
filePath = "dataset/clinicallab/data_en.json"
df = load_preprocess_data(filePath)

# Apply the project helper to filter and transform the 'differential_diagnosis' field.
# NOTE(review): the code below assumes the helper returns a list per row
# (it calls len() on it and concatenates lists) - confirm in src.utils.
df['differential_diagnosis'] = df['differential_diagnosis'].apply(extract_disease_names)

# Keep only rows with more than one differential diagnosis.
# The explicit .copy() makes filtered_df an independent frame, so the column
# assignment below is not a chained assignment on a slice of df (which emits
# SettingWithCopyWarning and is not guaranteed to take effect).
has_multiple_candidates = df['differential_diagnosis'].apply(lambda x: len(x) > 1)
filtered_df = df[has_multiple_candidates].copy()

# Append the principal diagnosis (when present) to each row's candidate list.
principal_as_list = filtered_df['principal_diagnosis'].apply(lambda x: [x] if x else [])
filtered_df['differential_diagnosis'] = filtered_df['differential_diagnosis'] + principal_as_list

# Later cells read the filtered frame through the name `df`.
df = filtered_df


In [None]:
# Departments included in the run. Uncomment entries to widen the evaluation.
departments = [
    "nephrology department",
    # "gynecology department",
    # "endocrinology department",
    # "neurology department",
    # "pediatrics department",
    # "cardiac surgical department",
    # "gastrointestinal surgical department",
    # "respiratory medicine department",
    # "gastroenterology department",
    # "urinary surgical department",
    # "hepatobiliary and pancreas surgical department",
    # "hematology department"
]

# Alternative model roster, kept for reference only. It used to be a live
# assignment that the very next line overwrote, so it never took effect:
# models = ["gpt-4","llama3.1","gemma2", "phi3:14b", "mistral-nemo"]
models = ["gpt-4o", "llama3.1"]

In [None]:
# 729/3

## Run predictions (the call in the next cell is currently commented out)

In [None]:


# type="with_reasons"
# prompt=with_reasons_prompt
# type="single_shot"
# prompt=single_shot_disease_only_prompt
# # type="open_ended"
# # # prompt=open_ended__top4_prompt# 
# run_prediction(df,prompt,departments,models=models,type=type,skip=5)

In [None]:

import os
import json
from src.utils import evaluate_department_results
# For every prompt type, evaluate each matching file in "results/" and write
# the per-department summary to "<prompt_type>.json" in the working directory.
types = ["single_shot", "with_reasons"]

# The directory listing does not depend on the prompt type, so read it once.
# Sorted so that processing order (and which file wins when several files map
# to the same department) is deterministic instead of filesystem-dependent.
all_files = sorted(os.listdir("results"))

# `prompt_type` rather than `type`: the old loop variable left the builtin
# `type` rebound to a string for the rest of the kernel session.
for prompt_type in types:
    all_results = {}
    for file_name in all_files:
        # The department is the part of the file name before the first "_".
        department_name = file_name.split("_")[0]
        print(department_name)
        if prompt_type not in file_name:
            continue
        file_path = os.path.join("results", file_name)
        # Distinct name for the handle so it no longer rebinds the loop variable.
        with open(file_path, "r", encoding="utf-8") as result_file:
            data = json.load(result_file)
        results = evaluate_department_results(data)
        all_results[department_name] = results
    with open(f"{prompt_type}.json", "w", encoding="utf-8") as outfile:
        json.dump(all_results, outfile)

In [None]:
def extract_lab_data(lab_data, field):
    """
    Pick a single field out of every test record of an examination.

    :param lab_data: Mapping of test name to that test's record (a container
                     that supports ``in`` and ``[]`` lookups by field name).
    :param field: Name of the field to pull from each record
                  (for example 'result' or 'abnormal').
    :return: Mapping of test name to the value stored under ``field``;
             tests whose record lacks ``field`` are left out.
    """
    return {
        test_name: record[field]
        for test_name, record in lab_data.items()
        if field in record
    }

def select_case_components_based_on_id(df,case_id,required_fields,laboratory="result",image="findings"):
    """
    Assemble the diagnoses and the requested case sections for one case.

    :param df: DataFrame with the columns 'id', 'clinical_case_summary',
               'principal_diagnosis', 'differential_diagnosis',
               'laboratory_examination', 'imageological_examination' and
               'pathological_examination'.
    :param case_id: Value matched against the 'id' column; the first matching
                    row is used.
    :param required_fields: Section names to include in the returned case
                            dict (keys of the dict built below, e.g.
                            "Chief complaint").
    :param laboratory: Field extracted from every laboratory test record.
    :param image: Field extracted from every imaging record.
    :return: Tuple ``(principal_diagnosis, differential_diagnosis, case_dict)``
             where ``differential_diagnosis`` is capitalized and sorted, and
             ``case_dict`` holds only the sections named in ``required_fields``.
    :raises IndexError: If no row has the given ``case_id``.
    :raises KeyError: If ``required_fields`` names an unknown section.
    """
    matches = df[df['id'] == case_id]
    if matches.empty:
        # Same exception type as the bare .iloc[0] would raise, but with a
        # message that says which id was missing.
        raise IndexError(f"No case with id {case_id!r} found in the dataframe.")
    row = matches.iloc[0]

    clinical_case = row.clinical_case_summary
    principal_diagnosis = row.principal_diagnosis
    differential_diagnosis = sorted(item.capitalize() for item in row.differential_diagnosis)

    # Examination data may be missing or malformed for some cases; fall back
    # to a placeholder in that case. `except Exception` (not a bare except)
    # so KeyboardInterrupt / SystemExit still propagate.
    try:
        laboratory_examination = extract_lab_data(row.laboratory_examination, laboratory)
    except Exception:
        laboratory_examination = "Not available."
    try:
        imageological_examination = extract_lab_data(row.imageological_examination, image)
    except Exception:
        imageological_examination = "Not available."

    # Keys are the section names callers pass in `required_fields`
    # (including the existing "Auxillary" spelling, which callers rely on).
    clinical_case_dict = {
        "Patient basic information": clinical_case['Patient Basic Information'],
        "Chief complaint": clinical_case['Chief Complaint'],
        "Medical history": clinical_case['Medical History'],
        "Physical examination": clinical_case['Physical Examination'],
        "Laboratory examination": laboratory_examination,
        "Imageological examination": imageological_examination,
        "Auxillary examination": clinical_case['Auxiliary Examination'],
        "Pathological examination": row.pathological_examination,
    }

    filtered_clinical_case_dict = {key: clinical_case_dict[key] for key in required_fields}

    return principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict


def get_diagnosis_by_id(id,department_results,models):
    """
    Collect the stored prediction of each model for one case.

    Each prediction is read from ``department_results[id]["predictions"][model]``
    and returned with newlines and Markdown code-fence markers removed.

    :param id: Key of the case in ``department_results``.
    :param department_results: Mapping of case id to a record that has a
                               "predictions" mapping keyed by model name.
    :param models: Model names to collect, in the order they should appear.
    :return: List of cleaned prediction strings, one per entry in ``models``.
    """
    # Order matters: "```json" has to go before the bare "```" fence.
    markers_to_strip = ("\n", "```json", "```")

    cleaned_predictions = []
    for model_name in models:
        text = department_results[id]["predictions"][model_name]
        for marker in markers_to_strip:
            text = text.replace(marker, "")
        cleaned_predictions.append(text)
    return cleaned_predictions


from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
import json
def compare_others_ollama(prompt,medical_history, modelname, diseases, department,doctor_diagnosis=None):
    """
    Ask an Ollama-served model to adjudicate between three prior diagnoses.

    The prompt is used as a system-message template and filled with the
    variables ``department``, ``diseases``, ``medical_history`` (JSON-encoded)
    and ``doctor_1_diagnosis`` .. ``doctor_3_diagnosis``.

    :param prompt: Template text containing the placeholders listed above.
    :param medical_history: JSON-serializable case description.
    :param modelname: Name of the Ollama model to run.
    :param diseases: Candidate diseases passed to the template as ``diseases``.
    :param department: Department name passed to the template.
    :param doctor_diagnosis: Sequence with at least three diagnosis texts;
                             only the first three are used.
    :return: The value returned by invoking the prompt/model chain.
    :raises ValueError: If fewer than three diagnoses are supplied.
    """
    # None instead of a shared mutable [] default; validate before building
    # the model so a bad call fails immediately with a clear message instead
    # of an IndexError in the middle of assembling the chain input.
    doctor_diagnosis = list(doctor_diagnosis) if doctor_diagnosis is not None else []
    if len(doctor_diagnosis) < 3:
        raise ValueError(
            f"doctor_diagnosis must contain at least 3 entries, got {len(doctor_diagnosis)}"
        )

    # NOTE(review): num_predict / num_ctx are fixed here; long case summaries
    # plus three doctors' reasoning may approach these limits - confirm.
    model = OllamaLLM(model=modelname,temperature=0.1,num_predict=1500,num_ctx=4096)

    system_message_prompt = SystemMessagePromptTemplate.from_template(prompt)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    chain = chat_prompt | model

    chain_input = {
        "department": department,
        "diseases": diseases,
        "medical_history": json.dumps(medical_history),
        "doctor_1_diagnosis": doctor_diagnosis[0],
        "doctor_2_diagnosis": doctor_diagnosis[1],
        "doctor_3_diagnosis": doctor_diagnosis[2],
    }
    results = chain.invoke(chain_input)
    print("done for model",modelname)
    return results

In [None]:
# Two variants of the "compare other doctors" judge prompt. They used to be
# assigned to the same name one after the other, so the first was silently
# discarded; each now has its own name and the active one is chosen explicitly.

# Variant with the candidate list: the judge also receives {diseases}.
compare_others_with_candidates_prompt="""You are an experienced doctor from {department}, and you will be provided with the following information about a patient:

Clinical case summary, including past medical history, physical examination findings, laboratory tests, imaging reports, and pathological examination results.
A list of possible diseases.
Diagnoses and reasoning from three other doctors.
Your task is to:

Critically evaluate the diagnoses provided by each doctor.
Cross-check their inputs with the patient’s case summary and your expert knowledge.
Determine the most accurate diagnosis based on the available evidence.
Acknowledge potential mistakes in the other doctors’ assessments while integrating valid points into your final diagnosis.
Output your response as follows:

final_diagnosis: Name of the most likely disease from the provided set.
reasons: A detailed explanation based on patient history, physical examination, lab reports, imaging, and other relevant findings that support your final diagnosis.

Clinical case summary: {medical_history}
List of possible diseases: {diseases}
# Doctor 1’s diagnosis and reasoning: {doctor_1_diagnosis}
# Doctor 2’s diagnosis and reasoning: {doctor_2_diagnosis}
# Doctor 3’s diagnosis and reasoning: {doctor_3_diagnosis}"""


# Variant without the candidate list: no {diseases} placeholder.
# NOTE(review): the output instructions still say "from the provided set"
# although no set is provided in this variant - confirm this is intended.
compare_others_open_prompt="""You are an experienced doctor from {department}, and you will be provided with the following information about a patient:

Clinical case summary, including past medical history, physical examination findings, laboratory tests, imaging reports, and pathological examination results.
Diagnoses and reasoning from three other doctors.
Your task is to:

Critically evaluate the diagnoses provided by each doctor.
Cross-check their inputs with the patient’s case summary and your expert knowledge.
Determine the most accurate diagnosis based on the available evidence.
Acknowledge potential mistakes in the other doctors’ assessments while integrating valid points into your final diagnosis.
Output your response as follows:

final_diagnosis: Name of the most likely disease from the provided set.
reasons: A detailed explanation based on patient history, physical examination, lab reports, imaging, and other relevant findings that support your final diagnosis.

Clinical case summary: {medical_history}

# Doctor 1’s diagnosis and reasoning: {doctor_1_diagnosis}
# Doctor 2’s diagnosis and reasoning: {doctor_2_diagnosis}
# Doctor 3’s diagnosis and reasoning: {doctor_3_diagnosis}"""

# Active prompt (same value the name ended up with before). This also
# overrides the `compare_others_prompt` imported from `prompts` at the top
# of the notebook.
compare_others_prompt = compare_others_open_prompt

In [30]:
# Re-judge the neurology cases on which the models disagreed: for each case,
# feed the case summary and three models' stored diagnoses to a judge model.

# Explicit UTF-8 so reading does not depend on the platform's default encoding.
with open("results/neurology department_with_reasons_result_findings.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Case sections handed to the judge (keys produced by
# select_case_components_based_on_id).
required_fields = [
    "Patient basic information",
    "Chief complaint",
    "Medical history",
    "Physical examination",
    "Laboratory examination",
    "Imageological examination",
    "Auxillary examination",
    "Pathological examination",
]

department_results = data
department = 'neurology department'

# Hard-coded case ids; identical to the "atleast one detected" list printed
# by the detection-bucketing cell of this notebook.
ids = ['718', '729', '741', '744', '759', '765', '769', '772', '775', '784']

# Models whose stored predictions act as "doctor 1..3", and the judge model.
DOCTOR_MODELS = ["gpt-4o", "llama3.1", "gemma2"]
JUDGE_MODEL = "llama3.1"

for case_id in ids:
    # The dataframe is queried with an int id; the results file is keyed by str.
    principal_diagnosis, differential_diagnosis, filtered_clinical_case_dict = select_case_components_based_on_id(
        df, int(case_id), required_fields
    )
    doctor_diagnosis = get_diagnosis_by_id(case_id, department_results, DOCTOR_MODELS)
    print("principal diagnosis for case_id",case_id,principal_diagnosis)
    results = compare_others_ollama(
        compare_others_prompt,
        filtered_clinical_case_dict,
        JUDGE_MODEL,
        differential_diagnosis,
        department,
        doctor_diagnosis,
    )
    print("the result is ",results)

    

principal diagnosis for case_id 718 parkinson disease
done for model llama3.1
the result is  **final_diagnosis:** Parkinson disease
**reasons:**

After carefully evaluating the clinical case summary, laboratory tests, imaging reports, and pathological examination results provided by each doctor, I agree with Doctor 1's initial assessment that the patient is most likely suffering from **Parkinson disease (PD)**.

Here are my reasons for this conclusion:

* The patient's medical history of tremors in the right upper limb for more than 3 years, slow movement, and limb stiffness are classic symptoms of PD. The improvement with Levodopa and Carbidopa treatment further supports this diagnosis.
* Physical examination findings, such as increased muscle tone and normal tendon reflexes, are consistent with PD. The absence of Babinski sign and normal coordination also support this diagnosis.
* Laboratory examination results do not indicate any specific abnormalities that would point towards alter

In [29]:
# Bucket every neurology case by how many models have it in their "true" list:
# none of them, all of them, or only some of them.

# Case ids come from the raw per-case results file. Loaded under its own name
# so `data` is not reused for two unrelated payloads.
with open("results/neurology department_with_reasons_result_findings.json", "r", encoding="utf-8") as file:
    case_predictions = json.load(file)
all_ids = case_predictions.keys()

# "with_reasons.json" is the per-department summary written by the
# aggregation cell of this notebook.
with open("with_reasons.json", "r", encoding="utf-8") as file:
    data = json.load(file)
department = 'neurology department'
department_results = data[department]
models = list(department_results.keys())

none_detected = []
all_detected = []
atleast_one_detected = []
# `case_id` rather than `id`, so the builtin id() is not shadowed afterwards.
for case_id in all_ids:
    # NOTE(review): presumably "true" lists the case ids a model got right -
    # confirm against evaluate_department_results.
    count = sum(1 for model in models if case_id in department_results[model]["true"])
    if count == 0:
        none_detected.append(case_id)
    elif count == len(models):
        all_detected.append(case_id)
    else:
        # Some, but not all, models have this case in their "true" list.
        atleast_one_detected.append(case_id)
print("none detected list is",none_detected)
print("atleast one detected list is ",atleast_one_detected)
print("all detected list is ",all_detected)

none detected list is ['747', '790']
atleast one detected list is  ['718', '729', '741', '744', '759', '765', '769', '772', '775', '784']
all detected list is  ['712', '715', '722', '726', '732', '735', '738', '750', '753', '756', '762', '778', '781', '787']
