loading required libraries

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.gpt import doctor_prompt_gpt,doctor_prompt_gpt_self_confinement
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json,PDF,select_case_components
from src.ollama import doctor_prompt_ollama,doctor_prompt_ollama_self_refinement,doctor_prompt_ollama_combined
load_dotenv()



True

importing the dataset

In [2]:
filePath="dataset/clinicallab/data_en.json"
with open(filePath, 'r', encoding='utf-8') as f:
            data = json.load(f)
print("\nnumber of total cases are",len(data))
print("\neach case have the following fields",list(data[0].keys()))

keys_to_include = ["id",'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis',
                   'diagnostic_basis', 'differential_diagnosis', 
                   'treatment_plan', 'clinical_case_summary', 'imageological_examination', 
                   'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
df = pd.DataFrame([{key: d[key] for key in keys_to_include} for d in data])

allDepartments=df['clinical_department'].value_counts()
print("number of departments available are",len(allDepartments))

print("\n all the departments available are")
print(allDepartments)

df['preliminary_diagnosis'] = df['preliminary_diagnosis'].apply(convert_string_to_list)
df['diagnostic_basis'] = df['diagnostic_basis'].apply(convert_string_to_list)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(convert_string_to_list)
df['treatment_plan'] = df['treatment_plan'].apply(convert_string_to_list)
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)


number of total cases are 1500

each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
number of departments available are 24

 all the departments available are
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine dep

filtering the dataset

In [3]:
department="gynecology department"#"respiratory medicine department"#"nephrology department"##"pediatrics department"#"gynecology department"
departmentdf=filterDepartment(df,department)
# departmentdf.to_json(f"dataset/clinicallab/department/{department}.json", orient='records', lines=False, indent=4)
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 8
principal_diagnosis
ovarian cyst             10
endometrial polyp        10
endometrial cancer       10
uterine fibroid          10
adenomyosis              10
cervical polyp           10
cervical cancer          10
heterotopic pregnancy    10
Name: count, dtype: int64
number of preliminary_diagnosis are 8


In [4]:
# # Function to extract disease names from a single row
# def extract_disease_names_from_row(differential_diagnosis_list):
#     disease=[entry.split(":")[0].strip() for entry in differential_diagnosis_list]
#     return disease

# # Apply the function to each row and combine all lists into one
# differential_diseases = departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row).sum()
# refined_differential_diseases=[]
# for disease in differential_diseases:
#     if len(disease) <20:
#         refined_differential_diseases.append(disease)
# uniqueDiseases=departmentdf["principal_diagnosis"].unique().tolist()
# uniquePrimary=uniqueDiseases[:]
# uniqueDiseases.extend(refined_differential_diseases)
# uniqueDiseases=list(set(uniqueDiseases))
# print("number of unique diseases are",len(uniqueDiseases))
# print(uniqueDiseases)

In [6]:
required_fields=[ "Patient basic information",
                 "Chief complaint",
                 "Medical history",
                 "Physical examination",
                 "Laboratory examination",
                 "Imageological examination",
                 "Auxillary examination",
                 "Pathological examination"
    
]
departments=["respiratory medicine department",
             "nephrology department",
             "pediatrics department",
             "gynecology department",
             "endocrinology department",   
             "neurology department",
             "cardiac surgical department",                          
             "gastrointestinal surgical department" ]
# departments=["pediatrics department"]#,#"respiratory medicine department"]#"cardiac surgical department"]
models = ["llama3.1", "gemma2", "phi3:14b", "mistral-nemo"]#"mistral"
laboratory="abnormal"#"result"
image="impression"#"findings"


In [None]:
laboratory="abnormal"
image="impression"
report_type=f"{laboratory}_{image}"
for department in departments:
    print("department is",department)
    departmentdf=filterDepartment(df,department)
    caseNumbers = [i for i in range(1, len(departmentdf), 10)]
    print(caseNumbers)
    row=departmentdf
    pdf = PDF()
    pdf.set_left_margin(10)
    pdf.set_right_margin(10)
    getDepartmentStatistics(departmentdf)
    for caseNumber in caseNumbers:        
        case_id,principal_diagnosis,differential_diagnosis,clinical_case_dict,filtered_clinical_case_dict=select_case_components(departmentdf,caseNumber,required_fields,laboratory,image)


In [8]:
department="neurology department"
caseNumber=1
departmentdf=filterDepartment(df,department)
case_id,principal_diagnosis,differential_diagnosis,clinical_case_dict,filtered_clinical_case_dict=select_case_components(departmentdf,caseNumber,required_fields,laboratory,image)

In [13]:
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from typing import List
import json
class DiagnosisReason(BaseModel):
    category: str = Field(description="The category of the examination, e.g., medical-history, Physical-Examination, Laboratory-Examination, Image-Examination")
    reasons: List[str] = Field(description="A list of precise reasons for the diagnosis based on the examination")
class FinalDiagnosis(BaseModel):
    final_diagnosis: str = Field(description="Name of the most possible disease within the given set of diseases")
    reasons: List[DiagnosisReason] = Field(description="A list of reasoning categories and the associated reasons for the final diagnosis")
def doctor_prompt_disease_restricted_ollama(medical_history, modelname, diseases, department):
    model = OllamaLLM(model=modelname, temperature=0.1, num_predict=1200, num_ctx=12000)
    print("started model ", modelname)
    prompt_template = """
    You are an experienced doctor from {department}, and you will be provided with the medical history of a patient containing past medical history,
    physical examination, laboratory examination, and imaging examination results. Your task is to identify the most likely disease of the patient using differential diagnosis from the given set of diseases:
    {diseases}
    Analyze step by step each aspect of the physical examination, laboratory examination, and imaging examination based on the above diseases.
    Once done, select the top possible disease using your analysis and differential diagnosis.
    Patient's medical history: {medical_history}.
    Please format your response as a JSON object with the following fields:
    - final_diagnosis: Name of the most possible disease within the above set of diseases.
    - reasons: A list of categories (e.g., medical-history, Physical-Examination) with associated reasons for the final diagnosis. Each reason should be precise and brief.
    JSON output:"""
    prompt = ChatPromptTemplate.from_template(prompt_template)
    output_parser = JsonOutputParser(pydantic_object=FinalDiagnosis)
    chain = prompt | model | output_parser
    output = chain.invoke(
        {
            "medical_history": medical_history,
            "diseases": diseases,
            "department": department,
        }
    )
    return output

In [15]:
filtered_clinical_case_dict

{'Patient basic information': 'Elderly male, 78 years old.',
 'Chief complaint': 'Progressive worsening of right-sided limb weakness for 5 years.',
 'Medical history': "The patient developed right-sided limb weakness 5 years ago without any obvious cause, accompanied by a feeling of heaviness and soreness in the right limb, particularly the right lower limb, difficulty in walking, and slowness. These symptoms progressively worsened, with the gradual onset of right-hand clumsiness. The patient was treated with 'Levodopa and Benserazide Tablets (0.125g, three times a day)', which slightly improved the aforementioned symptoms. During multiple follow-ups, due to fluctuations in symptoms of right limb movement impairment, the patient was given a combination of 'Pramipexole Tablets (0.25mg, three times a day)' and 'Amantadine (0.1g, twice a day)'. Two years ago, the patient sought medical attention due to the progression of right limb symptoms and significant slowness in walking, and was tre

In [16]:
len(departmentdf)

80

In [18]:
filtered_clinical_case_dict

{'Patient basic information': 'Elderly male, 78 years old.',
 'Chief complaint': 'Progressive worsening of right-sided limb weakness for 5 years.',
 'Medical history': "The patient developed right-sided limb weakness 5 years ago without any obvious cause, accompanied by a feeling of heaviness and soreness in the right limb, particularly the right lower limb, difficulty in walking, and slowness. These symptoms progressively worsened, with the gradual onset of right-hand clumsiness. The patient was treated with 'Levodopa and Benserazide Tablets (0.125g, three times a day)', which slightly improved the aforementioned symptoms. During multiple follow-ups, due to fluctuations in symptoms of right limb movement impairment, the patient was given a combination of 'Pramipexole Tablets (0.25mg, three times a day)' and 'Amantadine (0.1g, twice a day)'. Two years ago, the patient sought medical attention due to the progression of right limb symptoms and significant slowness in walking, and was tre

In [20]:
department="neurology department"
departmentdf=filterDepartment(df,department)
caseNumbers=[1,4,7,10,13,16,19,22,25,28,31,34,37,40]#,43,46,49,52,55,58,61,64,67,70,73,76]

for caseNumber in caseNumbers:
    case_id,principal_diagnosis,differential_diagnosis,clinical_case_dict,filtered_clinical_case_dict=select_case_components(departmentdf,caseNumber,required_fields,laboratory,image)
    print("\n case id:",case_id)
    output=doctor_prompt_disease_restricted_ollama(filtered_clinical_case_dict,"llama3.1",differential_diagnosis,department)


 case id: 712
started model  llama3.1


OutputParserException: Invalid json output: Here is the analysis and differential diagnosis:

**Final Diagnosis**

{
  "final_diagnosis": "Parkinson Disease",
  "reasons": [
    {
      "category": "Medical History",
      "reason": "Progressive worsening of right-sided limb weakness for 5 years, accompanied by slowness, difficulty in walking, and clumsiness, which is consistent with Parkinson's disease."
    },
    {
      "category": "Physical Examination",
      "reason": "Mask-like face, slightly unfluent speech, and normal muscle strength, tone, sensation, and coordination, which are typical of Parkinson's disease."
    },
    {
      "category": "Laboratory Examination",
      "reason": "Normal blood biochemistry test and thyroid function test, but low Vitamin B12 level, which is not uncommon in Parkinson's disease."
    },
    {
      "category": "Imaging Examination",
      "reason": "No obvious abnormalities were observed in the brain MRI scan, which is consistent with early-stage Parkinson's disease."
    }
  ]
}

**Differential Diagnosis**

The other two options, Vascular Parkinson's Syndrome and Progressive Supranuclear Palsy, are less likely based on the following reasons:

* **Vascular Parkinson's Syndrome**: The patient's symptoms have been progressive over 5 years, which is not typical of vascular parkinsonism. Additionally, there are no findings suggestive of cerebrovascular disease in the medical history or imaging examination.
* **Progressive Supranuclear Palsy**: While the patient has difficulty turning over and decreased sense of smell, these symptoms are not specific to PSP. The presence of limb weakness and slowness is more consistent with Parkinson's disease.

Therefore, based on the analysis of the patient's medical history, physical examination, laboratory examination, imaging examination, and differential diagnosis, the most likely disease is **Parkinson Disease**.

In [None]:
laboratory="abnormal"
image="impression"
report_type=f"{laboratory}_{image}"
for department in departments:
    print("department is",department)
    departmentdf=filterDepartment(df,department)
    caseNumbers = [i for i in range(1, len(departmentdf), 10)]
    # caseNumbers=[1]#,11,21]#31,41,51,61]
    # caseNumbers=[1,11,21,31,41,51,61]
    print(caseNumbers)
    row=departmentdf
    pdf = PDF()
    pdf.set_left_margin(10)
    pdf.set_right_margin(10)
    getDepartmentStatistics(departmentdf)
    for caseNumber in caseNumbers:
        
        case_id,principal_diagnosis,differential_diagnosis,clinical_case_dict,filtered_clinical_case_dict=select_case_components(departmentdf,caseNumber,required_fields,laboratory,image)
        print(case_id)
        print(principal_diagnosis)
        model="gpt-4"
        output=doctor_prompt_gpt(filtered_clinical_case_dict,model, differential_diagnosis, department)
        print(output)
        output2=doctor_prompt_gpt_self_confinement(filtered_clinical_case_dict,model, differential_diagnosis, department,output)
        print(output2)

In [None]:
laboratory="abnormal"
image="impression"
report_type=f"{laboratory}_{image}"
for department in departments:
    print("department is",department)
    departmentdf=filterDepartment(df,department)
    caseNumbers = [i for i in range(1, len(departmentdf), 10)]
    # caseNumbers=[1]#,11,21]#31,41,51,61]
    print(caseNumbers)
    row=departmentdf
    pdf = PDF()
    pdf.set_left_margin(10)
    pdf.set_right_margin(10)
    getDepartmentStatistics(departmentdf)
    for caseNumber in caseNumbers:
        case_id,principal_diagnosis,differential_diagnosis,clinical_case_dict,filtered_clinical_case_dict=select_case_components(departmentdf,caseNumber,required_fields,laboratory,image)
        diagnoses = [("gpt-4", doctor_prompt_gpt(filtered_clinical_case_dict, "gpt-4", uniquePrimary, department)),
                     ("llama3.1", doctor_prompt_ollama(filtered_clinical_case_dict, "llama3.1", differential_diagnosis, department)),
                     ("phi3:14b", doctor_prompt_ollama(filtered_clinical_case_dict, "phi3:14b", differential_diagnosis, department)),
                     ("mistral-nemo", doctor_prompt_ollama(filtered_clinical_case_dict, "mistral-nemo", differential_diagnosis, department))
                     ]
        # diagnoses = [(model_name, doctor_prompt_disease_restricted_ollama(filtered_clinical_case_dict, model_name, differential_diagnosis, department)) for model_name in models]
        pdf.add_case(case_id, principal_diagnosis, differential_diagnosis, clinical_case_dict, diagnoses)
        print("done for caseid",case_id)
    # # Output the PDF to a file
    pdf_file_path = f"./medical-reports/{department}_{report_type}_combined_{laboratory}_{image}.pdf"
    pdf.output(pdf_file_path)

    print(f"PDF report generated: {pdf_file_path}")