Loading the required libraries

In [1]:
# Imports and global configuration used by the notebook cells below.
import pandas as pd
# Raise pandas' column display width to 5000 characters so long text fields are not cut off when frames are shown.
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
# Prompt helpers for the GPT backend (open-ended and semi-open-ended variants).
from src.gpt import doctor_prompt_gpt_open_ended,doctor_prompt_gpt_semi_ended
# Project helpers for parsing, filtering and summarising the case data.
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json,PDF,select_case_components
# Prompt helpers for the Ollama-served models.
from src.ollama import doctor_prompt_ollama,doctor_prompt_ollama_openended,doctor_prompt_ollama_semi_ended
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials used by src.gpt — confirm.
load_dotenv()



True

Loading the dataset

In [2]:
# Read all clinical cases from the JSON file into a list of dicts.
filePath = "dataset/clinicallab/data_en.json"
with open(filePath, mode='r', encoding='utf-8') as f:
    data = json.load(f)
print("\nnumber of total cases are", len(data))
print("\neach case have the following fields", list(data[0]))

# Fields carried over into the working DataFrame (a subset of the keys printed above).
keys_to_include = [
    "id",
    'clinical_department',
    'principal_diagnosis',
    'preliminary_diagnosis',
    'diagnostic_basis',
    'differential_diagnosis',
    'treatment_plan',
    'clinical_case_summary',
    'imageological_examination',
    'laboratory_examination',
    'pathological_examination',
    'therapeutic_principle',
]
records = [{key: case[key] for key in keys_to_include} for case in data]
df = pd.DataFrame(records)

# Overview of how many cases each department contributes.
allDepartments = df['clinical_department'].value_counts()
print("number of departments available are", len(allDepartments))

print("\n all the departments available are")
print(allDepartments)

# Run the project converter over the four columns that share it.
# NOTE(review): presumably these columns hold stringified lists — confirm in src.utils.
for list_column in ('preliminary_diagnosis', 'diagnostic_basis',
                    'differential_diagnosis', 'treatment_plan'):
    df[list_column] = df[list_column].apply(convert_string_to_list)
# The case summary has its own dedicated converter.
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)


number of total cases are 1500

each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
number of departments available are 24

 all the departments available are
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine dep

Filtering the dataset by clinical department

In [3]:
# Department inspected in this cell. Other names kept from the original inline note:
#   "respiratory medicine department", "nephrology department",
#   "pediatrics department", "gynecology department"
department = "neurology department"
departmentdf = filterDepartment(df, department)
# Optional export of the filtered cases (left disabled):
# departmentdf.to_json(f"dataset/clinicallab/department/{department}.json", orient='records', lines=False, indent=4)
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 8
principal_diagnosis
parkinson disease            10
epilepsy                     10
transient ischemic attack    10
myelitis                     10
cerebral infarction          10
subarachnoid hemorrhage      10
alzheimer disease            10
facial neuritis              10
Name: count, dtype: int64
number of preliminary_diagnosis are 8


In [4]:
# Function to extract disease names from a single row
def extract_disease_names_from_row(differential_diagnosis_list):
    """Return the leading name of every differential-diagnosis entry.

    Each entry is cut at its first ":" and the text before it is returned with
    surrounding whitespace removed. An entry without a colon is returned whole
    (stripped).

    Args:
        differential_diagnosis_list: iterable of strings.

    Returns:
        A list of strings, one per input entry, in the same order.
    """
    names = []
    for entry in differential_diagnosis_list:
        head, _, _ = entry.partition(":")
        names.append(head.strip())
    return names

# Flatten the per-case lists of differential-diagnosis names into one list.
# A comprehension is used instead of Series.apply(...).sum(): summing lists
# re-copies the accumulated list for every row, and on an empty Series the sum
# is the integer 0, which the filtering step below could not iterate.
differential_diseases = [
    name
    for entries in departmentdf["differential_diagnosis"]
    for name in extract_disease_names_from_row(entries)
]

# Keep only entries shorter than 40 characters.
# NOTE(review): presumably longer strings are free text rather than disease names — confirm.
refined_differential_diseases = [
    name for name in differential_diseases if len(name) < 40
]

# Principal diagnoses of the department, in their original casing.
uniquePrimary = departmentdf["principal_diagnosis"].unique().tolist()

# Lower-cased, de-duplicated and sorted union of principal and differential names.
uniqueDiseases = sorted(
    {name.lower() for name in uniquePrimary + refined_differential_diseases}
)

print("number of unique diseases are",len(uniqueDiseases))
# Bare count kept so the cell prints the same lines as before.
print(len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 58
58
['acute myelitis', 'acute spinal cord compression', 'acute spinal cord compression syndrome', 'alzheimer disease', 'benign positional vertigo', 'brain hemorrhage', 'brainstem infarction', 'cerebral hemorrhage', 'cerebral infarction', 'cervical vertigo', 'cranial tumor', 'dementia with lewy bodies (dlb)', 'diabetic neuropathy', 'electrolyte disorder', 'epilepsy', 'facial neuritis', 'frontotemporal dementia', 'frontotemporal dementia (ftd)', 'guillain-barre syndrome', 'guillain-barré syndrome', 'hereditary spastic paraplegia', 'intracranial infection', 'intracranial space-occupying lesions', 'lewy body dementia', 'lyme disease', 'multiple sclerosis', 'multiple system atrophy', 'myelitis', 'neurological lyme disease', 'neuromyelitis optica', 'optic neuritis', 'otic origin facial nerve paralysis', 'parkinson disease', "parkinson's disease dementia (pdd)", 'periodic paralysis', 'polymyositis', 'posterior fossa lesions', 'posterior fossa tumor or meningiti

In [5]:
# Hand-written candidate disease lists.
# NOTE(review): list 1 looks like endocrinology/rheumatology diagnoses and list 2 like
# neurology diagnoses — confirm which department each list is meant for.
# NOTE(review): this first value of uniqueDiseases1 is overwritten by the very next
# assignment and is never read in this cell.
uniqueDiseases1=['Lumbar Tuberculosis', 'systemic lupus erythematosus', 'Low T3 Syndrome', 'ankylosing spondylitis', "Graves' disease", 'Diabetes insipidus', 'Psoriatic Arthritis', 'Type 1 Diabetes', 'Type 1 diabetes', 'Hyperthyroidism', 'Scleredema', 'hypothyroidism', 'Hypoglycemia', 'Hypokalemia', 'Myositis', 'subacute thyroiditis', 'Simple Skin Allergy', 'Diabetes Insipidus', 'Simple Goiter', 'Psoriatic arthritis', 'rheumatoid arthritis', 'Rheumatic Fever', 'diabetes mellitus', 'Simple goiter', 'Colorectal cancer', 'gouty arthritis', 'Rheumatic fever', 'Osteoarthritis']
# Lower-cased, alphabetically ordered list that replaces the value assigned above.
uniqueDiseases1=['acute suppurative thyroiditis', 'ankylosing spondylitis', 'cervical and lumbar disc herniation', 'chronic lymphocytic thyroiditis', 'colorectal cancer', 'diabetes insipidus', 'diabetes mellitus', 'gouty arthritis', "graves' disease", "hashimoto's thyroiditis",'hyperthyroidism', 'hypoglycemia', 'hypokalemia', 'hypothyroidism', 'inflammatory bowel disease', 'lada', 'localized scleroderma', 'low t3 syndrome', 'lumbar tuberculosis', 'mixed connective tissue disease', 'myositis', 'osteoarthritis', 'psoriatic arthritis', 'rheumatic fever', 'rheumatoid arthritis', 'scleredema', 'simple goiter', 'simple skin allergy', 'subacute thyroiditis', 'systemic lupus erythematosus', 'type 1 diabetes']
# Mixed-case list; it contains several entries that differ only in capitalisation
# (e.g. 'Lyme Disease' / 'Lyme disease').
uniqueDiseases2=['Brain Hemorrhage', 'parkinson disease', 'Epilepsy', 'Cerebral Infarction', 'Diabetic Neuropathy', 'Cerebral hemorrhage', 'Lewy Body Dementia', 'Cervical vertigo', 'Tumorous Stroke', 'alzheimer disease', 'myelitis', 'Lyme Disease', 'Multiple sclerosis', 'Acute Myelitis', 'Tumoral Stroke', 'Optic neuritis', 'subarachnoid hemorrhage', 'Tumor Stroke', 'Cranial tumor', 'Vascular Dementia', 'Lyme disease', 'Syncope', 'Pseudoseizures', 'Periodic Paralysis', 'Primary Tremor', 'Diabetic neuropathy', 'facial neuritis', 'Polymyositis', 'transient ischemic attack']
# Both lists bundled positionally.
# NOTE(review): the only visible consumer is a commented-out lookup by department index;
# verify the order matches `departments` before re-enabling it.
uniqueDiseasesall=[uniqueDiseases1,uniqueDiseases2]
# Prints the size of `uniqueDiseases` (defined in another cell), not of the lists defined here.
print(len(uniqueDiseases))

58


In [8]:
# Case-summary sections handed to select_case_components in the experiment cells.
# NOTE(review): "Auxillary" is kept as spelled; presumably it must match the dataset's
# own field naming — confirm before correcting.
required_fields = [
    "Patient basic information",
    "Chief complaint",
    "Medical history",
    "Physical examination",
    "Laboratory examination",
    "Imageological examination",
    "Auxillary examination",
    "Pathological examination",
]

# Departments evaluated by the experiment cells. Only this selection is live.
# Two wider selections that used to be assigned (and immediately overwritten) here:
#   respiratory medicine, nephrology, pediatrics, gynecology, endocrinology,
#   neurology, cardiac surgical, gastrointestinal surgical
#   nephrology, gynecology, endocrinology, neurology
departments = ["neurology department", "gynecology department"]

# Model identifiers.
# NOTE(review): the visible experiment cells hard-code their model names and do not
# read this list — confirm whether it is still needed.
models = ["llama3.1", "gemma2", "phi3:14b", "mistral-nemo"]



In [9]:
# Semi-open-ended run: for every selected department, query GPT-4 and two Ollama
# models on a sample of cases and store the answers in one JSON file per department.
laboratory = "result"
image = "findings"
report_type = f"{laboratory}_{image}"
all_departments = {}

for i, department in enumerate(departments):
    # An earlier variant passed a department-wide list (uniqueDiseasesall[i]) to the
    # prompts instead of each case's own differential-diagnosis list.
    results = {}
    print("department is", department)
    departmentdf = filterDepartment(df, department)
    # Sample every fifth case position, starting at position 1.
    caseNumbers = list(range(1, len(departmentdf), 5))
    print(caseNumbers)
    row = departmentdf
    getDepartmentStatistics(departmentdf)

    for caseNumber in caseNumbers:
        (case_id,
         principal_diagnosis,
         differential_diagnosis,
         clinical_case_dict,
         filtered_clinical_case_dict) = select_case_components(
            departmentdf, caseNumber, required_fields, laboratory, image)
        # Ground truth is stored next to the model answers for later comparison.
        case_details = {
            "original": {
                "main-diagnosis": principal_diagnosis,
                "differential_diagnosis": differential_diagnosis,
            },
        }
        print("case_id", case_id)
        print("principal diagnosis", principal_diagnosis)

        # Same prompt inputs for every model (a phi3:14b call used to be listed here, disabled).
        output0 = doctor_prompt_gpt_semi_ended(
            filtered_clinical_case_dict, "gpt-4", differential_diagnosis, department)
        output1 = doctor_prompt_ollama_semi_ended(
            filtered_clinical_case_dict, "llama3.1", differential_diagnosis, department)
        output2 = doctor_prompt_ollama_semi_ended(
            filtered_clinical_case_dict, "gemma2", differential_diagnosis, department)

        case_details.update({
            "gpt-4": output0,
            "llama3.1": output1,
            "gemma2": output2,
        })
        results[str(case_id)] = case_details

    # One result file per department, written after all of its cases are done.
    with open(f"{department}_only_differential.json", "w") as outfile:
        json.dump(results, outfile)

department is neurology department
[1, 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76]
number of principal diagnosis are 8
principal_diagnosis
parkinson disease            10
epilepsy                     10
transient ischemic attack    10
myelitis                     10
cerebral infarction          10
subarachnoid hemorrhage      10
alzheimer disease            10
facial neuritis              10
Name: count, dtype: int64
number of preliminary_diagnosis are 8
case_id 712
principal diagnosis parkinson disease


started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 717
principal diagnosis parkinson disease
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 722
principal diagnosis epilepsy
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 727
principal diagnosis epilepsy
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 732
principal diagnosis transient ischemic attack
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 737
principal diagnosis transient ischemic attack
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 742
principal diagnosis myelitis
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 747
principal diagnosis myelitis
started model  llama3.1
do

In [10]:
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.units import inch
import json
from xml.sax.saxutils import escape

# Render each department's "<department>_only_differential.json" results file
# into a PDF with one case per page.

# The paragraph style does not depend on the department, so it is fetched once.
styles = getSampleStyleSheet()
style_normal = styles["Normal"]

for department in departments:
    with open(f'{department}_only_differential.json', 'r') as file:
        data = json.load(file)

    # Now 'data' is a dictionary (or list, depending on the JSON structure)
    print(data)

    # Create a PDF document
    pdf_file = f"{department}_only_differential.pdf"
    doc = SimpleDocTemplate(pdf_file, pagesize=A4)

    # Container for the 'Flowable' objects (Paragraphs, Spacers, etc.)
    content = []

    # Loop through the dictionary and add content to the PDF
    for key, sub_dict in data.items():
        # Add the main key
        content.append(Paragraph(f"Key: {escape(str(key))}", style_normal))
        content.append(Spacer(1, 0.2 * inch))  # Add some space

        # Add each sub-key and its corresponding text
        for sub_key, text in sub_dict.items():
            # Paragraph parses its input as XML-like markup, so raw '<', '>' or '&'
            # in a model answer can break the build or be dropped. Escape them, and
            # turn newlines into explicit breaks because Paragraph collapses whitespace.
            safe_text = escape(str(text)).replace("\n", "<br/>")
            content.append(Paragraph(f"&nbsp;&nbsp;&nbsp;{escape(str(sub_key))}: {safe_text}", style_normal))
            content.append(Spacer(1, 0.1 * inch))  # Add some space

        # Add a page break after each key
        content.append(PageBreak())

    # Build the PDF
    doc.build(content)

    print(f"PDF generated successfully at {pdf_file}!")


{'712': {'original': {'main-diagnosis': 'parkinson disease', 'differential_diagnosis': ["Vascular Parkinson's Syndrome", 'Progressive Supranuclear Palsy', 'parkinson disease']}, 'gpt-4': "1. **Summarize the medical case.**\n\nThe patient is a 78-year-old male who has been experiencing progressive worsening of right-sided limb weakness for 5 years. The symptoms include heaviness and soreness in the right limb, difficulty in walking, slowness, and right-hand clumsiness. The patient has been treated with various medications, which have only slightly improved the symptoms. In the past six months, the patient's symptoms have progressed, and new symptoms such as difficulty turning over, decreased sense of smell, difficulty in defecation, frequent urination, sleep disturbances at night, and emotional irritability and anger have appeared. The patient has no history of chronic diseases. Physical examination shows no special findings in heart, lung, and abdomen examination. The patient is consci

In [11]:
# Open-ended run: for every selected department, query GPT-4 and two Ollama
# models on a sample of cases and store the answers in one JSON file per department.
laboratory = "result"
image = "findings"
report_type = f"{laboratory}_{image}"
all_departments = {}

for i, department in enumerate(departments):
    # A department-wide lookup (uniqueDiseasesall[i]) used to be listed here, disabled.
    results = {}
    print("department is", department)
    departmentdf = filterDepartment(df, department)
    # Sample every fifth case position, starting at position 1.
    caseNumbers = list(range(1, len(departmentdf), 5))
    print(caseNumbers)
    row = departmentdf
    getDepartmentStatistics(departmentdf)

    for caseNumber in caseNumbers:
        (case_id,
         principal_diagnosis,
         differential_diagnosis,
         clinical_case_dict,
         filtered_clinical_case_dict) = select_case_components(
            departmentdf, caseNumber, required_fields, laboratory, image)
        # Ground truth is stored next to the model answers for later comparison.
        case_details = {
            "original": {
                "main-diagnosis": principal_diagnosis,
                "differential_diagnosis": differential_diagnosis,
            },
        }
        print("case_id", case_id)
        print("principal diagnosis", principal_diagnosis)

        # NOTE(review): the open-ended prompts still receive the case's differential
        # list as an argument — confirm whether the helpers actually show it to the model.
        output0 = doctor_prompt_gpt_open_ended(
            filtered_clinical_case_dict, "gpt-4", differential_diagnosis, department)
        output1 = doctor_prompt_ollama_openended(
            filtered_clinical_case_dict, "llama3.1", differential_diagnosis, department)
        output2 = doctor_prompt_ollama_openended(
            filtered_clinical_case_dict, "gemma2", differential_diagnosis, department)

        case_details.update({
            "gpt-4": output0,
            "llama3.1": output1,
            "gemma2": output2,
        })
        results[str(case_id)] = case_details

    # One result file per department, written after all of its cases are done.
    with open(f"{department}_open_ended.json", "w") as outfile:
        json.dump(results, outfile)

department is neurology department
[1, 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76]
number of principal diagnosis are 8
principal_diagnosis
parkinson disease            10
epilepsy                     10
transient ischemic attack    10
myelitis                     10
cerebral infarction          10
subarachnoid hemorrhage      10
alzheimer disease            10
facial neuritis              10
Name: count, dtype: int64
number of preliminary_diagnosis are 8
case_id 712
principal diagnosis parkinson disease
the model name is gpt-4
done for model gpt-4
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 717
principal diagnosis parkinson disease
the model name is gpt-4
done for model gpt-4
started model  llama3.1
done for model llama3.1
started model  gemma2
done for model gemma2
case_id 722
principal diagnosis epilepsy
the model name is gpt-4
done for model gpt-4
started model  llama3.1
done for model llama3.1
started model  gem

In [12]:
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.units import inch
import json
from xml.sax.saxutils import escape

# Render each department's "<department>_open_ended.json" results file
# into a PDF with one case per page.

# The paragraph style does not depend on the department, so it is fetched once.
styles = getSampleStyleSheet()
style_normal = styles["Normal"]

for department in departments:
    with open(f'{department}_open_ended.json', 'r') as file:
        data = json.load(file)

    # Now 'data' is a dictionary (or list, depending on the JSON structure)
    print(data)

    # Create a PDF document
    pdf_file = f"{department}_open_ended.pdf"
    doc = SimpleDocTemplate(pdf_file, pagesize=A4)

    # Container for the 'Flowable' objects (Paragraphs, Spacers, etc.)
    content = []

    # Loop through the dictionary and add content to the PDF
    for key, sub_dict in data.items():
        # Add the main key
        content.append(Paragraph(f"Key: {escape(str(key))}", style_normal))
        content.append(Spacer(1, 0.2 * inch))  # Add some space

        # Add each sub-key and its corresponding text
        for sub_key, text in sub_dict.items():
            # Paragraph parses its input as XML-like markup, so raw '<', '>' or '&'
            # in a model answer can break the build or be dropped. Escape them, and
            # turn newlines into explicit breaks because Paragraph collapses whitespace.
            safe_text = escape(str(text)).replace("\n", "<br/>")
            content.append(Paragraph(f"&nbsp;&nbsp;&nbsp;{escape(str(sub_key))}: {safe_text}", style_normal))
            content.append(Spacer(1, 0.1 * inch))  # Add some space

        # Add a page break after each key
        content.append(PageBreak())

    # Build the PDF
    doc.build(content)

    print(f"PDF generated successfully at {pdf_file}!")


PDF generated successfully at neurology department_open_ended.pdf!
{'272': {'original': {'main-diagnosis': 'ovarian cyst', 'differential_diagnosis': ['Ectopic Pregnancy', 'Appendicitis', 'ovarian cyst']}, 'gpt-4': '1. Ectopic Pregnancy\n2. Ovarian Cyst\n3. Pelvic Inflammatory Disease\n4. Appendicitis', 'llama3.1': "Based on the provided medical history, I have identified the top 4 most likely diagnoses for the patient:\n\n1. **Ectopic Pregnancy**: The patient's irregular abdominal pain, nausea, and vomiting could be indicative of an ectopic pregnancy. The ultrasound findings of a quasi-circular low-density shadow in the left adnexal area, approximately 8.5x6.8cm in size, with clear lesion boundaries, are consistent with an ectopic pregnancy.\n\n2. **Ovarian Cyst**: The patient's ultrasound findings also suggest the presence of an ovarian cyst, which could be causing her abdominal pain and discomfort.\n\n3. **Pelvic Inflammatory Disease (PID)**: The patient's lower abdominal tenderness,