loading required libraries

In [22]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.llm import doctor_prompt_disease_restricted_output_parser
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json
from src.ollama2 import doctor_prompt_disease_restricted_output_parser_ollama,doctor_prompt_disease_restricted_output_parser_ollama_combined
load_dotenv()

True

importing the dataset

In [23]:
# Location of the English case file, relative to the notebook's working directory.
filePath = "dataset/clinicallab/data_en.json"

# Parse the whole JSON document; the lines below index the result like a list of dicts.
with open(filePath, mode="r", encoding="utf-8") as case_file:
    data = json.load(case_file)

# Sanity report: number of loaded cases and the field names of the first case.
total_cases = len(data)
first_case_fields = list(data[0].keys())
print("number of total cases are", total_cases)
print("each case have the following fields", first_case_fields)

number of total cases are 1500
each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']


filtering the dataset

In [24]:

# Fields kept for the analysis: every case field except 'clinical_case_uid' and 'language'.
keys_to_include = [
    "id",
    "clinical_department",
    "principal_diagnosis",
    "preliminary_diagnosis",
    "diagnostic_basis",
    "differential_diagnosis",
    "treatment_plan",
    "clinical_case_summary",
    "imageological_examination",
    "laboratory_examination",
    "pathological_examination",
    "therapeutic_principle",
]

# Project every raw case onto the selected fields, then build the frame in one call.
selected_records = []
for case in data:
    selected_records.append({field: case[field] for field in keys_to_include})
df = pd.DataFrame(selected_records)

In [25]:
# Number of cases per clinical department (value_counts sorts by descending count).
allDepartments = df["clinical_department"].value_counts()
department_total = len(allDepartments)
print("number of departments available are", department_total)

print(allDepartments)

number of departments available are 24
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine department                      70
gastroenterology department                          70
neurosurgery department                              70
cardiac surgical department                          70
nephrology department                                60
gastrointestinal surgical department                 60
pediatrics department                                60
thyroid surgical department                  

In [26]:

# Columns that all go through the same converter, convert_string_to_list.
# NOTE(review): df is overwritten in place, so re-running this cell feeds already-converted
# values back into the converters -- confirm they tolerate that, or restart the kernel first.
list_like_columns = (
    "preliminary_diagnosis",
    "diagnostic_basis",
    "differential_diagnosis",
    "treatment_plan",
)
for column_name in list_like_columns:
    df[column_name] = df[column_name].apply(convert_string_to_list)

# The case summary uses its own dedicated converter.
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

In [44]:
# Department under study; swap in another name (e.g. "gynecology department") to analyse it instead.
department = "nephrology department"
departmentdf = filterDepartment(df, department)

# Save the department subset as an indented JSON array of records.
department_json_path = f"dataset/clinicallab/department/{department}.json"
departmentdf.to_json(department_json_path, orient="records", lines=False, indent=4)

In [45]:
# Report diagnosis statistics for the selected department subset (helper from src.utils).
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 6
principal_diagnosis
acute pyelonephritis       10
acute renal failure        10
chronic kidney disease     10
urinary tract infection    10
glomerulonephritis         10
nephrotic syndrome         10
Name: count, dtype: int64
number of preliminary_diagnosis are 6


In [46]:
def extract_disease_names_from_row(differential_diagnosis_list):
    """Return the leading disease name of every differential-diagnosis entry in one row.

    Each entry is treated as ``"<name>: <description>"``: the text before the first
    colon is kept and surrounding whitespace is stripped. An entry that contains no
    colon is returned whole (stripped).

    Args:
        differential_diagnosis_list: iterable of strings, one per differential diagnosis.

    Returns:
        A list of names, in the same order as the input entries.
    """
    names = []
    for entry in differential_diagnosis_list:
        name, _, _ = entry.partition(":")
        names.append(name.strip())
    return names

# Flatten the per-row name lists into one list.
# A nested comprehension replaces Series.sum(): summing lists is repeated list
# concatenation, and on an empty Series sum() returns 0, which cannot be iterated.
differential_diseases = [
    name
    for row_names in departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row)
    for name in row_names
]

# Keep only short strings -- presumably to drop entries that had no "name:" prefix and
# therefore came back as whole sentences.
# NOTE(review): this cut-off also discards genuine names of 20+ characters
# (e.g. "Urogenital Tuberculosis"); confirm that is intended.
MAX_DISEASE_NAME_LENGTH = 20
refined_differential_diseases = [
    name for name in differential_diseases if len(name) < MAX_DISEASE_NAME_LENGTH
]

# Principal diagnoses of the department, kept separately because later cells pass them to the LLM helpers.
uniquePrimary = departmentdf["principal_diagnosis"].unique().tolist()

# Union of principal and differential names.
# NOTE(review): de-duplication is case-sensitive, so "Acute renal failure" and
# "acute renal failure" both survive; normalise case first if one entry per disease is wanted.
uniqueDiseases = list(set(uniquePrimary + refined_differential_diseases))
print("number of unique diseases are", len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 17
['Acute renal failure', 'Urinary Tumors', 'acute pyelonephritis', 'glomerulonephritis', 'Urogenital Tumors', 'Urinary Calculi', 'urinary tract infection', 'chronic kidney disease', 'Acute Kidney Injury', 'Urinary Tract Tumor', 'Acute kidney injury', 'Multiple Sclerosis', 'Syringomyelia', 'Urolithiasis', 'Urinary tract tumor', 'nephrotic syndrome', 'acute renal failure']


In [47]:
# Spot check: is this name among the short differential names, then among the principal diagnoses?
disease = "Syncope"
for candidate_names in (refined_differential_diseases, uniquePrimary):
    print(disease in candidate_names)

False
False


In [48]:
# Same membership check against the combined, de-duplicated disease list.
is_listed_disease = disease in uniqueDiseases
print(is_listed_disease)

False


In [49]:
# Run settings. `model` is reassigned by the later cells right before each LLM call.
doctor, model = "doctor", "gpt-4"

In [54]:
# Positional index of the case inside the department subset (68 was an earlier pick).
caseNumber = 38  # 68
row = departmentdf.iloc[caseNumber]

# From here on caseNumber holds the case's 'id' field, not the positional index.
caseNumber = row["id"]
medicalHistory = row["clinical_case_summary"]
principalDiagnosis = row["principal_diagnosis"]
differentialDiagnosis = row["differential_diagnosis"]

# Show the selected case so the LLM answers below can be compared with the reference diagnosis.
print("the case id is", caseNumber)
print("\nthe principal diagnosis is", principalDiagnosis)
print("\ndifferential diagnosis is", differentialDiagnosis)
print("\nthe case history is ", medicalHistory)

the case id is 1179

the principal diagnosis is urinary tract infection

differential diagnosis is ['Urogenital Tuberculosis: Symptoms may include frequent urination, urgency, and pain during urination. Often accompanied by afternoon low fever, night sweats, and a history of pulmonary tuberculosis. Urine smear may reveal acid-fast bacilli, PPD test positive, and special radiographic changes.', 'Urogenital Tumors: May present with low fever and hematuria. Urinalysis typically shows no leukocytes, urine culture is negative, and tumor markers usually have abnormal changes.', 'Urolithiasis: Symptoms may also include back pain and soreness. Urinalysis may reveal red and white blood cells. Ultrasound and other examinations can detect stones or ureteral dilation for diagnosis.']

the case history is  {'Patient Basic Information': 'Elderly female, 70 years old.', 'Chief Complaint': 'Intermittent fever for 2 months, frequent urination and urgency for 1 week.', 'Medical History': "The patient ha

In [55]:
# First opinion: llama3 through the helper imported from src.ollama2.
# The department's principal diagnoses are passed along -- presumably as the allowed answer set.
model = "llama3"
diagnosis1 = doctor_prompt_disease_restricted_output_parser_ollama(
    medicalHistory,
    model,
    uniquePrimary,
    department,
)

Based on the patient's medical history, physical examination, lab reports, and imaging results, my top 3 most likely diseases are:

**Disease 1:** `urinary tract infection`

* Reason: The patient has a history of frequent urination and urgency, which is consistent with UTI. The laboratory tests show positive nitrite (NIT) and bacterial count (BACT), suggesting the presence of bacteria in the urinary tract.
* Next-step: Further examination to rule out any underlying conditions that may have contributed to the recurrent UTIs, such as chronic kidney disease or nephrolithiasis.

**Disease 2:** `acute pyelonephritis`

* Reason: The patient has had a history of intermittent fever and flank pain, which could be indicative of acute pyelonephritis. The laboratory tests show an elevated white blood cell count (WBC) in the urine, suggesting inflammation in the kidneys.
* Next-step: Further imaging studies such as CT or MRI to assess for any kidney damage or abscesses.

**Disease 3:** `chronic kid

In [56]:
# Second opinion: gpt-4o through the helper imported from src.llm.
# Alternative kept for reference -- run the second opinion on llama3.1 via the ollama helper:
#   model = "llama3.1"
#   diagnosis2 = doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary, department)
model = "gpt-4o"
diagnosis2 = doctor_prompt_disease_restricted_output_parser(
    medicalHistory,
    model,
    uniquePrimary,
    department,
)

Based on the provided medical history, physical examination, lab reports, and imaging examination, here are the top 3 most likely diseases for the patient:

1. **Disease Name:** Urinary Tract Infection (UTI)
   - **Reason:** The patient has a history of frequent urination and urgency, positive nitrite in urine, elevated white blood cell count in urine, and presence of Escherichia coli in urine culture. These findings are highly indicative of a urinary tract infection.
   - **Next Step:** Perform a sensitivity test on the urine culture to determine the most effective antibiotic for treatment. Additionally, consider a follow-up urine analysis after treatment to ensure the infection has been resolved.

2. **Disease Name:** Acute Pyelonephritis
   - **Reason:** The patient has experienced right-sided back pain, fever, chills, nausea, and vomiting, which are classic symptoms of acute pyelonephritis. The elevated white blood cell count and presence of bacteria in the urine further support th

In [57]:
# Final step: hand both earlier opinions and the case history to llama3.1 through the "combined" helper.
model = "llama3.1"
final_diagnosis = doctor_prompt_disease_restricted_output_parser_ollama_combined(
    medicalHistory,
    model,
    diagnosis1,
    diagnosis2,
    department,
)

After thoroughly investigating both diagnoses and considering the patient's medical history, physical examination, lab reports, and imaging results, I have come to a conclusion.

**Disease-name:** `Acute Pyelonephritis`

**Reason:** Based on the patient's symptoms of intermittent fever, flank pain, chills, nausea and vomiting, frequent urination and urgency, which are all indicative of acute pyelonephritis. The laboratory tests show an elevated white blood cell count (WBC) in the urine, suggesting inflammation in the kidneys. Additionally, the patient has a history of recurrent UTIs, which could be contributing to the development of acute pyelonephritis.

While urinary tract infection (UTI) is also a possibility, I believe that acute pyelonephritis is a more likely diagnosis given the presence of flank pain and fever, which are characteristic symptoms of this condition. Chronic kidney disease is less likely given the absence of clear evidence of renal dysfunction or proteinuria in the 