loading required libraries

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.llm import doctor_prompt_disease_restricted_output_parser
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json
from src.ollama2 import doctor_prompt_disease_restricted_output_parser_ollama
load_dotenv()

True

importing the dataset

In [2]:
filePath="dataset/clinicallab/data_en.json"
with open(filePath, 'r', encoding='utf-8') as f:
            data = json.load(f)
print("number of total cases are",len(data))
print("each case have the following fields",list(data[0].keys()))

number of total cases are 1500
each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']


filtering the dataset

In [3]:

keys_to_include = ["id",'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
df = pd.DataFrame([{key: d[key] for key in keys_to_include} for d in data])

In [4]:
allDepartments=df['clinical_department'].value_counts()
print("number of departments available are",len(allDepartments))

print(allDepartments)

number of departments available are 24
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine department                      70
gastroenterology department                          70
neurosurgery department                              70
cardiac surgical department                          70
nephrology department                                60
gastrointestinal surgical department                 60
pediatrics department                                60
thyroid surgical department                  

In [5]:

df['preliminary_diagnosis'] = df['preliminary_diagnosis'].apply(convert_string_to_list)
df['diagnostic_basis'] = df['diagnostic_basis'].apply(convert_string_to_list)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(convert_string_to_list)
df['treatment_plan'] = df['treatment_plan'].apply(convert_string_to_list)
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

In [23]:
department="gynecology department"#"nephrology department"
departmentdf=filterDepartment(df,department)
departmentdf.to_json(f"dataset/clinicallab/department/{department}.json", orient='records', lines=False, indent=4)

In [24]:
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 8
principal_diagnosis
ovarian cyst             10
endometrial polyp        10
endometrial cancer       10
uterine fibroid          10
adenomyosis              10
cervical polyp           10
cervical cancer          10
heterotopic pregnancy    10
Name: count, dtype: int64
number of preliminary_diagnosis are 8


In [25]:
# Function to extract disease names from a single row
def extract_disease_names_from_row(differential_diagnosis_list):
    disease=[entry.split(":")[0].strip() for entry in differential_diagnosis_list]
    return disease

# Apply the function to each row and combine all lists into one
differential_diseases = departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row).sum()
refined_differential_diseases=[]
for disease in differential_diseases:
    if len(disease) <20:
        refined_differential_diseases.append(disease)
uniqueDiseases=departmentdf["principal_diagnosis"].unique().tolist()
uniquePrimary=uniqueDiseases[:]
uniqueDiseases.extend(refined_differential_diseases)
uniqueDiseases=list(set(uniqueDiseases))
print("number of unique diseases are",len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 46
['endometrial polyp', 'cervical polyp', 'Endometrial Cancer', 'heterotopic pregnancy', 'Endometrial cancer', 'Ovarian Tumor', 'Acute Appendicitis', 'Hydrosalpinx', 'Cervical lesions', 'Endometrial lesions', 'Gastric cancer', 'Acute appendicitis', 'Endometrial Lesion', 'Acute salpingitis', 'Uterine Sarcoma', 'Vaginal wall mass', 'Complete Abortion', 'Miscarriage', 'Cervical Lesions', 'Hemolytic Anemia', 'Cervical lesion', 'Ovarian tumor', 'Acute Salpingitis', 'Chronic cervicitis', 'Uterine Adenomyoma', 'endometrial cancer', 'Incomplete Abortion', 'Uterine fibroids', 'Ectopic Pregnancy', 'Endometrial Lesions', "Meniere's Disease", 'Cervical cancer', 'Atrophic vaginitis', 'Appendicitis', 'Endometrial polyps', 'uterine fibroid', 'Uterine leiomyoma', 'Uterine sarcoma', 'Vaginal Wall Mass', 'Uterine adenomyoma', 'adenomyosis', 'ovarian cyst', 'cervical cancer', 'Uterine Leiomyoma', 'Adenomyosis', 'Endometrial lesion']


In [26]:
disease="Syncope"
print(disease in refined_differential_diseases)
print(disease in uniquePrimary)

False
False


In [27]:
print(disease in uniqueDiseases)

False


In [28]:
doctor="doctor"
model="gpt-4"

In [34]:
caseNumber=29#68
row= departmentdf.iloc[caseNumber]
caseNumber=row.id
medicalHistory=row.clinical_case_summary
principalDiagnosis=row.principal_diagnosis
differentialDiagnosis=row.differential_diagnosis
print("the case id is",caseNumber)
print("\nthe principal diagnosis is",principalDiagnosis)
print("\ndifferential diagnosis is",differentialDiagnosis)
print("\nthe case history is ",medicalHistory)

the case id is 300

the principal diagnosis is endometrial cancer

differential diagnosis is ['Endometrial Hyperplasia: The patient has abnormal menstrual changes, and the endometrium is thickened. This diagnosis cannot be ruled out. Further hysteroscopy and curettage can assist in diagnosis.']

the case history is  {'Patient Basic Information': 'Middle-aged female, 44 years old.', 'Chief Complaint': 'Abnormal menstrual changes for 4 years.', 'Medical History': "The patient's menstruation was normal in the past, 7/30 days, heavy flow, no obvious dysmenorrhea. 4 years ago, abnormal menstrual changes occurred without obvious cause, 7-10/23 days, heavy flow, accompanied by blood clots. Since the onset of the disease, the patient has no abdominal pain, no nausea and vomiting, no diarrhea, clear consciousness, good spirit, good eating and sleeping, normal bowel movements. The patient's usual health is general, with no history of hypertension, no history of coronary heart disease, no history

In [35]:
# response=doctor_prompt_disease_restricted_output_parser(medicalHistory, model, uniquePrimary,department)

In [36]:
model="llama3"
doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary,department)

Based on the patient's medical history, physical examination, lab reports, and image reports, I have identified the top 3 most likely diseases as follows:

**Disease 1:** Cervical Cancer
**Reason:** The patient has an abnormal cervical mass that bleeds upon touch, which is a concerning feature for cervical cancer. The MRI plain scan also shows multiple small cysts in the cervix, which could be indicative of cervical cancer.
**Next-Step:** Further imaging studies such as CT or PET scans to assess the extent of the disease and rule out other possible causes of the abnormal cervical mass.

**Disease 2:** Cervical Polyp
**Reason:** The patient has a fragile tissue at the external os of the cervix that bleeds upon touch, which could be consistent with a cervical polyp. The MRI plain scan also shows multiple small cysts in the cervix, which could be indicative of a cervical polyp.
**Next-Step:** Endocervical curettage or biopsy to confirm the diagnosis and rule out other possible causes of t

True

In [37]:
model="llama3.1"
doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary,department)

Based on the provided medical history, I have identified the top 3 most likely diseases of the patient using differential diagnosis. Here are my findings:

**Disease 1: Uterine Fibroid**

* **Name:** Uterine Fibroid
* **Reason:** The patient has a uterus that is enlarged to the size of 2+ months of pregnancy, with no tenderness. This suggests uterine fibroids, which can cause uterine enlargement and heavy menstrual bleeding.
* **Next-step:** MRI or ultrasound imaging may be needed to further confirm the presence and characteristics of the fibroid(s).

**Disease 2: Endometrial Cancer**

* **Name:** Endometrial Cancer
* **Reason:** The patient has abnormal menstrual changes for 4 years, including heavy flow and blood clots. She also has a mass of tissue at the external os of the cervix that bleeds upon touch. The MRI report mentions "Endometrial carcinoma" as a possible diagnosis.
* **Next-step:** Endometrial biopsy or dilation and curettage (D&C) may be needed to obtain tissue samples f

True

In [33]:
model="meditron"
# doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary,department)