loading required libraries

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.llm import doctor_prompt_disease_restricted_output_parser,doctor_prompt_disease_restricted_output_parser2
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json
from src.ollama2 import doctor_prompt_disease_restricted_output_parser_ollama,doctor_prompt_disease_restricted_output_parser_ollama_combined
load_dotenv()

True

importing the dataset

In [2]:
filePath="dataset/clinicallab/data_en.json"
with open(filePath, 'r', encoding='utf-8') as f:
            data = json.load(f)
print("number of total cases are",len(data))
print("each case have the following fields",list(data[0].keys()))

number of total cases are 1500
each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']


filtering the dataset

In [3]:

keys_to_include = ["id",'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
df = pd.DataFrame([{key: d[key] for key in keys_to_include} for d in data])

In [4]:
allDepartments=df['clinical_department'].value_counts()
print("number of departments available are",len(allDepartments))

print(allDepartments)

number of departments available are 24
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine department                      70
gastroenterology department                          70
neurosurgery department                              70
cardiac surgical department                          70
nephrology department                                60
gastrointestinal surgical department                 60
pediatrics department                                60
thyroid surgical department                  

In [5]:

df['preliminary_diagnosis'] = df['preliminary_diagnosis'].apply(convert_string_to_list)
df['diagnostic_basis'] = df['diagnostic_basis'].apply(convert_string_to_list)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(convert_string_to_list)
df['treatment_plan'] = df['treatment_plan'].apply(convert_string_to_list)
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

In [35]:
department="gynecology department"#"gynecology department"#"nephrology department"
departmentdf=filterDepartment(df,department)
departmentdf.to_json(f"dataset/clinicallab/department/{department}.json", orient='records', lines=False, indent=4)

In [36]:
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 8
principal_diagnosis
ovarian cyst             10
endometrial polyp        10
endometrial cancer       10
uterine fibroid          10
adenomyosis              10
cervical polyp           10
cervical cancer          10
heterotopic pregnancy    10
Name: count, dtype: int64
number of preliminary_diagnosis are 8


In [37]:
# Function to extract disease names from a single row
def extract_disease_names_from_row(differential_diagnosis_list):
    disease=[entry.split(":")[0].strip() for entry in differential_diagnosis_list]
    return disease

# Apply the function to each row and combine all lists into one
differential_diseases = departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row).sum()
refined_differential_diseases=[]
for disease in differential_diseases:
    if len(disease) <20:
        refined_differential_diseases.append(disease)
uniqueDiseases=departmentdf["principal_diagnosis"].unique().tolist()
uniquePrimary=uniqueDiseases[:]
uniqueDiseases.extend(refined_differential_diseases)
uniqueDiseases=list(set(uniqueDiseases))
print("number of unique diseases are",len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 46
['Uterine fibroids', 'Incomplete Abortion', 'Vaginal wall mass', 'heterotopic pregnancy', 'cervical polyp', 'Chronic cervicitis', 'Acute appendicitis', 'Uterine sarcoma', 'Atrophic vaginitis', 'Uterine leiomyoma', 'Ovarian tumor', 'Appendicitis', 'Acute salpingitis', 'Cervical lesion', 'Cervical lesions', 'Acute Salpingitis', 'cervical cancer', 'Endometrial Lesions', 'Hemolytic Anemia', 'Endometrial Lesion', 'adenomyosis', 'Acute Appendicitis', 'Complete Abortion', 'endometrial polyp', 'uterine fibroid', 'Adenomyosis', 'Uterine Adenomyoma', 'Gastric cancer', 'Endometrial polyps', 'Endometrial lesion', 'Cervical cancer', 'ovarian cyst', 'Endometrial lesions', 'endometrial cancer', 'Uterine Leiomyoma', 'Ovarian Tumor', 'Hydrosalpinx', 'Endometrial Cancer', "Meniere's Disease", 'Miscarriage', 'Cervical Lesions', 'Endometrial cancer', 'Vaginal Wall Mass', 'Uterine Sarcoma', 'Ectopic Pregnancy', 'Uterine adenomyoma']


In [38]:
disease="Syncope"
print(disease in refined_differential_diseases)
print(disease in uniquePrimary)

False
False


In [39]:
print(disease in uniqueDiseases)

False


In [40]:
doctor="doctor"
model="gpt-4"

In [66]:
caseNumber=63#68
row= departmentdf.iloc[caseNumber]
caseNumber=row.id
medicalHistory=row.clinical_case_summary
principalDiagnosis=row.principal_diagnosis
differentialDiagnosis=row.differential_diagnosis
print("the case id is",caseNumber)
print("\nthe principal diagnosis is",principalDiagnosis)
print("\ndifferential diagnosis is",differentialDiagnosis)
print("\nthe case history is ",medicalHistory)

the case id is 334

the principal diagnosis is cervical cancer

differential diagnosis is ['The diagnosis is clear, no differentiation needed.']

the case history is  {'Patient Basic Information': 'Diagnosed with cervical cancer for 2 years, diarrhea for 1 week.', 'Chief Complaint': 'Middle-aged female, 49 years old.', 'Medical History': "The patient was diagnosed with cervical cancer 2 years ago, and underwent 'open abdominal total hysterectomy + bilateral salpingo-oophorectomy + pelvic lymph node dissection' in the hospital. After the surgery, 'paclitaxel injection (210mg, d1, q21d), carboplatin (450mg, d1, q21d)' was given for 2 cycles of chemotherapy. Followed by pelvic radiotherapy, prescription dose: '95%PTV 5000cGy/25f'. During radiotherapy, 'paclitaxel injection (210mg, d1, q21d), carboplatin (450mg, d1, q21d)' was given for 1 cycle of concurrent chemotherapy. After chemotherapy, the patient's leukocytes decreased, and the chemotherapy regimen was adjusted to 'cisplatin (40mg, 

In [67]:
model="llama3"
diagnosis1=doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary,department)

Based on the patient's medical history, physical examination, lab reports, and image reports, my top 3 most likely diseases are:

**Disease 1: Cervical Cancer**

* Reason: The patient was diagnosed with cervical cancer 2 years ago and underwent surgery and chemotherapy. She had poor appetite, nausea, and vomiting after eating 4 months ago, which is a common symptom of advanced cervical cancer.
* Next step: To confirm the recurrence of cervical cancer, I would recommend performing a biopsy or an imaging study such as CT or PET scan to evaluate any suspicious lesions in the cervix.

**Disease 2: Adenomyosis**

* Reason: The patient had symptoms of diarrhea and yellow watery stools, which can be related to adenomyosis. Additionally, she has a history of cervical cancer, which is often associated with adenomyosis.
* Next step: To confirm the diagnosis of adenomyosis, I would recommend performing an ultrasound or MRI scan to evaluate the size and location of any suspected lesions.

**Diseas

In [68]:
# model="llama3.1"
# diagnosis2=doctor_prompt_disease_restricted_output_parser_ollama(medicalHistory, model, uniquePrimary,department)
model="gpt-4o"
diagnosis2=doctor_prompt_disease_restricted_output_parser(medicalHistory, model, uniquePrimary,department)

Based on the provided medical history, physical examination, lab reports, and imaging findings, here are the top 3 most likely diseases for the patient using differential diagnosis:

1. **Disease Name:** Cervical Cancer
   - **Reason:** The patient has a history of cervical cancer diagnosed 2 years ago, and recent MRI shows cervical cancer invading the cervical stroma. Elevated tumor markers such as CEA, CA125, and HE4 further support the presence of malignancy.
   - **Next Step:** A follow-up MRI with contrast and possibly a PET-CT scan to assess the extent of the disease and any metastasis. Additionally, a biopsy of any suspicious lesions could be considered to confirm the diagnosis.

2. **Disease Name:** Endometrial Cancer
   - **Reason:** Elevated CA125 and HE4 levels can also be indicative of endometrial cancer. The patient's history of multiple gynecological surgeries and chemotherapy could predispose her to secondary malignancies.
   - **Next Step:** Transvaginal ultrasound and 

In [69]:
model="llama3.1"
final_diagnosis=doctor_prompt_disease_restricted_output_parser_ollama_combined(medicalHistory,model,diagnosis1,diagnosis2,department)

Based on the patient's medical history, physical examination, lab reports, and image reports, I have thoroughly investigated both diagnoses provided by the two doctors.

**Disease 1: Cervical Cancer**

As Doctor 1 has mentioned, the patient was diagnosed with cervical cancer 2 years ago and underwent surgery and chemotherapy. The symptoms of poor appetite, nausea, and vomiting after eating 4 months ago are consistent with advanced cervical cancer. However, I would like to note that the current diagnosis is more complex due to the presence of diarrhea and yellow watery stools.

**Disease 2: Adenomyosis**

Doctor 1 has also mentioned adenomyosis as a possible diagnosis, citing symptoms of diarrhea and yellow watery stools. While these symptoms are not uncommon in adenomyosis, I would like to note that they can also be related to other conditions such as intestinal obstruction or even recurrence of cervical cancer.

**My Investigation**

Upon reviewing the patient's medical history, physi

In [70]:
model="gpt-4o"
diagnosis2=doctor_prompt_disease_restricted_output_parser2(medicalHistory, model, uniquePrimary,department)
print(diagnosis2.content)

### Clinical Diagnosis

#### 1. Principal Diagnosis:
**Cervical Cancer Recurrence with Intestinal Involvement**

#### 2. Diagnostic Basis:
- **Medical History:**
  - The patient has a history of cervical cancer diagnosed 2 years ago.
  - Underwent extensive surgical treatment including total hysterectomy, bilateral salpingo-oophorectomy, and pelvic lymph node dissection.
  - Received multiple cycles of chemotherapy and radiotherapy.
  - History of multiple intestinal obstructions and hypoglycemic coma in the past 3 months.
  - Recent symptoms include poor appetite, nausea, vomiting, and diarrhea (4-6 times/day with yellow watery stools).

- **Physical Examination:**
  - No obvious abnormalities observed, which may indicate that the symptoms are more related to internal pathology rather than external physical signs.

- **Imaging Examination:**
  - MRI shows cervical cancer invading the cervical stroma, suggesting possible local recurrence or residual disease.

- **Laboratory Examination