loading required libraries

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.llm import doctor_prompt_disease_restricted_output_parser,doctor_prompt_disease_restricted_output_parser2
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json
load_dotenv()

True

importing the dataset

In [2]:
filePath="dataset\clinicallab\data_en.json"
with open(filePath, 'r', encoding='utf-8') as f:
            data = json.load(f)
print("number of total cases are",len(data))
print("each case have the following fields",list(data[0].keys()))

number of total cases are 1500
each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']


filtering the dataset

In [3]:

keys_to_include = ["id",'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']
df = pd.DataFrame([{key: d[key] for key in keys_to_include} for d in data])

In [4]:
allDepartments=df['clinical_department'].value_counts()
print("number of departments available are",len(allDepartments))

print(allDepartments)

number of departments available are 24
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
otolaryngology head and neck surgical department     80
neurology department                                 80
endocrinology department                             80
gynecology department                                80
neurosurgery department                              70
thoracic surgical department                         70
respiratory medicine department                      70
cardiac surgical department                          70
gastroenterology department                          70
pediatrics department                                60
nephrology department                                60
gastrointestinal surgical department                 60
thyroid surgical department                  

In [5]:

df['preliminary_diagnosis'] = df['preliminary_diagnosis'].apply(convert_string_to_list)
df['diagnostic_basis'] = df['diagnostic_basis'].apply(convert_string_to_list)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(convert_string_to_list)
df['treatment_plan'] = df['treatment_plan'].apply(convert_string_to_list)
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

In [6]:
department="nephrology department"
departmentdf=filterDepartment(df,department)
departmentdf.to_json(f"{department}.json", orient='records', lines=False, indent=4)

In [7]:
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 6
principal_diagnosis
acute pyelonephritis       10
acute renal failure        10
chronic kidney disease     10
urinary tract infection    10
glomerulonephritis         10
nephrotic syndrome         10
Name: count, dtype: int64
number of preliminary_diagnosis are 6


In [8]:
# Function to extract disease names from a single row
def extract_disease_names_from_row(differential_diagnosis_list):
    disease=[entry.split(":")[0].strip() for entry in differential_diagnosis_list]
    return disease

# Apply the function to each row and combine all lists into one
differential_diseases = departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row).sum()
refined_differential_diseases=[]
for disease in differential_diseases:
    if len(disease) <20:
        refined_differential_diseases.append(disease)
uniqueDiseases=departmentdf["principal_diagnosis"].unique().tolist()
uniquePrimary=uniqueDiseases[:]
uniqueDiseases.extend(refined_differential_diseases)
uniqueDiseases=list(set(uniqueDiseases))
print("number of unique diseases are",len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 17
['acute renal failure', 'Syringomyelia', 'glomerulonephritis', 'Multiple Sclerosis', 'nephrotic syndrome', 'Acute Kidney Injury', 'chronic kidney disease', 'Acute renal failure', 'Urolithiasis', 'urinary tract infection', 'Acute kidney injury', 'Urinary Calculi', 'Urinary Tumors', 'Urinary tract tumor', 'Urogenital Tumors', 'acute pyelonephritis', 'Urinary Tract Tumor']


In [9]:
disease="Syncope"
print(disease in refined_differential_diseases)
print(disease in uniquePrimary)

False
False


In [10]:
print(disease in uniqueDiseases)

False


In [11]:
doctor="doctor"
model="gpt-4"

In [17]:
caseNumber=25#68
row= departmentdf.iloc[caseNumber]
caseNumber=row.id
medicalHistory=row.clinical_case_summary
principalDiagnosis=row.principal_diagnosis
differentialDiagnosis=row.differential_diagnosis
print("the case id is",caseNumber)
print("\nthe principal diagnosis is",principalDiagnosis)
print("\ndifferential diagnosis is",differentialDiagnosis)
print("\nthe case history is ",medicalHistory)

the case id is 1166

the principal diagnosis is chronic kidney disease

differential diagnosis is ['Acute Kidney Injury: There is often a clear cause, with oliguria, anuria, and increased creatinine occurring in a short period of time. There is generally no severe anemia, and both kidneys do not atrophy. Kidney function can return to normal after the cause is corrected. This patient has no obvious cause of the disease, combined with anemia, the possibility of chronic kidney disease is higher, but it is still necessary to actively complete relevant Auxiliary Examination to rule out reversible factors.']

the case history is  {'Patient Basic Information': 'Elderly male, 65 years old.', 'Chief Complaint': 'Found blood creatinine increased for 2 years, fatigue and abdominal distension for 8 months.', 'Medical History': "The patient was found to have a blood creatinine level of over 200 umol/L during a physical examination 2 years ago, with edema in both lower limbs, presenting as symmetric

In [18]:
# response=doctor_prompt_disease_restricted_output_parser(medicalHistory, model, uniqueDiseases)
response=doctor_prompt_disease_restricted_output_parser(medicalHistory, model, uniquePrimary)


Parsed output:
{
  "diagnosis": "chronic kidney disease",
  "reasoning": "The patient has a history of diabetes and hypertension, both of which are risk factors for chronic kidney disease. The patient's symptoms of fatigue, abdominal distension, decreased urine output, and edema in both lower limbs are consistent with chronic kidney disease. The patient's laboratory results show elevated levels of urea and creatinine, which are markers of kidney function, and a decreased red blood cell count, which can occur in chronic kidney disease due to decreased erythropoietin production by the kidneys. The patient's blood biochemistry also shows abnormalities in electrolyte levels, including hyperkalemia and hyponatremia, which can occur in chronic kidney disease due to the kidneys' role in electrolyte balance. The patient's ultrasound findings of abdominal fluid could be due to fluid retention, which can occur in chronic kidney disease due to decreased kidney function.",
  "further_examinations

In [19]:
response=doctor_prompt_disease_restricted_output_parser2(medicalHistory, model, uniquePrimary)


Parsed output:
{
  "diagnosis": [
    "chronic kidney disease",
    "acute renal failure",
    "nephrotic syndrome"
  ],
  "reasoning": "The patient's medical history and laboratory results indicate a long-term kidney problem. The patient has had an elevated blood creatinine level for 2 years, which is a sign of kidney dysfunction. The patient's symptoms of fatigue, abdominal distension, decreased urine output, and edema in both lower limbs are consistent with chronic kidney disease. The patient's laboratory results show a significantly elevated creatinine level (931 umol/L), which is a sign of acute renal failure. The patient's symptoms of foamy urine and edema in both lower limbs are also consistent with nephrotic syndrome. The patient's history of diabetes and hypertension, both of which are risk factors for kidney disease, further support these diagnoses.",
  "further_examinations": "The patient should undergo a kidney biopsy to confirm the diagnosis of chronic kidney disease and 

In [20]:
uniquePrimary

['acute pyelonephritis',
 'acute renal failure',
 'chronic kidney disease',
 'urinary tract infection',
 'glomerulonephritis',
 'nephrotic syndrome']