loading required libraries

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
from src.llm import doctor_prompt_disease_restricted_gpt
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json,PDF
from src.ollama2 import doctor_prompt_disease_restricted_ollama,doctor_prompt_disease_restricted_ollama_combined
load_dotenv()

True

importing the dataset

In [2]:
# Path of the dataset JSON file, relative to the notebook's working directory.
filePath = "dataset/clinicallab/data_en.json"

# Read the whole file as UTF-8 text and parse it; the prints below index `data`
# like a sequence of dict-like cases.
with open(filePath, encoding='utf-8') as dataset_file:
    data = json.loads(dataset_file.read())

# Sanity check: how many cases were loaded and which fields the first one carries.
print("number of total cases are", len(data))
print("each case have the following fields", list(data[0]))

number of total cases are 1500
each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']


filtering the dataset

In [3]:

# Fields copied from every raw case into the working DataFrame, in column order.
keys_to_include = [
    "id",
    'clinical_department',
    'principal_diagnosis',
    'preliminary_diagnosis',
    'diagnostic_basis',
    'differential_diagnosis',
    'treatment_plan',
    'clinical_case_summary',
    'imageological_examination',
    'laboratory_examination',
    'pathological_examination',
    'therapeutic_principle',
]

# One row per case, restricted to the fields above. A case that lacks one of the
# fields raises KeyError instead of silently producing a missing value.
df = pd.DataFrame(
    [dict((field, case[field]) for field in keys_to_include) for case in data]
)

In [4]:
# Number of cases per clinical department.
allDepartments = df['clinical_department'].value_counts()
print("number of departments available are", len(allDepartments))

print(allDepartments)

# Run the src.utils converters over the text columns, overwriting them in `df`.
# NOTE(review): this rewrites `df` in place, so re-running the cell feeds already
# converted values back into the converters — confirm they tolerate that, or
# restart the kernel before re-running.
for list_column in (
    'preliminary_diagnosis',
    'diagnostic_basis',
    'differential_diagnosis',
    'treatment_plan',
):
    df[list_column] = df[list_column].apply(convert_string_to_list)

# The case summary uses its own dedicated converter.
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

number of departments available are 24
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine department                      70
gastroenterology department                          70
neurosurgery department                              70
cardiac surgical department                          70
nephrology department                                60
gastrointestinal surgical department                 60
pediatrics department                                60
thyroid surgical department                  

In [5]:
# Department explored in the next cells. Values previously tried here:
# "respiratory medicine department", "nephrology department", "pediatrics department".
department = "gynecology department"

# Department-specific frame produced by filterDepartment (src.utils) from `df`.
departmentdf = filterDepartment(df, department)

# Save that frame as an indented JSON array of records, one file per department name.
departmentdf.to_json(
    f"dataset/clinicallab/department/{department}.json",
    orient='records',
    lines=False,
    indent=4,
)

In [6]:
# Print the diagnosis statistics of the selected department using the src.utils helper;
# the recorded output below shows the per-principal-diagnosis case counts it reports.
getDepartmentStatistics(departmentdf)

number of principal diagnosis are 8
principal_diagnosis
ovarian cyst             10
endometrial polyp        10
endometrial cancer       10
uterine fibroid          10
adenomyosis              10
cervical polyp           10
cervical cancer          10
heterotopic pregnancy    10
Name: count, dtype: int64
number of preliminary_diagnosis are 8


In [7]:
def extract_disease_names_from_row(differential_diagnosis_list):
    """Return the disease name of every differential-diagnosis entry of one row.

    Each entry is a string; its name is the text before the first ":" with the
    surrounding whitespace removed. An entry without ":" is returned whole (stripped).
    """
    names = []
    for entry in differential_diagnosis_list:
        name, _separator, _explanation = entry.partition(":")
        names.append(name.strip())
    return names

# Collect every disease name mentioned in the department's differential diagnoses.
# The per-row lists are flattened with a comprehension rather than Series.sum():
# summing lists re-copies the accumulated list for every row, and on an empty Series
# it returns the integer 0, which made the filtering step below raise TypeError.
differential_diseases = [
    name
    for row_names in departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row)
    for name in row_names
]

# Keep only labels shorter than 20 characters; longer strings are dropped.
refined_differential_diseases = [name for name in differential_diseases if len(name) < 20]

# Principal diagnoses of the department; uniquePrimary keeps them on their own,
# without the differential names merged in below.
uniqueDiseases = departmentdf["principal_diagnosis"].unique().tolist()
uniquePrimary = uniqueDiseases[:]

# Merge principal and differential names and drop exact duplicates.
# NOTE(review): the comparison is case-sensitive, so spellings such as
# "Ovarian Tumor" and "Ovarian tumor" count as different diseases — confirm intended.
uniqueDiseases = list(set(uniqueDiseases + refined_differential_diseases))
print("number of unique diseases are", len(uniqueDiseases))
print(uniqueDiseases)

number of unique diseases are 46
['cervical polyp', 'Ovarian Tumor', 'Cervical lesion', 'Cervical Lesions', 'Endometrial cancer', 'Endometrial Lesion', 'Appendicitis', 'Ovarian tumor', 'heterotopic pregnancy', 'Acute Salpingitis', 'uterine fibroid', 'Gastric cancer', 'Uterine Sarcoma', 'Endometrial polyps', 'Uterine Leiomyoma', 'Vaginal Wall Mass', 'Endometrial lesions', 'Uterine sarcoma', 'Atrophic vaginitis', 'Uterine Adenomyoma', 'Acute Appendicitis', 'Uterine adenomyoma', 'Endometrial lesion', "Meniere's Disease", 'Adenomyosis', 'Miscarriage', 'Hydrosalpinx', 'endometrial cancer', 'Uterine fibroids', 'Chronic cervicitis', 'cervical cancer', 'adenomyosis', 'Hemolytic Anemia', 'Endometrial Cancer', 'Ectopic Pregnancy', 'Complete Abortion', 'endometrial polyp', 'Uterine leiomyoma', 'Acute appendicitis', 'Incomplete Abortion', 'ovarian cyst', 'Vaginal wall mass', 'Acute salpingitis', 'Cervical cancer', 'Cervical lesions', 'Endometrial Lesions']


In [8]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
import json
def doctor_prompt_disease_restricted_ollama(medical_history, model, diseases, department,
                                            temperature=0.1, num_predict=1000, num_ctx=12000):
    """Ask an Ollama-served model for a differential diagnosis restricted to `diseases`.

    NOTE(review): this definition shadows the function of the same name imported from
    src.ollama2 in the first cell; cells executed after this one call this version.

    Args:
        medical_history: Patient case data. It is serialized with json.dumps before
            being placed in the human message, so it must be JSON-serializable.
        model: Name of the Ollama model to run (e.g. "llama3.1").
        diseases: Candidate diseases, interpolated into the system prompt.
        department: Clinical department name, interpolated into the system prompt.
        temperature: Forwarded to OllamaLLM as `temperature`.
        num_predict: Forwarded to OllamaLLM as `num_predict`.
        num_ctx: Forwarded to OllamaLLM as `num_ctx`.

    Returns:
        The result of invoking the prompt-to-model chain.
    """
    # Keep the model *name* in `model` and give the LLM object its own name, so the
    # progress message at the end can report the name without a second variable.
    llm = OllamaLLM(model=model, temperature=temperature, num_predict=num_predict, num_ctx=num_ctx)

    # System message: role, task and the required answer layout.
    # NOTE(review): the task text asks for the "top 3" diseases while the layout lists
    # four numbered entries — confirm which one is intended before changing the prompt.
    system_template = """You are a experienced doctor from {department} and you will be provided with a medical history of a patient containing the past medical history
    ,physical examination,laboratory examination and Imaging examination results.Your task is to identify the top 3 most likely diseases of the patient using differential diagnosis using given below diseases 
    the possible set of diseases are {diseases}
    Analyze by thinking step by step each physical examination,laboratory examination and Imaging examination based on above disases
    Once it is done select the top possible disease using above analysis and differential diagnosis.I need you to not miss any examination reports and think step by step what each
    examination report suggest
    output should be formated in the following format
    **Medical Examination Analysis**
    ***Physical Examination***
    ***Laboratory Examination:***
    ***Imaging Examination***
    
    **Differential Diagnosis**
    1.disease1:Detailed reasons based on the case history
    2.disease2:Detailed reasons based on the case history
    3.disease3:Detailed reasons based on the case history
    4.disease4:Detailed reasons based on the case history
    
    **Final Diagnosis**
    ***Name of the most possible disease***
    ****possible reasons****
    Detailed reasons based on past medical history,physical examination,laboratory examination and image examination
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

    # Human message: the patient's case itself.
    human_template = "Patient's medical history: {medical_history}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    # Compose both messages into a chat prompt and pipe it into the model.
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
    chain = chat_prompt | llm

    results = chain.invoke({
        "department": department,
        "diseases": diseases,
        "medical_history": json.dumps(medical_history),
    })
    print("done for model", model)
    return results


In [9]:
# Departments to evaluate. Each name appears exactly once: a repeated entry would
# re-run every model call for that department and overwrite the same PDF report.
departments = [
    "respiratory medicine department",
    "nephrology department",
    "pediatrics department",
    "gynecology department",
    "endocrinology department",
    "neurology department",
    "cardiac surgical department",
    "gastrointestinal surgical department",
]

# (label shown in the report, diagnosis function, model name), in query order.
model_runs = [
    ("llama3.1", doctor_prompt_disease_restricted_ollama, "llama3.1"),
    ("gpt-4", doctor_prompt_disease_restricted_gpt, "gpt-4"),
    ("mistral", doctor_prompt_disease_restricted_ollama, "mistral"),
    ("gemma2", doctor_prompt_disease_restricted_ollama, "gemma2"),
    ("phi3", doctor_prompt_disease_restricted_ollama, "phi3:14b"),
    ("mistral-nemo", doctor_prompt_disease_restricted_ollama, "mistral-nemo"),
]

for department in departments:
    print("department is", department)
    departmentdf = filterDepartment(df, department)
    print(len(departmentdf))

    # Sample every 10th row of the department, starting at position 1.
    caseNumbers = list(range(1, len(departmentdf), 10))
    print(caseNumbers)

    # Disease names from the differential diagnoses, flattened row by row. A
    # comprehension is used instead of Series.sum(), which returns the integer 0 for
    # an empty department and re-copies the accumulated list for every row.
    differential_diseases = [
        name
        for row_names in departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row)
        for name in row_names
    ]
    # Keep only labels shorter than 20 characters.
    refined_differential_diseases = [name for name in differential_diseases if len(name) < 20]

    # Only uniquePrimary (the principal diagnoses) is handed to the models below;
    # uniqueDiseases additionally merges in the differential names.
    uniqueDiseases = departmentdf["principal_diagnosis"].unique().tolist()
    uniquePrimary = uniqueDiseases[:]
    uniqueDiseases = list(set(uniqueDiseases + refined_differential_diseases))

    # One PDF report per department.
    pdf = PDF()
    pdf.set_left_margin(10)
    pdf.set_right_margin(10)
    getDepartmentStatistics(departmentdf)

    for caseNumber in caseNumbers:
        print(caseNumber)
        row = departmentdf.iloc[caseNumber]
        case_id = row["id"]
        medicalHistory = row["clinical_case_summary"]
        principalDiagnosis = row["principal_diagnosis"]
        differentialDiagnosis = row["differential_diagnosis"]

        # Query each model in turn; every call blocks until that model has answered.
        diagnoses = [
            (label, diagnose(medicalHistory, model_name, uniquePrimary, department))
            for label, diagnose, model_name in model_runs
        ]

        print("done diagnosing")
        pdf.add_case(case_id, principalDiagnosis, differentialDiagnosis, medicalHistory, diagnoses)

    # Write the department's report to disk.
    pdf_file_path = f"./medical-reports/medical_case_report_{department}.pdf"
    pdf.output(pdf_file_path)

    print(f"PDF report generated: {pdf_file_path}")

department is respiratory medicine department
70
[1, 11, 21, 31, 41, 51, 61]
number of principal diagnosis are 7
principal_diagnosis
respiratory failure                      10
chronic obstructive pulmonary disease    10
bronchiectasis                           10
pulmonary embolism                       10
pulmonary tuberculosis                   10
lung abscess                             10
pulmonary infection                      10
Name: count, dtype: int64
number of preliminary_diagnosis are 7
1
done for model llama3.1


  warn_deprecated(
  warn_deprecated(


done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
11
done for model llama3.1
done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
21
done for model llama3.1
done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
31
done for model llama3.1
done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
41
done for model llama3.1
done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
51
done for model llama3.1
done for model gpt-4
done for model mistral
done for model gemma2
done for model phi3:14b
done for model mistral-nemo
done diagnosing
61
done for model llama3.1
done for model gpt-4
done for mode