loading required libraries

In [None]:
# Data handling.
import pandas as pd
# Allow up to 5000 characters per cell when a DataFrame is displayed, so long
# text columns are not truncated at the pandas default width.
pd.set_option('display.max_colwidth', 5000)
import json
from dotenv import load_dotenv
# Project-local helpers: LLM prompt wrappers (src.llm, src.ollama2) and
# dataset conversion / filtering utilities (src.utils).
from src.llm import doctor_prompt_disease_restricted_gpt
from src.utils import convert_string_to_list,convert_clinical_case_summary,filterDepartment,getDepartmentStatistics,convert_cases_to_json
from src.ollama2 import doctor_prompt_disease_restricted_ollama,doctor_prompt_disease_restricted_ollama_combined
# Load variables from a .env file into the process environment
# (presumably credentials used by the LLM helpers - TODO confirm).
load_dotenv()

importing the dataset

In [None]:
# Read the whole case collection from the JSON file at filePath.
filePath = "dataset/clinicallab/data_en.json"
with open(filePath, mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Quick summary: how many cases were loaded and which fields the first case has.
print("number of total cases are", len(data))
print("each case have the following fields", [*data[0].keys()])

filtering the dataset

In [None]:

# Fields copied from every case record into the DataFrame, in column order.
keys_to_include = [
    "id",
    "clinical_department",
    "principal_diagnosis",
    "preliminary_diagnosis",
    "diagnostic_basis",
    "differential_diagnosis",
    "treatment_plan",
    "clinical_case_summary",
    "imageological_examination",
    "laboratory_examination",
    "pathological_examination",
    "therapeutic_principle",
]

# One row per case, restricted to the selected fields. A case that lacks any
# of these fields raises KeyError.
case_rows = [dict((field, case[field]) for field in keys_to_include) for case in data]
df = pd.DataFrame(case_rows)

In [None]:
# Number of cases per clinical department.
allDepartments = df["clinical_department"].value_counts()

# Report how many distinct departments exist, then the per-department counts.
print("number of departments available are", allDepartments.size)
print(allDepartments)

In [None]:

# Columns passed through convert_string_to_list (presumably string-encoded
# lists in the raw data - TODO confirm against src.utils).
# NOTE: the columns are overwritten in place, so re-running this cell feeds
# already-converted values back into the converters.
for list_column in (
    "preliminary_diagnosis",
    "diagnostic_basis",
    "differential_diagnosis",
    "treatment_plan",
):
    df[list_column] = df[list_column].apply(convert_string_to_list)

# The case summary has its own dedicated converter.
df["clinical_case_summary"] = df["clinical_case_summary"].apply(convert_clinical_case_summary)

In [None]:
# Department to analyse. Other values that have been used here:
# "respiratory medicine department", "nephrology department", "pediatrics department".
department = "gynecology department"
departmentdf = filterDepartment(df, department)

# Save the department's cases as an indented JSON array of records.
department_json_path = f"dataset/clinicallab/department/{department}.json"
departmentdf.to_json(department_json_path, orient="records", lines=False, indent=4)

In [None]:
# Run the src.utils statistics helper on the selected department's cases.
# As the last expression of the cell, whatever it returns is displayed as the cell output.
getDepartmentStatistics(departmentdf)

In [None]:
# Function to extract disease names from a single row
def extract_disease_names_from_row(differential_diagnosis_list):
    """Return the disease name of every differential-diagnosis entry.

    The name is the text before the first ":" in an entry, with surrounding
    whitespace removed. An entry without a colon is returned whole (stripped).

    Args:
        differential_diagnosis_list: iterable of strings, one per entry.

    Returns:
        A list of names in the same order as the input entries.
    """
    names = []
    for entry in differential_diagnosis_list:
        name, _, _ = entry.partition(":")
        names.append(name.strip())
    return names

# Apply the function to each row and flatten all per-row lists into one list.
# A nested comprehension is used instead of Series.sum(): summing lists copies
# the accumulated list again for every row, and for an empty department
# Series.sum() returns the integer 0, which cannot be iterated below.
differential_diseases = [
    name
    for row_names in departmentdf["differential_diagnosis"].apply(extract_disease_names_from_row)
    for name in row_names
]

# Keep only short names; longer strings are presumably free-text explanations
# rather than disease names (heuristic cutoff - TODO confirm).
MAX_DISEASE_NAME_LENGTH = 20  # exclusive upper bound on the name length
refined_differential_diseases = [
    disease for disease in differential_diseases if len(disease) < MAX_DISEASE_NAME_LENGTH
]

# Distinct principal diagnoses of the department (used later to restrict the LLM prompts).
uniquePrimary = departmentdf["principal_diagnosis"].unique().tolist()

# Principal diagnoses plus the short differential names, de-duplicated.
# dict.fromkeys keeps first-seen order, so the list is identical on every run
# (a plain set() gives an arbitrary, run-dependent order for strings).
uniqueDiseases = list(dict.fromkeys(uniquePrimary + refined_differential_diseases))
print("number of unique diseases are",len(uniqueDiseases))
print(uniqueDiseases)

In [None]:

# caseNumbers=[3,13,23,33,43,53]
# for caseNumber in caseNumbers:
#     row= departmentdf.iloc[caseNumber]
#     caseNumber=row.id
#     medicalHistory=row.clinical_case_summary
#     principalDiagnosis=row.principal_diagnosis
#     differentialDiagnosis=row.differential_diagnosis
#     combined_report={"case-id":caseNumber,"principal diagnosis":principalDiagnosis,"differential diagnosis":differential_diseases,"medicalHistory":medicalHistory}
#     print("the case id is",caseNumber)
#     print("\nthe principal diagnosis is",principalDiagnosis)
#     print("\ndifferential diagnosis is",differentialDiagnosis)
#     print("\nthe case history is ",medicalHistory)
#     model="llama3"
#     diagnosis1=doctor_prompt_disease_restricted_ollama(medicalHistory, model, uniquePrimary,department)
#     print("diagnosis from llama is",diagnosis1)
#     model="llama3.1"
#     diagnosis2=doctor_prompt_disease_restricted_ollama(medicalHistory, model, uniquePrimary,department)
#     print("diagnosis from llama3.1 is",diagnosis2)
#     model="gpt-4o"
#     diagnosis3=doctor_prompt_disease_restricted_gpt(medicalHistory, model, uniquePrimary,department)
#     print("diagnosis from gpt-4 is",diagnosis3)

In [None]:
from fpdf import FPDF

class PDF(FPDF):
    """FPDF document holding one medical case per section plus three model diagnoses.

    Every text written to the document is first reduced to Latin-1 (characters
    outside Latin-1 become '?'), because all text is rendered with the
    built-in 'Arial' font.
    """

    @staticmethod
    def _latin1_safe(text):
        """Return ``text`` as a string with every non-Latin-1 character replaced by '?'."""
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    def header(self):
        """Write the centred report title; FPDF calls this hook for each new page."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Medical Case Report', 0, 1, 'C')

    def chapter_title(self, case_id):
        """Write a bold, left-aligned 'Case ID: <case_id>' line followed by a small gap."""
        self.set_font('Arial', 'B', 12)
        # Sanitised like every other text in this class so an unusual id cannot
        # break rendering with the Latin-1 font.
        self.cell(0, 10, self._latin1_safe(f'Case ID: {case_id}'), 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, model_name, diagnosis):
        """Write one model's diagnosis as a wrapped paragraph.

        Args:
            model_name: label shown in front of the diagnosis text.
            diagnosis: the diagnosis text produced by that model.
        """
        self.set_font('Arial', '', 12)
        self.multi_cell(0, 10, self._latin1_safe(f'Diagnosis from {model_name}: {diagnosis}'))
        self.ln(10)

    def add_case(self, case_id, principalDiagnosis, differentialDiagnosis, medicalHistory, diagnosis1, diagnosis2, diagnosis3):
        """Append one case to the document.

        The first page holds the case id, principal diagnosis, differential
        diagnosis and medical history. Each of the three diagnoses then gets
        its own page, labelled "llama3", "llama3.1" and "gpt-4o" in that order.

        Args:
            case_id: identifier printed in the case title.
            principalDiagnosis: reference principal diagnosis of the case.
            differentialDiagnosis: reference differential diagnosis of the case.
            medicalHistory: case summary text.
            diagnosis1: diagnosis text labelled "llama3".
            diagnosis2: diagnosis text labelled "llama3.1".
            diagnosis3: diagnosis text labelled "gpt-4o".
        """
        self.add_page()
        self.chapter_title(case_id)

        self.set_font('Arial', '', 12)
        self.multi_cell(0, 10, self._latin1_safe(f'Principal Diagnosis: {principalDiagnosis}'))
        self.ln(5)
        self.multi_cell(0, 10, self._latin1_safe(f'Differential Diagnosis: {differentialDiagnosis}'))
        self.ln(5)
        self.multi_cell(0, 10, self._latin1_safe(f'Medical History: {medicalHistory}'))
        self.ln(10)

        # Add a page for each diagnosis
        labelled_diagnoses = zip(
            ("llama3", "llama3.1", "gpt-4o"),
            (diagnosis1, diagnosis2, diagnosis3),
        )
        for model_name, diagnosis in labelled_diagnoses:
            self.add_page()
            self.chapter_body(model_name, diagnosis)

# Initialize the PDF
pdf = PDF()
pdf.set_left_margin(10)
pdf.set_right_margin(10)

# Positional (iloc) indices of the department cases to include in the report.
caseNumbers = [3,13,23,33,43]  # You can include more cases here

# Fail fast: check every position before any model is queried. Otherwise an
# out-of-range position would raise IndexError only when the loop reaches it,
# after the earlier cases have already spent LLM calls and before any PDF is written.
department_case_count = len(departmentdf)
out_of_range = [
    position for position in caseNumbers
    if not -department_case_count <= position < department_case_count
]
if out_of_range:
    raise IndexError(
        f"caseNumbers {out_of_range} are out of range for departmentdf "
        f"with {department_case_count} rows"
    )

# Iterate through the cases and generate the report
for caseNumber in caseNumbers:
    print(caseNumber)
    row = departmentdf.iloc[caseNumber]
    # Explicit key access: unlike attribute access it can never be shadowed by
    # a Series attribute or method of the same name.
    case_id = row["id"]
    medicalHistory = row["clinical_case_summary"]
    principalDiagnosis = row["principal_diagnosis"]
    differentialDiagnosis = row["differential_diagnosis"]

    # Ask each model for a diagnosis restricted to the department's principal diagnoses.
    diagnosis1 = doctor_prompt_disease_restricted_ollama(medicalHistory, "llama3", uniquePrimary, department)
    print("done llama3")
    diagnosis2 = doctor_prompt_disease_restricted_ollama(medicalHistory, "llama3.1", uniquePrimary, department)
    print("done llama3.1")
    diagnosis3 = doctor_prompt_disease_restricted_gpt(medicalHistory, "gpt-4o", uniquePrimary, department)
    print("done gpt-4o")

    pdf.add_case(case_id, principalDiagnosis, differentialDiagnosis, medicalHistory, diagnosis1, diagnosis2, diagnosis3)

# Output the PDF to a file
pdf_file_path = "medical_case_report.pdf"
pdf.output(pdf_file_path)

print(f"PDF report generated: {pdf_file_path}")


In [None]:
# model="llama3"
# diagnosis1=doctor_prompt_disease_restricted_ollama(medicalHistory, model, uniquePrimary,department)
# print(diagnosis1)