### loading setup to run ollama

In [None]:
!sudo apt update
!sudo apt install -y pciutils
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
import threading
import subprocess
import time

def run_ollama_serve():
  subprocess.Popen(["ollama", "serve"])

thread = threading.Thread(target=run_ollama_serve)
thread.start()
time.sleep(5)

In [None]:
!ollama pull llama3.1

### loading google zip

In [None]:
from google.colab import drive
import os
drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive/diagnosis-assistant')
print("Current Working Directory:", os.getcwd())
!pip install -r requirements.txt

### loading required libraries

In [1]:
import os
import pandas as pd
pd.set_option('display.max_colwidth', 5000)
from dotenv import load_dotenv
import json
from src.utils import (
    load_preprocess_data,
    run_prediction,
    extract_disease_names,
    select_case_components_based_on_id,
    unzip_flatten_top_level,
    zip_folder)
load_dotenv()
from prompts import single_shot_disease_only_prompt,with_reasons_prompt,compare_others_prompt,self_refinement_prompt,compare_others_prompt_without_mine

### importing the dataset

In [2]:
required_fields=[ "Patient basic information",
                 "Chief complaint",
                 "Medical history",
                 "Physical examination",
                 "Laboratory examination",
                 "Imageological examination",
                 "Auxillary examination",
                 "Pathological examination"

]
departments=["nephrology department",
            "gynecology department",
            "endocrinology department",
            "neurology department",
             "pediatrics department",
             "cardiac surgical department",
             "gastrointestinal surgical department",
             "respiratory medicine department",
             "gastroenterology department",
             "urinary surgical department",
             "hepatobiliary and pancreas surgical department",
             "hematology department"
            ]

In [4]:
filePath="dataset/clinicallab/data_en.json"
df=load_preprocess_data(filePath)
df['differential_diagnosis'] = df['differential_diagnosis'].apply(extract_disease_names)
filtered_df = df[df['differential_diagnosis'].apply(lambda x: len(x) > 1)]
filtered_df['differential_diagnosis'] = filtered_df['differential_diagnosis'] + filtered_df['principal_diagnosis'].apply(lambda x: [x] if x else [])
df=filtered_df
print(len(df))


number of total cases are 1500

each case have the following fields ['id', 'clinical_case_uid', 'language', 'clinical_department', 'principal_diagnosis', 'preliminary_diagnosis', 'diagnostic_basis', 'differential_diagnosis', 'treatment_plan', 'clinical_case_summary', 'imageological_examination', 'laboratory_examination', 'pathological_examination', 'therapeutic_principle']

number of departments available are 24

 all the departments available are
clinical_department
orthopedics department                              100
anus and intestine surgical department              100
hepatobiliary and pancreas surgical department       99
urinary surgical department                          90
endocrinology department                             80
gynecology department                                80
otolaryngology head and neck surgical department     80
neurology department                                 80
thoracic surgical department                         70
respiratory medicine de

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['differential_diagnosis'] = filtered_df['differential_diagnosis'] + filtered_df['principal_diagnosis'].apply(lambda x: [x] if x else [])


### running prediction for single_shot,with_reasons and open_ended

In [None]:
# from src.utils import (
#     run_prediction,
# )
# from prompts import single_shot_disease_only_prompt,with_reasons_prompt,compare_others_prompt,self_refinement_prompt
# type="with_reasons"
# prompt=with_reasons_prompt
# type="single_shot"
# prompt=single_shot_disease_only_prompt
# # type="open_ended"
# # # prompt=open_ended__top4_prompt#
# run_prediction(df,prompt,departments,models=models,type=type,skip=5)

### evaluate the results

In [None]:

# import os
# import json
# from src.utils import evaluate_department_results
# types=["single_shot","with_reasons","check_others_input"]

# for type in types:
#     folder=os.path.join("results",type)
#     print("type is",type)
#     all_files=os.listdir(folder)
#     all_results={}
#     for file in all_files:
#         department_name=file.split("_")[0]
#         print(department_name)
#         if type in file:
#             file_path=os.path.join(folder,file)
#             department_name=file.split("_")[0]
#             with open(file_path, "r") as file:
#                 data = json.load(file)
#                 results=evaluate_department_results(data)
#             all_results[department_name]=results

#     with open(f"results/analysis/{type}.json", "w") as outfile:
#         json.dump(all_results, outfile)

In [5]:
import json
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.chat_models import ChatOpenAI
def get_diagnosis_by_other_models_by_id(id,department_results,models):
    all_diagnosis=[]
    for model in models:
        diagnosis=department_results[id]["predictions"][model]
        diagnosis= diagnosis.replace("\n", "")
        diagnosis= diagnosis.replace("```json", "")
        diagnosis= diagnosis.replace("```", "")
        all_diagnosis.append(diagnosis)
    return all_diagnosis

def compare_others_ollama(prompt,medical_history, modelname, diseases, department,doctor_diagnosis=[]):
    model = OllamaLLM(model=modelname,temperature=0.1,num_predict=1500,num_ctx=4096)#1500

    system_template = prompt

    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    chain = chat_prompt | model
    if len(doctor_diagnosis)==1:
        results=chain.invoke({"department": department,
                            "diseases": diseases,
                            "medical_history":json.dumps(medical_history),
                            "diagnosis":doctor_diagnosis[0]})
        return results
    elif len(doctor_diagnosis)==2:
        results=chain.invoke({"department": department,
                            "diseases": diseases,
                            "medical_history":json.dumps(medical_history),
                            "doctor_1_diagnosis":doctor_diagnosis[0],
                            "doctor_2_diagnosis":doctor_diagnosis[1]})
        return results
    elif len(doctor_diagnosis)==3:
        results=chain.invoke({"department": department,
                            "diseases": diseases,
                            "medical_history":json.dumps(medical_history),
                            "doctor_1_diagnosis":doctor_diagnosis[0],
                            "doctor_2_diagnosis":doctor_diagnosis[1],
                            "doctor_3_diagnosis":doctor_diagnosis[2]})
        return results



def compare_others_gpt(prompt,medical_history, model, diseases, department,doctor_diagnosis=[]):
    chat = ChatOpenAI(model_name=model, temperature=0.1,max_tokens=1500)
    system_template = prompt
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    if len(doctor_diagnosis)==1:
        messages = chat_prompt.format_prompt(
            department=department,
            diseases=diseases,
            medical_history=json.dumps(medical_history),
            diagnosis=doctor_diagnosis[0],

        ).to_messages()
    elif len(doctor_diagnosis)==2:
        messages = chat_prompt.format_prompt(
            department=department,
            diseases=diseases,
            medical_history=json.dumps(medical_history),
            doctor_1_diagnosis=doctor_diagnosis[0],
            doctor_2_diagnosis=doctor_diagnosis[1]

        ).to_messages()
    elif len(doctor_diagnosis)==3:
        messages = chat_prompt.format_prompt(
            department=department,
            diseases=diseases,
            medical_history=json.dumps(medical_history),
            doctor_1_diagnosis=doctor_diagnosis[0],
            doctor_2_diagnosis=doctor_diagnosis[1],
            doctor_3_diagnosis=doctor_diagnosis[2],

        ).to_messages()
    response = chat(messages)
    return response.content

### Check others including mine

In [None]:

all_models= ["gpt-4o","llama3.1","gemma2"]
all_models_to_check=["gpt-4o","llama3.1","gemma2"]
type="check_others_input"
prompt_type=compare_others_prompt
for department in departments:
    print(department)
    results={}
    with open(f"results4/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for model in all_models_to_check:
            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    with open(f"results/{type}/{department}_{type}.json", "w") as outfile:
        json.dump(results, outfile)

### Self refinement

In [6]:
all_models= ["gpt-4o"]
all_models_to_check=["gpt-4o"]
type="self_refinement"
prompt_type=self_refinement_prompt
for department in departments:
    print(department)
    results={}
    with open(f"results-all-models/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for i,model in enumerate(all_models_to_check):
            doctor_diagnosis=[doctor_diagnosis[i]]
            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details
    path = f"results-self-refinement/{type}/{model}/{department}_{type}.json"
    folder_path = os.path.dirname(path)
    os.makedirs(folder_path, exist_ok=True)
    with open(path, "w") as outfile:
      json.dump(results, outfile)

nephrology department
['1142', '1145', '1148', '1151', '1154', '1157', '1160', '1172', '1175', '1178', '1181', '1184', '1187', '1190', '1193', '1196', '1199']
1142


  chat = ChatOpenAI(model_name=model, temperature=0.1,max_tokens=1500)
  response = chat(messages)


1145
1148
1151
1154
1157
1160
1172
1175
1178
1181
1184
1187
1190
1193
1196
1199
gynecology department
['272', '275', '279', '284', '288', '292', '296', '303', '307', '310', '313', '316', '321', '324', '327', '330', '338', '341', '344', '347', '350']
272
275
279
284
288
292
296
303
307
310
313
316
321
324
327
330
338
341
344
347
350
endocrinology department
['122', '125', '128', '131', '134', '138', '141', '144', '147', '150', '153', '156', '159', '162', '165', '168', '171', '174', '177', '180', '183', '187', '190', '193', '196', '199']
122
125
128
131
134
138
141
144
147
150
153
156
159
162
165
168
171
174
177
180
183
187
190
193
196
199
neurology department
['712', '715', '718', '722', '726', '729', '732', '735', '738', '741', '744', '747', '750', '753', '756', '759', '762', '765', '769', '772', '775', '778', '781', '784', '787', '790']
712
715
718
722
726
729
732
735
738
741
744
747
750
753
756
759
762
765
769
772
775
778
781
784
787
790
pediatrics department
['62', '66', '69', '72',

### Check others without mine

In [None]:
all_models= ["gpt-4o","llama3.1","gemma2"]
all_models_to_check=["gpt-4o","llama3.1","gemma2"]
type="check_others_input_without_mine"
prompt_type=compare_others_prompt_without_mine
for department in departments:
    print(department)
    results={}
    with open(f"results4/with_reasons/{department}_with_reasons.json", "r") as file:
        data = json.load(file)

    department_results=data
    ids=list(department_results.keys())
    print(ids)
    for case_id in ids:
        print(case_id)
        case_details={}
        principal_diagnosis,differential_diagnosis,filtered_clinical_case_dict=select_case_components_based_on_id(df,int(case_id),required_fields)
        doctor_diagnosis=get_diagnosis_by_other_models_by_id(case_id,department_results,all_models)
        case_details["original"]={"main-diagnosis":principal_diagnosis,"differential_diagnosis":differential_diagnosis}
        case_details["predictions"]={}
        for i,model in enumerate(all_models_to_check):
            doctor_diagnosis1=doctor_diagnosis[:]
            if model=="gpt-4o":
              continue
            del doctor_diagnosis1[i]

            if "gpt" in model:
                output=compare_others_gpt(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis1)
            else:
                output=compare_others_ollama(prompt_type,filtered_clinical_case_dict,model,differential_diagnosis,department,doctor_diagnosis1)
            case_details["predictions"][model]=output
        results[str(case_id)]=case_details

    path = f"results5/{type}/{model}/{department}_{type}.json"
    folder_path = os.path.dirname(path)
    os.makedirs(folder_path, exist_ok=True)
    with open(path, "w") as outfile:
      json.dump(results, outfile)