In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

result_df = pd.read_csv('PaLM2input.csv', index_col=0)

In [2]:
result_df['GPT_input'] = result_df['GPT_input'].str.replace('minimum', 'min')
result_df['GPT_input'] = result_df['GPT_input'].str.replace('maximum', 'max')
result_df['GPT_input'] = result_df['GPT_input'].str.replace('average', 'avg')
result_df['GPT_input'] = result_df['GPT_input'].str.replace('maxntprobnp', 'max ntprobnp')

result_df['GPT-Diagnoses'] = np.nan
result_df['GPT-Eval'] = np.nan

In [3]:
import vertexai

vertexai.init(project="n8n-copilot", location="us-central1")

In [4]:
from vertexai.preview.language_models import TextGenerationModel
# parameters = {
#     "temperature": 0.2,
#     "max_output_tokens": 8192,
#     "top_p": 0.8,
#     "top_k": 40
# }
model = TextGenerationModel.from_pretrained("medlm-medium")

In [5]:
new_prompt = '''You are an expert diagnostician machine for use by doctors. If the user input is not patient data, you politely decline the request. Please suggest diagnoses and conditions, followed by the evidence points supporting each diagnosis in the form of bullet points. Include previous diagnoses and pertinent information about the patient's medical history (if any). Pay close attention to all the history and investigations provided. Put asterisks around the diagnoses to highlight them. Give each evidence points as a separate bullet point beneath the diagnosis. Include in your evidence points any relevant clinical scores that can be calculated from the information I have given. Do not explain the evidence points, only state them. For every diagnosis you list, if there are alternative differentials possible, state the most like three in a bullet point beneath the evidence points (you do not need to state the evidence supporting them - you only need to do that for the main diagnoses). For the main diagnoses, give only confirmed diagnoses and evidence points that can be inferred solely based on the information I have given - do not use any other information. Only give me the information I have asked for - do not give me any other information. Do not give me any introductions or conclusions, safety instructions, or safety warnings. Use British English.
                
                                To illustrate how the information should be presented:
                
                                *MAIN DIAGNOSIS 1 AS HEADING*
                                evidence points to support MAIN DIAGNOSIS 1
                                The final bullet point is alternative differentials to consider: alternative 1, alternative 2, alternative 3
                
                                *MAIN DIAGNOSIS 2 AS HEADING*
                                evidence points to support MAIN DIAGNOSIS 2
                                The final bullet point is alternative differentials to consider: alternative 1, alternative 2, alternative 3
                
                                and so on...
                
Before finalising your answer check if you haven't missed any abnormal data points and hence any diagnoses or alternative differentials that could be made based on them. If you did, add them to your reply. If two diagnoses are commonly caused by the same underlying disease, have them under one header, which is the underlying disease.'''

In [6]:
hard_example = '''Patient data:

A 48-year-old man presents to his GP having vomited twice at home. As he sits down you note that he is very pale and is sweating profusely. He reports that he also has a severe headache which has been present for the last few hours and is worse in the parietal region.

Observations:

SpO2: 97%
Temperature: 37.2
BP: 200/131
HR: 110
RR: 19

Examination:

Abdominal:
NA

Cranial nerves:
NAD

Fundoscopy:
Flame haemorrhages present in upper R quadrant of both eyes

The GP manages the patient appropriately. Some point of care bloods are also taken:

 

Test	Result	Reference Range
Hb	152	135-180 g/l
WCC	4.2	4 – 11 x 109/l
Plts	392	150 – 400 x 109/l
Na+	145	135-145 mmol/l
K+	3.1	3.5 – 5 mmol/l
Ca2+	2.43	2.1 – 2.6 mmol/l
Cortisol	482	119 – 618 nmol/l
Normetanephrine aldehyde	79	18 – 111 pg/ml
Renin	1.8	2.8 – 4.5 pmol/ml/h
Aldosterone	782	55 – 250 pmol/l
'''

In [7]:
res = model.predict(
        new_prompt + hard_example,
        #**parameters
    )

In [8]:
res

 **Hypertensive emergency**
* Severe headache
* Diastolic BP >120
* Evidence of end organ damage (fundal haemorrhages)

**Pheochromocytoma**
* Severe headache
* Diastolic BP >120
* Evidence of end organ damage (fundal haemorrhages)
* Raised normetanephrine aldehyde
* Raised aldosterone
* Raised cortisol

Alternative differentials to consider:
* Ruptured intracranial aneurysm
* Subarachnoid haemorrhage
* Acute kidney injury
* Myocardial infarction
* Stroke

In [9]:
prompt_no_example = '''Suggest as many potential diagnoses as possible from the following patient data.
In addition, include previously diagnosed conditions and information about patient's medical history (if any) in your diagnoses. 
Give exact numbers and/or text quotes from the data that made you think of each of the diagnoses \
and, if necessary, give further tests that could confirm the diagnosis. 
Once you're done, suggest further, more complex diseases that may be ongoing based on the existing diagnoses you already made. 
Use the International Classification of Disease (ICD) naming standard for reporting the diagnoses, but you don't have to specify the codes.
Do not list the same diagnosis multiple times.

Before finalizing your answer check if you haven't missed any abnormal data points and hence any diagnoses that could be made \
based on them. If you did, add them to your list of diagnoses. Your answer should be formatted the same way as the example answer below.'''

example = '''

For example, if the patient data mentions:

"Blood report: 
min glucose: 103, max glucose: 278, avg glucose: 156.5, max inr: 2.1, max pt: 22.4, max ptt: 150, \
avg wbc: 13.8, max wbc: 14.1, max lactate: 5.9, max bun: 101, max creatinine: 5.8, avg bun: 38.15, avg creatinine: 2.78
Blood gas report: 
3 hours after admission the blood gas results from venous blood are: ph: 7.2
Imaging report: 
Status post left total shoulder replacement
Chest X-Ray Possible small right pleural effusion and Mild, bibasilar atelectasis.. \
Lung volumes have slightly increased but areas of atelectasis are seen at both the left and the right lung bases
Microbiology tests: 
24 hours after admission the microbiology culture test MRSA SCREEN obtained via MRSA SCREEN \
identified POSITIVE FOR METHICILLIN RESISTANT STAPH AUREUS
Vitalsigns data from ICU: 
max temperature: 38, min peripheral oxygen saturation: 70, max respiration rate: 29"

then your answer may be: 

1: Methicillin resistant Staphylococcus aureus infection, site unspecified
Foundational data: Microbiology culture test identifying "POSITIVE FOR METHICILLIN RESISTANT STAPH AUREUS"

2: Atelectasis 
Foundational data from Chest X-Ray: "Mild, bibasilar atelectasis.. \
Lung volumes have slightly increased but areas of atelectasis are seen at both the left and the right lung bases"

3: Pleural effusion, not elsewhere classified
Foundational data from Chest X-Ray: "Possible small right pleural effusion."
Further tests: Thoracentesis, CT chest

4: Acidosis
Foundational data: "ph: 7.2"
Further tests: Urine pH, Anion Gap

5: Lactic acidosis
Foundational data: "max lactate: 5.9"

6: Acquired coagulation factor deficiency
Foundational data: "max inr: 2.1, max pt: 22.4, max ptt: 150"
Further tests: Antiphospholipid Antibodies (APL), Protein C, Protein S, Antithrombin III, Factor V Leiden, Fibrinogen test

7: Hyperglycemia, unspecified
Foundational data: "max glucose: 278, avg glucose: 156.5".
Further tests: Hemoglobin A1c (HbA1c) test

8: Hypoxemia
Foundational data: "min peripheral oxygen saturation: 70"
Further tests: Measure PaO2 in blood

9: Leukocytosis
Foundational data: "max wbc: 14.1, avg wbc: 13.8". The patient's white blood cell count is consistently elevated which may suggest an ongoing inflammatory response or infection.
Further tests: Infection markers such as CRP or PCT, Assessment of symptoms like fever, inflammation or fatigue. 

10. Unspecified acute kidney failure:
Foundational data: "max bun: 101, max creatinine: 5.8, avg bun: 38.15, avg creatinine: 2.78"
Further tests: Urine output measurements for oliguria, ultrasound to rule out obstruction

11. Presence of left artificial shoulder joint
Foundational data: The imaging report mentions: "Status post left total shoulder replacement"

Further diseases based on these diagnoses (continued the indexing from the previous number in the list):

12: Unspecified septicemia
Foundational data: positive MRSA screen, systemic inflammatory response: "max respiration rate: 29", "max temperature: 38", leukocytosis
Further tests: HR, BP, wound culture, respiratory excretion tests

13: Septic shock
Foundational data: Septicemia with acidosis and lactic acidosis may suggest septic shock
Further tests: patient examination (low BP, mental disorientation, nausea, pale skin may confirm the finding)

14: Acute respiratory failure, with hypoxia or hypercapnia
Foundational data: hypoxemia and the presence of atelectasis
Further tests: Clinical symptoms (severe shortness of breath, rapid breathing, and confusion), \
arterial blood gas measurements showing hypoxia or hypercapnia

15: Type 2 diabetes mellitus with diabetic chronic kidney disease
Foundational data: Hyperglycemia and kidney failure
Further tests: urine test, hemoglobin (A1C) test, GFR, BP, physical examination (swelling, nausea, weakness, eye disease)'''

patient_data = '''

Patient data:\n'''

In [10]:
res = model.predict(
        prompt_no_example + example + patient_data + hard_example,
        #**parameters
    )

In [11]:
res

 1: Hypertensive emergency
Foundational data: "BP: 200/131"
Further tests: ECG, urine dipstick, urine output, renal function tests, echocardiogram

2: Subarachnoid hemorrhage
Foundational data: "severe headache which has been present for the last few hours and is worse in the parietal region", "Flame haemorrhages present in upper R quadrant of both eyes"
Further tests: CT head, lumbar puncture

3: Pheochromocytoma
Foundational data: "Normetanephrine aldehyde	79 (18 – 111

In [13]:
repeat = []
for i in tqdm(range(0,1000)):
    try:
        response = model.predict(
            prompt_no_example + example + patient_data + str(result_df.iloc[i]['GPT_input']),
            #**parameters
        )
        hadm_id = result_df.index[i]
        result_df.loc[hadm_id, 'GPT-Diagnoses'] = response.text
    except Exception as e: 
        print('Error happened at iteration i: ' + str(i))
        repeat.append(i)
        print(e)
        
result_df.to_csv('MedLM.csv')

100%|██████████| 1000/1000 [1:03:04<00:00,  3.78s/it]


In [14]:
result_df['GPT-Diagnoses'].isna().sum()

0

In [19]:
result_df.iloc[650]['GPT-Diagnoses']

''

In [18]:
repeat = []
for i in tqdm([650]):
    try:
        response = model.predict(
            prompt_no_example + example + patient_data + str(result_df.iloc[i]['GPT_input']),
            #**parameters
        )
        hadm_id = result_df.index[i]
        result_df.loc[hadm_id, 'GPT-Diagnoses'] = response.text
    except Exception as e: 
        print('Error happened at iteration i: ' + str(i))
        repeat.append(i)
        print(e)

100%|██████████| 1/1 [00:03<00:00,  3.91s/it]
