### Plan D:
ECG Impressions + Clinical Features -> LLM -> 1 response

Follow prompt in Table 6 of this paper: https://arxiv.org/pdf/2305.19523

In [1]:
# Import packages
import json
from huggingface_hub import login
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, pipeline
import transformers
import random
import torch
import time
import re
from tqdm import tqdm
import pandas as pd

# Set GPUs
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

print("Available GPUs:", torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# Set seeds
random.seed(0)
torch.manual_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


Available GPUs: 2
GPU 0: NVIDIA A100-SXM4-40GB
GPU 1: NVIDIA A100-SXM4-40GB


<torch._C.Generator at 0x7f931937e970>

In [2]:
# Log into Huggingface
with open("../../huggingface_token.txt", "r") as file:
    access_token = file.read().strip()
login(access_token)

# Load Huggingface Model
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token, low_cpu_mem_usage=True,
                    torch_dtype=torch.float16, device_map='auto')

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:07<00:00,  1.81s/it]


In [16]:
def get_message(note):
    system = 'You are a cardiologist. Your task is to generate a prognosis for the next few years with reasoning. The three possible prognoses are survivor, sudden cardiac death, and pump failure death. Order the three prognoses from most likely to least likely and explain the reasoning behind the order. Please structure the results as follows: RANKING: [Your ranking here] \n REASONING: [Your explanation here]'
    prompt = f"Here is the patient data: \n{note}"

    messages = [
		{"role": "system", "content": system},
		{"role": "user", "content": prompt}
	]

    return messages

def extract_assistant_response(response):
    parts = response.split("assistant\n\n", 1)
    return parts[1].strip() if len(parts) > 1 else response

In [17]:
# Load in csv file with prompts
df = pd.read_csv("../Data/subject-info-cleaned-with-prompts.csv")
# print(df['Prompts'][0])

In [29]:
# Test message
message = get_message(df['Prompts'][500])
message

[{'role': 'system',
  'content': 'You are a cardiologist. Your task is to generate a prognosis for the next few years with reasoning. The three possible prognoses are survivor, sudden cardiac death, and pump failure death. Order the three prognoses from most likely to least likely and explain the reasoning behind the order. Please structure the results as follows: RANKING: [Your ranking here] \n REASONING: [Your explanation here]'},
 {'role': 'user',
  'content': 'Here is the patient data: \nAge: 72\nGender: Male \nWeight: 81 kg\nHeight: 168 cm\nNYHA Class: II\nBlood Pressure: 100/70 mmHg\nPast Medical History: Ischemic dilated cardiomyopathy, Myocardial Infarction\nAlbumin (g/L): 42,6\nALT or GPT (IU/L): 14.0\nAST or GOT (IU/L): 24.0\nTotal Cholesterol (mmol/L): 5,54\nCreatinine (mmol/L): 130.0\nGamma-glutamil transpeptidase (IU/L): 12\nGlucose (mmol/L): 5,7\nHemoglobin (g/L): 133.0\nHDL (mmol/L): 1,84\nPotassium (mEq/L): 3,29\nLDL (mmol/L): 3,3\nSodium (mEq/L): 142.0\nPro-BNP (ng/L):

In [30]:
# Put message into LLM
input_text = tokenizer.apply_chat_template(message, tokenize = False, add_generation_prompt = True)
inputs = tokenizer(input_text, return_tensors = "pt").to(model.device)
output = model.generate(**inputs, max_new_tokens = 1000)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [31]:
# Get result
result = tokenizer.decode(output[0], skip_special_tokens = True)
result = result.replace("**", "")
result = extract_assistant_response(result)
print(result)

RANKING: 1. Survivor
 REASONING: The patient has a history of ischemic dilated cardiomyopathy and myocardial infarction, which indicates a high risk of cardiac complications. However, the patient's current medications, including Angiotensin II Receptor Blocker, Beta Blockers, Loop Diuretics, Spironolactone, and Statins, are appropriate for managing heart failure and reducing the risk of further cardiac events. The patient's LVEF (Left Ventricular Ejection Fraction) is 30%, which is low but not extremely low. Additionally, the patient's blood pressure is within a relatively normal range, and the absence of ventricular tachycardia or sustained ventricular tachycardia on the ECG is a positive sign. The patient's NT-proBNP level is elevated at 4250.0 ng/L, which is consistent with heart failure, but the level is not extremely high. Considering these factors, the patient is likely to survive for the next few years with proper medical management and adherence to treatment.

RANKING: 2. Pump 