In [2]:
import json
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

In [3]:
# Load the dataset of patient notes. Later cells index `data` by integer
# position and call `.strip()` on each item, i.e. they treat it as a sequence
# of note strings.
# The encoding is given explicitly: without it `open()` decodes with the
# platform's locale encoding, so the same file could load differently (or fail)
# on another machine.
with open('../data/llm_dataset.json', encoding='utf-8') as f:
    data = json.load(f)

# Streams generated tokens to stdout while the model is producing them.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# NOTE(review): absolute, machine-specific path. Anyone else running this
# notebook has to edit it; the data path above is relative, so the model
# probably lives at an equivalent relative location -- confirm before changing.
model_path = "/Users/scarlettkynoch/Documents/Projects/privfp-gen-experiments/models/quantized_q4_1.gguf"

# llama.cpp tuning knobs passed straight through to LlamaCpp.
# NOTE(review): these look like the values suggested in the LangChain llama.cpp
# guide for Apple Metal -- confirm they suit the hardware this runs on.
n_gpu_layers = 1
n_batch = 512

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /Users/scarlettkynoch/Documents/Projects/privfp-gen-experiments/models/quantized_q4_1.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_1     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_1     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_1     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q4_1     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_1     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_1     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:            blk.0.ffn_down.weight q4_1     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    7:          

In [None]:
# Positions in `data` of the patient notes to run entity extraction on.
patient_nums = [0, 15, 30, 78, 165, 276, 345, 428, 567, 735, 852, 961]

# Entity types to ask about, one model call each. Every string is substituted
# verbatim into the question line of the prompt.
# Currently switched off: "alcohol consumption", "allergies",
# "recreational drug use", "tobacco use".
entities_list = [
    "name of person",
    "location of visit",
    "marital status",
    "male, female or non-binary",
    "race ethnicity nationality",
    "treatment procedure",
    "metric and metric value",
    "medical condition",
    "medication",
    "medication dosage",
    "address",
    "ID",
    "NHS Number",
    "date of birth",
    "visit date",
]

# Conversation-style prompt with two placeholders: the note text and the
# entity type being asked about.
prompt_template = (
    "A virtual assistant answers questions from a user based on the provided text.\n"
    "USER: Text: {input_text}\n"
    "ASSISTANT: I’ve read this text.\n"
    "USER: What describes {entity_name} in the text?\n"
    "ASSISTANT: (model's predictions in JSON format)\n"
)

# patient index -> {"text": stripped note, "entity_dict": {entity type -> raw model reply}}
patient_dict = {}
for patient_num in patient_nums:
    text = data[patient_num].strip()

    # Query the model once per entity type, keeping the reply exactly as returned.
    patient_entity_dict = {}
    for entity in entities_list:
        prompt = prompt_template.format(input_text=text, entity_name=entity)
        output = llm(prompt)
        patient_entity_dict[entity] = output

    patient_dict[patient_num] = dict(text=text, entity_dict=patient_entity_dict)



In [15]:
# Scratch cell: ask the model about ONE entity type for ONE patient note, to
# try out different wordings of the entity name.
# The template and index list are re-declared here so this cell can be run
# without first running the full extraction loop.
prompt_template = (
    "A virtual assistant answers questions from a user based on the provided text.\n"
    "USER: Text: {input_text}\n"
    "ASSISTANT: I’ve read this text.\n"
    "USER: What describes {entity_name} in the text?\n"
    "ASSISTANT: (model's predictions in JSON format)\n"
)

patient_nums = [0, 15, 30, 78, 165, 276, 345, 428, 567, 735, 852, 961]
patient_num = patient_nums[5]
text = data[patient_num].strip()

# Author's observations on wording: asking for "metric value" returns the
# value; asking for "medication name dosage" returns the dosage.
entity = "person full name"

prompt = prompt_template.format(input_text=text, entity_name=entity)
print(text)
output = llm(prompt)


Mrs. Vernice Orn is a 33-year-old female, born on 1948-07-14, with NHS number 847 621 0246. She is married, of Black or Black British - African ethnicity and was seen on 1981-11-30 at Sarum Road Hospital (BMI) by Dr. Gino Block for acute bronchitis. She was prescribed Acetaminophen 325 MG Oral Tablet.


Llama.generate: prefix-match hit


[]


llama_print_timings:        load time =    9493.12 ms
llama_print_timings:      sample time =       0.52 ms /     2 runs   (    0.26 ms per token,  3824.09 tokens per second)
llama_print_timings: prompt eval time =    1370.52 ms /   159 tokens (    8.62 ms per token,   116.01 tokens per second)
llama_print_timings:        eval time =      78.57 ms /     1 runs   (   78.57 ms per token,    12.73 tokens per second)
llama_print_timings:       total time =    1458.36 ms


In [9]:
# Inspect one stored record: print the note, then let the notebook display the
# raw per-entity model replies as the cell's value.
patient_record = patient_dict[patient_nums[2]]
print(patient_record['text'])
patient_record['entity_dict']

Mr. Mauro Paucek is a 34 year old male with an NHS number 183 236 1107, born on 1982-03-07. He is of mixed ethnicity, white and Asian, married, and living at 257 McLaughlin Lane Apt 92, Basingstoke, RG21 1ZB. He visited Basingstoke and North Hampshire Hospital on 2016-09-09 with a primary reason of viral sinusitis. The doctor attending was Dylan Robel. 
Observations were taken at the visit, including body height (170.0 cm), pain severity (2.0 {score}), body weight (83.0 kg), body mass index (28.7 kg/m2), diastolic blood pressure (55.0 mm[Hg]), systolic blood pressure (114.0 mm[Hg]), heart rate (85.0 /min) and respiratory rate (12.0 /min). Tobacco smoking status was recorded as Ex-smoker (finding).
Prescriptions included Hydrochlorothiazide 25 MG Oral Tablet, lisinopril 10 MG Oral Tablet, and amLODIPine 2.5 MG Oral Tablet. Procedures included Assessment of health and social care needs and Med


{'name of person': '["Mr. Mauro Paucek", "Dylan Robel"]',
 'location of visit': '["Basingstoke"]',
 'marital status': '["married"]',
 'male, female or non-binary': '["Dylan Robel"]',
 'race ethnicity nationality': '["white", "Asian"]',
 'treatment procedure': '["Assessment of health and social care needs"]',
 'metric and metric value': '["body height", "pain severity", "body weight", "body mass index", "diastolic blood pressure", "systolic blood pressure", "heart rate", "respiratory rate"]',
 'medical condition': '["viral sinusitis", "body height", "pain severity", "body weight", "body mass index", "diastolic blood pressure", "systolic blood pressure", "heart rate", "respiratory rate"]',
 'medication': ' ["Hydrochlorothiazide", "lisinopril", "amLODIPine"]',
 'medication dosage': '[["Hydrochlorothiazide", "lisinopril", "amLODIPine"]',
 'address': ' ["257 McLaughlin Lane Apt 92"]',
 'ID': ' ["NHS number"]',
 'NHS Number': ' ["183 236 1107"]',
 'date of birth': '["1982-03-07"]',
 'visit d