# UniversalNER

UniversalNER is a prompt-based NER model: the entity types of interest are given to the model, which then extracts the matching entities from the text. It can recognize diverse types of entities or concepts in text corpora from a wide range of domains.


[More information here](https://universal-ner.github.io/)

This notebook runs a proposed UniversalNER pipeline in which UniversalNER is hosted locally and queried through its HTTP API.

[More information here](https://github.com/universal-ner/universal-ner)

In [None]:
import json
import requests

In [None]:
# Load the notes to annotate. The encoding is given explicitly so that the
# result does not depend on the platform's default text encoding.
with open('../data/llm_dataset.json', encoding='utf-8') as f:
    data = json.load(f)

In [None]:
# Results keyed by patient index:
#   {patient_num: {"text": <stripped note>, "entity_dict": {entity: model output}}}
patient_dict = {}

# Indices into `data` of the notes that are sent to the model.
patient_nums = [0, 15, 30, 78, 165, 276, 345, 428, 567, 735, 852, 961]

# Entity descriptions; each one is substituted into the prompt and asked as a
# separate question per note. Commented-out entries are currently disabled.
entities_list = [
    "name of person",
    "location of visit",
    "marital status",
    # "alcohol consumption",
    # "allergies",
    "male, female or non-binary",
    "race ethnicity nationality",
    # "recreational drug use",
    # "tobacco use",
    "treatment procedure",
    "metric and metric value",
    "medical condition",
    "medication",
    "medication dosage",
    "address",
    "ID",
    "NHS Number",
    "date of birth",
    "visit date",
]

# Conversation-style prompt with two placeholders: the note text and the
# entity description being asked about.
prompt_template = """A virtual assistant answers questions from a user based on the provided text.
USER: Text: {input_text}
ASSISTANT: I’ve read this text.
USER: What describes {entity_name} in the text?
ASSISTANT:
"""

# Completion endpoint of the locally hosted model server.
url = "http://127.0.0.1:8080/completion"

# (connect, read) timeouts in seconds. The read timeout is deliberately
# generous because a single completion may generate up to `n_predict` tokens;
# without any timeout a stalled server would hang the notebook forever.
request_timeout = (5, 600)

# Request settings that are identical for every prompt; only "prompt" varies,
# so this is built once rather than on every iteration.
generation_params = {
    "stream": False,
    "n_predict": 400,
    "temperature": 0,
    "stop": [
        "</s>",
    ],
    "repeat_last_n": 256,
    "repeat_penalty": 1,
    "top_k": 20,
    "top_p": 0.75,
    "tfs_z": 1,
    "typical_p": 1,
    "presence_penalty": 0,
    "frequency_penalty": 0,
    "mirostat": 0,
    "mirostat_tau": 5,
    "mirostat_eta": 0.1,
    "grammar": "",
    "n_probs": 0,
}

# One session for all requests so the connection to the server is reused
# instead of being re-opened for every (patient, entity) pair.
with requests.Session() as session:
    for patient_num in patient_nums:
        text = data[patient_num].strip()

        patient_entity_dict = {}
        for entity in entities_list:
            prompt = prompt_template.format(input_text=text, entity_name=entity)
            res = session.post(
                url,
                json={**generation_params, "prompt": prompt},
                timeout=request_timeout,
            )
            # Fail with a clear HTTP error rather than a KeyError on "content"
            # when the server answers with an error status.
            res.raise_for_status()
            patient_entity_dict[entity] = res.json()["content"]

        patient_dict[patient_num] = {
            "text": text,
            "entity_dict": patient_entity_dict,
        }



In [None]:
# Inspect one processed note: print its text, then leave the extracted
# entities as the cell's final expression so the notebook displays them.
selected_patient = patient_dict[patient_nums[4]]
print(selected_patient['text'])
selected_patient['entity_dict']