# NER vs LLM

---

### Configure OLLama

Install OLLama

Open terminal and type:

`ollama run phi3:3.8b`



In [60]:
import json
import requests

# This code returns an empty string...

url = "http://localhost:11434/api/chat" 

question = "Hi. Can you help me?"

payload = {
    "model": "phi3:3.8b",
    "message":[{"role":"user","content": question}],
    "stream": False,
}

response = requests.post(url, json=payload)

# Parse the response
if response.status_code == 200:
    print("Response:\n", response.json()["message"]["content"])
else:
    print("Error:\n", response.status_code)

Response:
 


In [99]:
url = "http://localhost:11434/api/generate" 

payload = {
    "model": "phi3:3.8b",
    "prompt": "Identify so called named entities in this sentence: \'Kraków jest największym miastem w Polsce\'. After that list out the entities in a form of strings in a python list. I want your response to only include the python list without any additional code or \"```\" characters. Be careful to use the polish form of the words.",
    "context":[1],
    "options":{
        "top_k":10
    },
    "stream": False,
}

response = requests.post(url, data=json.dumps(payload))

# Parse the response
if response.status_code == 200:
    print("Response:\n", response.json()["response"])
else:
    print("Error:\n", response.status_code)

Response:
 ['Kraków', 'POLAND']


### Take 1000 passages from fiqa corpus

In [1]:
from datasets import load_dataset
from numpy.random import choice

fiqa_corpus = load_dataset("clarin-knext/fiqa-pl", "corpus")["corpus"]

fiqa_corpus = fiqa_corpus["text"]
fiqa_idx = choice(len(fiqa_corpus), 1000, replace=False)
fiqa_corpus = [fiqa_corpus[i] for i in fiqa_idx]

---

### NER baseline

In [95]:
import spacy

nlp = spacy.load("pl_core_news_sm")

def get_ents(text):
    doc = nlp(text)
    entity_dict = {}
    for ent in doc.ents:
        text, label = ent.text, ent.label_
        if (text, label) not in entity_dict.keys():
            entity_dict[(text, label)] = 0
        entity_dict[(text, label)] += 1
    return entity_dict

In [96]:
get_ents(
    "Wczoraj w Krakowie miało miejsce spotkanie prezydentów Polski i Stanów Zjednoczonych."
)

{('Krakowie', 'placeName'): 1,
 ('Polski', 'placeName'): 1,
 ('Stanów Zjednoczonych', 'placeName'): 1}