## Imports

In [None]:
!pip install farm-haystack transformers ollama
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install sentence-transformers
!pip install "farm-haystack[inference]"

In [22]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.schema import Document
import ollama
import os
import pickle

In [23]:
DOCUMENT_STORE_PKL = "document_store.pkl"

## Prepare key terms embeddings

In [24]:
if (os.path.exists(DOCUMENT_STORE_PKL)):
    with open(DOCUMENT_STORE_PKL, 'rb') as f:
        document_store = pickle.load(f)
        retriever = EmbeddingRetriever(
            document_store=document_store,
            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
            use_gpu=False 
        )
else:
    document_store = InMemoryDocumentStore(embedding_dim=384)
    key_terms = open('terms.csv').read().split('\n')
    documents = [Document(content=term) for term in key_terms]
    document_store.write_documents(documents)
    retriever = EmbeddingRetriever(
            document_store=document_store,
            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
            use_gpu=False 
        )
    document_store.update_embeddings(retriever)
    with open(DOCUMENT_STORE_PKL, 'wb') as f:
        pickle.dump(document_store, f)

## Prepare question embedding and question for Llama3.2

In [25]:
def get_relevant_key_terms(question, top_k=20):
    results = retriever.retrieve(query=question, top_k=top_k)
    return [doc.content for doc in results]

os.popen('ollama serve')
client = ollama.Client()

def ask_llama(question, relevant_key_terms):
    prompt = f"""
        Here is a search prompt by a user of the UN digital library:
        Question: {question}
        Here is a list of key terms. Each key term is in square brackets.
        Key Terms: {relevant_key_terms}
        Select ideally one, if necessary two key terms that are most relevant to the question.
        Output only the selected key terms as they are presented to you: each key term within square brackets, each set of square brackets separated by a semi-colon and no space.
        """

    return client.chat(model="llama3.2", messages=[{"role": "user", "content": prompt}])['message']['content']


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Error: listen tcp 127.0.0.1:11434: bind: address already in use


## Ask question

In [26]:
#question = "I would like to know more about the impact of world war 1 on the economy of Germany."
question = "world war ii"

In [27]:
# Get relevant key terms
key_terms = get_relevant_key_terms(question)

key_terms_array = ["[" + term.replace('"', '') + "]" for term in key_terms]
print("Key Terms:", key_terms_array)

# Join the key terms array into a single string separated by semi-colons
key_terms_string = ";".join(key_terms_array)
print("Relevant Key Terms:", key_terms_string)

Batches: 100%|██████████| 1/1 [00:00<00:00, 59.27it/s]

Key Terms: ['[WORLD WAR (1939-1945)]', '[WORLD WAR (1914-1918)]', '[WAR]', '[WORLD HISTORY]', '[WAR PROPAGANDA]', '[NUCLEAR WAR]', '[LAW OF WAR]', '[MILITARY HISTORY]', '[SOVIETS]', '[WAR CRIMES]', '[PRISONERS OF WAR]', '[MILITARY OCCUPATION]', '[KOREAN WAR (1950-1953)]', '[WAR PREVENTION]', '[INTERNATIONAL DECADES]', '[HISTORY]', '[AIR WARFARE]', '[WAR VICTIMS]', '[WAR CRIMINALS]', '[ISRAEL-ARAB WAR (1948-1949)]']
Relevant Key Terms: [WORLD WAR (1939-1945)];[WORLD WAR (1914-1918)];[WAR];[WORLD HISTORY];[WAR PROPAGANDA];[NUCLEAR WAR];[LAW OF WAR];[MILITARY HISTORY];[SOVIETS];[WAR CRIMES];[PRISONERS OF WAR];[MILITARY OCCUPATION];[KOREAN WAR (1950-1953)];[WAR PREVENTION];[INTERNATIONAL DECADES];[HISTORY];[AIR WARFARE];[WAR VICTIMS];[WAR CRIMINALS];[ISRAEL-ARAB WAR (1948-1949)]





In [28]:
is_answer_valid = False
while not is_answer_valid:
    answer = ask_llama(question, key_terms_string)
    print("Answer:", answer)

    answer_split = [term.strip() for term in answer.split(';')]
    is_answer_valid = isinstance(answer, str) and len(answer_split) <= 2
    is_answer_valid = is_answer_valid and all([term.strip() in key_terms_array for term in answer.split(';')])
    print(is_answer_valid)

Answer: [WORLD WAR (1939-1945); WAR]
False
Answer: [WORLD WAR (1939-1945)];[WAR]
True


In [29]:
print(answer_split)
output = ""
for term in answer_split:
    output += f"subjectheading:{term}"
print(output)

['[WORLD WAR (1939-1945)]', '[WAR]']
subjectheading:[WORLD WAR (1939-1945)]subjectheading:[WAR]


time=2024-12-09T16:45:28.084+01:00 level=INFO source=sched.go:714 msg="new model will fit in available VRAM in single GPU, loading" model=/Users/luke/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff gpu=0 parallel=4 available=11453251584 required="3.8 GiB"
time=2024-12-09T16:45:28.084+01:00 level=INFO source=server.go:105 msg="system memory" total="16.0 GiB" free="4.5 GiB" free_swap="0 B"
time=2024-12-09T16:45:28.085+01:00 level=INFO source=memory.go:343 msg="offload to metal" layers.requested=-1 layers.model=29 layers.offload=29 layers.split="" memory.available="[10.7 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.8 GiB" memory.required.partial="3.8 GiB" memory.required.kv="896.0 MiB" memory.required.allocations="[3.8 GiB]" memory.weights.total="2.4 GiB" memory.weights.repeating="2.1 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="424.0 MiB" memory.graph.partial="424.0 MiB"
time=2024-12-09T16:45:28.086+01:00 level=IN