## Imports

In [None]:
!pip install farm-haystack transformers ollama
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install sentence-transformers
!pip install "farm-haystack[inference]"

In [1]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.schema import Document
import ollama
import os
import pickle
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Language code of the key terms to index; also used as the column name read
# from terms.csv when the document store is built.
CUR_LANG = "es"
def EMBEDDINGS_PATH(lang: str) -> str:
    """Return the file name of the pickled document store for ``lang``.

    Args:
        lang: Language code, e.g. ``"es"``.

    Returns:
        A path of the form ``embeddings_<lang>.pkl`` relative to the working directory.
    """
    # Use the argument, not the global CUR_LANG, so every language gets its own cache file.
    return f"embeddings_{lang}.pkl"

## Prepare key terms embeddings

In [5]:
# Build (or reload from a pickle cache) the document store holding one embedded
# document per key term, plus the retriever used to query it.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embeddings_path = EMBEDDINGS_PATH(CUR_LANG)
embeddings_cached = os.path.exists(embeddings_path)

if embeddings_cached:
    # SECURITY: pickle.load can execute arbitrary code. Only load cache files
    # that this notebook wrote itself; never a file from an untrusted source.
    with open(embeddings_path, 'rb') as f:
        document_store = pickle.load(f)
else:
    # NOTE(review): 384 is presumably the output dimension of EMBEDDING_MODEL — confirm if the model changes.
    document_store = InMemoryDocumentStore(embedding_dim=384)
    terms_df = pd.read_csv("terms.csv")
    # One document per CSV row: the term text in the current language, keyed by the row's "id".
    documents = [Document(content=row[CUR_LANG], id=row["id"]) for _, row in terms_df.iterrows()]
    document_store.write_documents(documents)

# The retriever is configured identically whether the store was loaded or freshly built.
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model=EMBEDDING_MODEL,
    use_gpu=False
)

if not embeddings_cached:
    # Compute the embeddings once and cache the whole store for later runs.
    document_store.update_embeddings(retriever)
    with open(embeddings_path, 'wb') as f:
        pickle.dump(document_store, f)

Batches: 100%|██████████| 229/229 [00:03<00:00, 60.58it/s]cs/s]
Documents Processed: 10000 docs [00:03, 2597.31 docs/s]         


## Define key-term retrieval and the Llama 3.2 prompt

In [6]:
def get_relevant_key_terms(question, top_k=10):
    """Look up the key terms closest to ``question`` using the module-level retriever.

    Args:
        question: Query text passed to ``retriever.retrieve``.
        top_k: Maximum number of documents to request from the retriever.

    Returns:
        A dict mapping each retrieved document's content (with every
        double-quote character removed) to that document's id.
    """
    hits = retriever.retrieve(query=question, top_k=top_k)
    term_to_id = {}
    for hit in hits:
        cleaned_term = hit.content.replace('"', '')
        term_to_id[cleaned_term] = hit.id
    return term_to_id

# Launch `ollama serve` as a side effect of running this cell. The pipe object
# returned by os.popen is not kept, so this cell neither reads the command's
# output nor waits for it to finish.
# NOTE(review): behaviour when a server is already running is not handled here — confirm it is harmless.
os.popen('ollama serve')
# Client created with no arguments; ask_llama() sends its chat requests through it.
client = ollama.Client()

def ask_llama(question, relevant_key_terms, model="llama3.2"):
    """Ask the chat model to pick the two key terms most relevant to ``question``.

    Args:
        question: The user's search prompt, inserted verbatim into the prompt.
        relevant_key_terms: Candidate key terms already formatted as text, each
            term in its own square brackets (the prompt describes them that way).
        model: Name of the Ollama model to query. Defaults to ``"llama3.2"``,
            the model that was previously hard-coded.

    Returns:
        The text content of the model's reply. The requested format is
        ``[TERM];[TERM]``, but the model is free to deviate, so callers must
        validate the result.
    """
    prompt = f"""
        Here is a search prompt by a user of the UN digital library:
        Question: {question}
        Here is a list of key terms. Each key term is in its own square brackets.
        Key Terms: {relevant_key_terms}
        Select the two key terms that are most relevant to the question.
        Output only the selected key terms as they are presented to you: each key term within square brackets.
        Separate each set of square brackets with a semi-colon and no spaces.
        
        """

    # Single-turn conversation: the whole instruction is sent as one user message.
    response = client.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response['message']['content']


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## Ask question

In [7]:
# The search prompt to process. A longer, sentence-style alternative to try:
#question = "I would like to know more about the impact of world war 1 on the economy of Germany."
question = "world war II"

2025/01/08 15:54:11 routes.go:1259: INFO server config env="map[HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_DEBUG:false OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/Users/luke/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:0 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://*] OLLAMA_SCHED_SPREAD:false http_proxy: https_proxy: no_proxy:]"
time=2025-01-08T15:54:11.911+01:00 level=INFO source=images.go:757 msg="total blobs: 8"
time=2025-01-08T15:54:11.912+01:00 level=INFO source=images.go:764 msg="total unused blobs rem

In [8]:
# Retrieve the candidate key terms (term text -> document id) for the question
key_terms = get_relevant_key_terms(question)

# Wrap every term in square brackets, dropping any double quotes
key_terms_array = []
for term in key_terms:
    key_terms_array.append("[{}]".format(term.replace('"', '')))
print("Key Terms:", key_terms_array)

# Build the single semi-colon separated string that is passed to the model
separator = ";"
key_terms_string = separator.join(key_terms_array)
print("Relevant Key Terms:", key_terms_string)

Batches: 100%|██████████| 1/1 [00:00<00:00, 105.36it/s]


Key Terms: ['[GUERRA MUNDIAL (1939-1945)]', '[GUERRA MUNDIAL (1914-1918)]', '[SOVIETS]', '[GUERRA NUCLEAR]', '[GUERRA NAVAL]', '[GUERRA CIVIL]', '[HISTORIA MILITAR]', '[PROPAGANDA DE GUERRA]', '[IRAQ]', '[GUERRA DE GUERRILLAS]']
Relevant Key Terms: [GUERRA MUNDIAL (1939-1945)];[GUERRA MUNDIAL (1914-1918)];[SOVIETS];[GUERRA NUCLEAR];[GUERRA NAVAL];[GUERRA CIVIL];[HISTORIA MILITAR];[PROPAGANDA DE GUERRA];[IRAQ];[GUERRA DE GUERRILLAS]


In [9]:
# Ask the model up to `max_tries` times until its reply has the expected format:
# at most two entries separated by ';', each exactly one of the bracketed
# candidates in key_terms_array.
max_tries = 5
is_answer_valid = False

for attempt in range(max_tries):
    answer = ask_llama(question, key_terms_string)
    print("Answer:", answer)

    answer_split = [term.strip() for term in answer.split(';')]
    is_answer_valid = len(answer_split) <= 2 and all(term in key_terms_array for term in answer_split)
    print(is_answer_valid)
    if is_answer_valid:
        break

# Fall back only when no attempt produced a valid answer. Testing the attempt
# counter instead would also discard a valid answer obtained on the final try.
if not is_answer_valid:
    # select two first key terms
    answer_split = key_terms_array[:2]
    answer = ";".join(answer_split)
    print("Answer:", answer)

time=2025-01-08T15:54:16.052+01:00 level=INFO source=sched.go:714 msg="new model will fit in available VRAM in single GPU, loading" model=/Users/luke/.ollama/models/blobs/sha256-dde5aa3fc5ffc17176b5e8bdc82f587b24b2678c6c66101bf7da77af9f7ccdff gpu=0 parallel=4 available=11453251584 required="3.8 GiB"
time=2025-01-08T15:54:16.057+01:00 level=INFO source=server.go:104 msg="system memory" total="16.0 GiB" free="6.2 GiB" free_swap="0 B"
time=2025-01-08T15:54:16.057+01:00 level=INFO source=memory.go:356 msg="offload to metal" layers.requested=-1 layers.model=29 layers.offload=29 layers.split="" memory.available="[10.7 GiB]" memory.gpu_overhead="0 B" memory.required.full="3.8 GiB" memory.required.partial="3.8 GiB" memory.required.kv="896.0 MiB" memory.required.allocations="[3.8 GiB]" memory.weights.total="2.4 GiB" memory.weights.repeating="2.1 GiB" memory.weights.nonrepeating="308.2 MiB" memory.graph.full="424.0 MiB" memory.graph.partial="424.0 MiB"
time=2025-01-08T15:54:16.058+01:00 level=IN

Answer: [GUERRA MUNDIAL (1939-1945);] [SOVIETS]
False
Answer: [GUERRA MUNDIAL (1939-1945);] [SOVIETS]
False
Answer: [GUERRA MUNDIAL (1939-1945)];[SOVIETS]
True


In [10]:
print(answer_split)
# Concatenate one "subjectheading:<term>" fragment per selected term, with no separator.
output = "".join(f"subjectheading:{term}" for term in answer_split)
print(output)

['[GUERRA MUNDIAL (1939-1945)]', '[SOVIETS]']
subjectheading:[GUERRA MUNDIAL (1939-1945)]subjectheading:[SOVIETS]


In [11]:
# Recover the bare key terms by removing only the enclosing square brackets, so
# a term that itself contains brackets still matches its key in `key_terms`.
selected_terms = [
    term[1:-1] if term.startswith('[') and term.endswith(']') else term
    for term in answer_split
]
# term1/term2 are kept for backward compatibility; term2 is None when the
# answer contains a single term (validation accepts one or two terms).
term1, term2 = (selected_terms + [None, None])[:2]
print(*selected_terms)
# Print the document id of every selected key term.
print(*(key_terms[term] for term in selected_terms))


GUERRA MUNDIAL (1939-1945) SOVIETS
1007089 1006079
