In [1]:
!pip install farm-haystack transformers ollama
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install sentence-transformers
!pip install "farm-haystack[inference]"

In [1]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.schema import Document
import ollama

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# embedding_dim has to agree with the size of the vectors produced by the
# embedding model that the retriever is configured with further down.
document_store = InMemoryDocumentStore(embedding_dim=384)

# Read one key term per line. The context manager closes the file handle,
# and blank lines (including the one produced by a trailing newline) are
# dropped so no empty-content documents are indexed. Stripping also removes
# a stray '\r' when the file uses Windows line endings.
# NOTE(review): assumes terms.csv is UTF-8 encoded -- adjust if it is not.
with open('terms.csv', encoding='utf-8') as terms_file:
    key_terms = [line.strip() for line in terms_file if line.strip()]

# Each key term becomes its own document so it can be retrieved individually.
documents = [Document(content=term) for term in key_terms]

document_store.write_documents(documents)

In [3]:
# Embedding-based retriever bound to the document store; GPU use is disabled.
retriever = EmbeddingRetriever(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    document_store=document_store,
    use_gpu=False,
)

# Have the store (re)compute document embeddings using this retriever.
document_store.update_embeddings(retriever=retriever)

Batches: 100%|██████████| 227/227 [00:03<00:00, 63.52it/s]cs/s]
Documents Processed: 10000 docs [00:03, 2748.04 docs/s]         


In [None]:
def get_relevant_key_terms(question, top_k=20):
    """Return the content of the documents retrieved for ``question``.

    Args:
        question: Query text passed to the module-level ``retriever``.
        top_k: Maximum number of documents to request from the retriever.

    Returns:
        A list with the ``content`` of each retrieved document, in the
        order the retriever returned them.
    """
    return [
        hit.content
        for hit in retriever.retrieve(query=question, top_k=top_k)
    ]

# Example query: fetch the candidate key terms (default top_k) and show them.
question = "polar bears"
relevant_key_terms = get_relevant_key_terms(question=question)
print("Relevant Key Terms:", relevant_key_terms)

Batches: 100%|██████████| 1/1 [00:00<00:00, 139.63it/s]

Relevant Key Terms: ['REINDEER', 'ANIMALS', 'WILDLIFE', 'ANTARCTICA', 'PETRODOLLARS', 'ARCTIC REGION', 'POLAR REGIONS', 'INSECTS', 'TURTLES', 'FAUNA', 'ARCTIC OCEAN', 'INUIT', 'DOGS', 'BIRDS', 'RUSSIANS', 'RABBITS', 'DRAUGHT ANIMALS', 'WILDLIFE CONSERVATION', 'WINTER SPORTS', 'BEETLES']





In [44]:
# Ollama client created without arguments, so the library defaults apply.
client = ollama.Client()

def ask_llama(question, relevant_key_terms, model="llama3.2"):
    """Ask a chat model to pick the three key terms most relevant to a question.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        relevant_key_terms: Candidate key terms; the object is formatted into
            the prompt as-is (a list is rendered with its Python repr).
        model: Name of the Ollama model to query. Defaults to "llama3.2",
            the model this function previously hard-coded.

    Returns:
        The content of the message in the chat response. The prompt asks for
        a comma-separated list of three terms, but the model's reply is not
        validated here.
    """
    prompt = f"""
        From the following list of key terms, please select the three that are most relevant to the question:
        question: {question}
        Key Terms: {relevant_key_terms}
        Output only the selected key terms, separated by commas, nothing else.
        """

    # Single-turn conversation: the whole instruction goes in one user message.
    response = client.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response['message']['content']


In [46]:
answer = ask_llama(question, relevant_key_terms)
print("Answer:", answer)

# Split the reply once into whitespace-trimmed terms; a non-string reply
# yields no terms and therefore fails the checks below.
selected_terms = (
    [term.strip() for term in answer.split(',')]
    if isinstance(answer, str)
    else []
)

# The answer is valid only if it names exactly three terms (the count the
# prompt asks for), the three are distinct (a repeated term is not three
# selections), and every one of them comes from the candidate list.
is_answer_valid = (
    len(selected_terms) == 3
    and len(set(selected_terms)) == 3
    and all(term in relevant_key_terms for term in selected_terms)
)
print(is_answer_valid)

Answer: ARCTIC REGION, POLAR REGIONS, WILDLIFE
True
