In [1]:
!pip install farm-haystack transformers ollama
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install sentence-transformers
!pip install "farm-haystack[inference]"

In [5]:
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.schema import Document
import ollama

In [2]:
# Initialize the in-memory document store. embedding_dim must match the width
# of the vectors produced by the embedding model handed to the retriever.
document_store = InMemoryDocumentStore(embedding_dim=384)

# Load the key terms (one per line) from terms.csv.
# The context manager guarantees the file handle is closed, and blank lines
# (including the one produced by a trailing newline) are skipped so that no
# empty-content document ends up in the store.
with open('terms.csv') as terms_file:
    key_terms = [line.strip() for line in terms_file if line.strip()]

# Wrap every key term in a Haystack Document so it can be indexed.
documents = [Document(content=term) for term in key_terms]

# Write documents to the store
document_store.write_documents(documents)

In [None]:
# Dense retriever backed by a sentence-transformers model, running on CPU.
retriever = EmbeddingRetriever(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    document_store=document_store,
    use_gpu=False,
)

# Compute an embedding for every stored key term and persist it in the store,
# so that later queries can be matched against them.
document_store.update_embeddings(retriever)

Batches: 100%|██████████| 227/227 [00:02<00:00, 77.35it/s]cs/s]
Documents Processed: 10000 docs [00:02, 3333.78 docs/s]         


In [4]:
def get_relevant_key_terms(question, top_k=20):
    """Return the text of the key terms the retriever ranks best for a question.

    Args:
        question: Query string passed to the module-level ``retriever``.
        top_k: Number of results requested from the retriever (default 20).

    Returns:
        A list with the ``content`` attribute of each retrieved document, in
        the order the retriever returned them.
    """
    matches = retriever.retrieve(query=question, top_k=top_k)
    terms = []
    for match in matches:
        terms.append(match.content)
    return terms

# Demo: fetch the candidate key terms for a sample question and show them.
question = "What are the geopolitical impacts of the Syrian refugee crisis?"
relevant_key_terms = get_relevant_key_terms(question=question)
print("Relevant Key Terms:", relevant_key_terms)

Batches: 100%|██████████| 1/1 [00:00<00:00, 151.88it/s]


Relevant Key Terms: ['IRAQI REFUGEES', 'PROTRACTED REFUGEE SITUATIONS', 'REFUGEES', 'WESTERN SAHARAN REFUGEES', 'PALESTINE REFUGEES', 'ALGERIAN REFUGEES', 'REFUGEE STATUS', 'SYRIANS', 'REFUGEE-RECEIVING COUNTRIES', 'AFRICAN REFUGEES', 'LEBANON SITUATION', 'UGANDAN REFUGEES', 'REFUGEE PROTECTION', 'CENTRAL AFRICAN REFUGEES', 'CENTRAL AMERICAN REFUGEES', 'REFUGEE ASSISTANCE', 'CHADIAN REFUGEES', 'MALIAN REFUGEES', 'CHILD REFUGEES', 'AFGHAN REFUGEES']


In [19]:
# Ask the LLM to narrow the retrieved candidates down to the three best terms.
prompt = f"""
From the following list of key terms, please select the three that are most relevant to the question:
Question: {question}
Key Terms: {relevant_key_terms}
Output only the selected key terms, separated by commas.
"""

# Client for the Ollama server (default connection settings).
client = ollama.Client()

# Single-turn chat: the whole prompt is sent as one user message.
response = client.chat(
    model="llama3.2",
    messages=[{"role": "user", "content": prompt}],
)

# Show the full response payload first, then just the model's answer text.
print(response)
print(response["message"]["content"])

{'model': 'llama3.2', 'created_at': '2024-11-25T17:14:56.176258Z', 'message': {'role': 'assistant', 'content': 'SYRIANS, REFUGEES, REFUGEE-RECEIVING COUNTRIES'}, 'done_reason': 'stop', 'done': True, 'total_duration': 1367920583, 'load_duration': 28161000, 'prompt_eval_count': 236, 'prompt_eval_duration': 796000000, 'eval_count': 19, 'eval_duration': 542000000}
SYRIANS, REFUGEES, REFUGEE-RECEIVING COUNTRIES
