In [11]:
pip install sentence-transformers numpy ollama

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [12]:
import numpy as np
from sentence_transformers import SentenceTransformer
import ollama

### Data Ingestion

In [13]:
# Toy knowledge base: five one-sentence documents. Each string is treated as
# one retrievable unit (it is embedded and returned whole).
corpus = [
    "Machine learning allows computers to learn from data without being explicitly programmed.",
    "Deep learning is a subset of machine learning that uses neural networks with many layers.",
    "RAG stands for Retrieval-Augmented Generation, which combines retrieval with generation.",
    "FAISS is a library for efficient similarity search in dense vector spaces.",
    "Ollama lets you run LLMs locally on your machine."
]

In [14]:
# Sanity check: echo the raw documents before they are embedded.
print(corpus)

['Machine learning allows computers to learn from data without being explicitly programmed.', 'Deep learning is a subset of machine learning that uses neural networks with many layers.', 'RAG stands for Retrieval-Augmented Generation, which combines retrieval with generation.', 'FAISS is a library for efficient similarity search in dense vector spaces.', 'Ollama lets you run LLMs locally on your machine.']


In [15]:
# Embeddings: load the sentence encoder once and embed every corpus document.
model = SentenceTransformer('all-MiniLM-L6-v2')
corpus_embeddings = model.encode(corpus, convert_to_numpy=True)

# Scale each row to unit L2 length so that a plain dot product between two
# embeddings equals their cosine similarity.
row_norms = np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
corpus_embeddings = corpus_embeddings / row_norms


### Retrieval

In [16]:
def retrieve(query, top_k=2):
    """Return the corpus documents most similar to ``query``.

    Relies on the notebook-level ``model`` (sentence encoder), ``corpus``
    (list of documents) and ``corpus_embeddings`` (one unit-normalized row
    per document).

    Args:
        query: The text to search for.
        top_k: Maximum number of documents to return. Values <= 0 yield an
            empty list; values larger than the corpus return every document.

    Returns:
        A list of ``(document, score)`` tuples ordered from most to least
        similar, where ``score`` is the cosine similarity as a Python float.
    """
    # Guard first: with top_k == 0 the slice [-0:] would select the WHOLE
    # array, and a negative top_k would select an unrelated slice.
    if top_k <= 0:
        return []

    # encode() is given a one-element batch; take its single row so the
    # scores below are always 1-D, even when the corpus has one document.
    query_embedding = model.encode([query], convert_to_numpy=True)[0]
    query_embedding = query_embedding / np.linalg.norm(query_embedding)

    # cosine similarity = dot product of normalized vectors
    scores = np.dot(corpus_embeddings, query_embedding)

    # Get top-k indices, best match first
    top_k_idx = scores.argsort()[-top_k:][::-1]

    return [(corpus[i], float(scores[i])) for i in top_k_idx]

### Generate answer with Ollama

In [19]:
def rag_answer(query, top_k=2, model_name="llama2"):
    """Answer ``query`` with an Ollama chat model grounded in retrieved context.

    The ``top_k`` documents returned by ``retrieve`` are joined with newlines
    and placed in a prompt that tells the model to answer only from that
    context. The prompt is sent as a single user message.

    Args:
        query: The user's question.
        top_k: Number of documents to request from ``retrieve``.
        model_name: Name of the Ollama model passed to ``ollama.chat``.

    Returns:
        The ``content`` field of the ``message`` in the chat response.
    """
    hits = retrieve(query, top_k=top_k)
    # Only the document text goes into the prompt; similarity scores are dropped.
    context = "\n".join(text for text, _score in hits)

    prompt = f"""Answer the following question using only the context below.
If the answer cannot be found in the context, say "I don't know."

Context:
{context}

Question: {query}
Answer:"""

    user_message = {"role": "user", "content": prompt}
    reply = ollama.chat(model=model_name, messages=[user_message])
    return reply["message"]["content"]

### Testing

In [20]:

# Demo: run a single query end to end. The guard is true when this cell is
# executed in a notebook kernel or as a script, and false if the code is
# imported as a module.
if __name__ == "__main__":
    query = "What is deep learning?"
    print("User Query:", query)

    # Show the retrieved passages and their similarity scores first, so the
    # generated answer can be checked against its supporting context.
    results = retrieve(query)
    print("\nRetrieved Context:")
    for doc, score in results:
        print(f"- {doc} (score={score:.4f})")

    # rag_answer() calls retrieve() itself, so retrieval runs a second time here.
    print("\nOllama's Answer:")
    print(rag_answer(query))

User Query: What is deep learning?

Retrieved Context:
- Deep learning is a subset of machine learning that uses neural networks with many layers. (score=0.8278)
- Machine learning allows computers to learn from data without being explicitly programmed. (score=0.4627)

Ollama's Answer:
Sure, I'd be happy to help! Based on the context you provided, deep learning can be defined as a subset of machine learning that uses neural networks with many layers to enable computers to learn from data without being explicitly programmed.
