In [1]:
!pip install faiss-cpu sentence-transformers transformers



In [2]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Small sentence-embedding model used to vectorise both the documents and the query.
model = SentenceTransformer("all-MiniLM-L6-v2")

  return torch._C._cuda_getDeviceCount() > 0


In [4]:
# Toy corpus: one short statement per entry. A document's position in this list
# is the id used to look it up after retrieval.
docs = [
    "Kubernetes manages containerized workloads",
    "PostgreSQL is a relational database",
    "MCP enables structured tool use for LLMs.",
]

In [5]:
# Encode the whole corpus in a single call; the NumPy result is what gets
# added to the FAISS index.
embeddings = model.encode(
    docs,
    convert_to_numpy=True,
)

In [6]:
# Flat L2 index over the document embeddings.
# The second axis of `embeddings` is the vector width the index is created with.
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)

# Row i of `embeddings` is stored at position i, so hit ids map straight to `docs`.
index.add(embeddings)

In [7]:
# Sanity check: number of vectors currently stored in the index.
n_indexed = index.ntotal
print("Index size:", n_indexed)

Index size: 3


In [8]:
# Embed the query with the same model as the documents so both live in the
# same vector space.
query = "What is Kubernetes?"
q_emb = model.encode([query], convert_to_numpy=True)

# D: distances, I: positions of the nearest documents (one row per query).
D, I = index.search(q_emb, k=2)

for idx in I[0]:
    # FAISS pads the result with -1 when fewer than k neighbours exist.
    # Without this guard docs[-1] would silently report the last document.
    if idx < 0:
        continue
    print("Match:", docs[idx])

Match: Kubernetes manages containerized workloads
Match: MCP enables structured tool use for LLMs.


In [9]:
# `pipeline` is imported in the notebook's import cell alongside the other
# dependencies, so this cell only builds and runs the generator.
qa_model = pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct")

# Ground the prompt in the top-ranked retrieval hit (first query, best match).
context = docs[I[0][0]]
prompt = f"Answer based on context:\nContext: {context}\nQuestion: {query}\nAnswer:"

# The pipeline returns one dict per generated sequence; "generated_text" holds
# the prompt followed by the model's continuation.
print(qa_model(prompt, max_new_tokens=100)[0]["generated_text"])


Device set to use cpu


Answer based on context:
Context: Kubernetes manages containerized workloads
Question: What is Kubernetes?
Answer: Kubernetes manages containerized workloads.
