# LangChain RAG Example with SimpleVecDB

This notebook demonstrates how to use SimpleVecDB as a vector store with LangChain for retrieval-augmented generation (RAG).

In [None]:
# Install required packages (if needed).
# Uncomment the line below to install; %pip (rather than !pip) installs into
# the environment of the running kernel.
# %pip install langchain langchain-core langchain-openai simplevecdb

In [None]:
import os
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.utils.utils import convert_to_secret_str
from simplevecdb import VectorDB
from simplevecdb.config import config

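# NOTE: no environment variables are loaded here. Replace the placeholder
# API keys, model name, and base URL in the cells below before running.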

## Initialize SimpleVecDB and Add Documents

In [None]:
# Initialize SimpleVecDB and get/create a collection.
# The database file lives under data_dir; make sure that directory exists
# first, because the path is relative to the current working directory and
# opening a database file inside a missing directory fails.
data_dir = "./examples/rag"
os.makedirs(data_dir, exist_ok=True)
db = VectorDB(os.path.join(data_dir, "test.db"))

# Get or create a collection (you can have multiple collections per database)
collection = db.collection("langchain_demo")

# Sample texts to store in the collection
texts = [
    "LangChain is awesome for building LLM applications",
    "SimpleVecDB rocks as a lightweight vector store",
    "RAG combines retrieval with generation for better answers",
]

# NOTE(review): re-running this cell calls add_texts again with the same
# texts — presumably this stores duplicates; confirm against SimpleVecDB docs.
collection.add_texts(texts)

print(f"✓ Added {len(texts)} documents to collection 'langchain_demo'")

## Create Vector Store and Retriever

In [None]:
# Build the embeddings client. The endpoint is assembled from the host and
# port exposed by simplevecdb's config object; the API key is a placeholder.
embedding_endpoint = f"http://{config.SERVER_HOST}:{config.SERVER_PORT}/v1"
embedding_api_key = convert_to_secret_str("your-embedding-api-key")
embeddings = OpenAIEmbeddings(base_url=embedding_endpoint, api_key=embedding_api_key)

# Wrap the "langchain_demo" collection of the database as a LangChain vector store
vectorstore = db.as_langchain(
    embeddings=embeddings,
    collection_name="langchain_demo",
)

# The retriever is what the RAG chain uses to fetch relevant documents
retriever = vectorstore.as_retriever()

print("✓ Vector store and retriever initialized for collection 'langchain_demo'")

## Setup RAG Chain

In [None]:
# Create the RAG chain: retrieve docs -> format context -> prompt -> LLM -> text

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Chat model used to generate the answer. The model name, base URL, and API
# key are placeholders and must be replaced before running.
llm = ChatOpenAI(
    model="your-model",
    base_url="your-base-url",
    api_key=convert_to_secret_str("your-api-key"),
    temperature=0.0,
)

# Prompt with two variables: {context} (retrieved text) and {question}.
# (A stray double quote after the first sentence has been removed so it is
# no longer sent to the model as part of the instructions.)
template = """
Answer the question based ONLY on the following context.

Context:
{context}

Question: {question}

Answer:"""
prompt = PromptTemplate.from_template(template)


def format_docs(docs):
    """Join the page_content of the retrieved documents, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# The input question is sent to the retriever (whose documents are flattened
# into the context string) and passed through unchanged as the question.
chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

print("✓ RAG chain components ready")

## Query the RAG Chain

In [None]:
# Send a question through the RAG chain and show both question and answer

query = "What components make up a RAG chain?"
response = chain.invoke(query)

print(f"Question: {query}", f"Answer: {response}", sep="\n")

## Cleanup (Optional)

In [None]:
import glob

# Close the database connection before deleting anything on disk
db.close()

# Cleanup: collect every path that starts with the database file path
# (the SQLite file plus the usearch index files created next to it)
db_file = os.path.join(data_dir, "test.db")
leftover_paths = glob.glob(f"{db_file}*")

for leftover in leftover_paths:
    os.remove(leftover)

print("✓ Cleanup complete")