# LlamaIndex RAG Example with SimpleVecDB

This notebook demonstrates how to use SimpleVecDB as a vector store with LlamaIndex for retrieval-augmented generation (RAG).

In [None]:
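# Install required packages (if needed).
# `%pip` installs into the environment of the running kernel; the OpenAI-like LLM
# integration used below ships as its own package.
# %pip install llama-index llama-index-core llama-index-llms-openai-like llama-index-embeddings-openai simplevecdb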

In [1]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Settings,
)
from llama_index.llms.openai_like import OpenAILike
from llama_index.embeddings.openai import OpenAIEmbedding
from simplevecdb import VectorDB
from simplevecdb.config import config
import os

  from .autonotebook import tqdm as notebook_tqdm


## Setup LLM and Embeddings

In [None]:
# Chat model settings -- replace the placeholder values with those of your
# OpenAI-compatible endpoint before running this cell.
llm_options = dict(
    model="your-model",
    api_base="your-base-url",
    api_key="your-api-key",
    temperature=0.1,
)
Settings.llm = OpenAILike(**llm_options)

# The embedding endpoint URL is assembled from the host and port exposed by
# simplevecdb's config object.
embedding_endpoint = f"http://{config.SERVER_HOST}:{config.SERVER_PORT}/v1"
Settings.embed_model = OpenAIEmbedding(
    api_key="your-api-key",
    api_base=embedding_endpoint,
)

## Setup: Create Sample Data

First, let's create a few sample text files for the index to ingest.

In [None]:
# Create the data directory and write the sample documents into it.

# NOTE: the path is resolved against the current working directory (not the
# notebook's own location), so the kernel's working directory must be the
# folder that contains "examples/rag".
notebook_dir = os.path.join(os.getcwd(), "examples", "rag")
data_dir = os.path.join(notebook_dir, "data")

os.makedirs(data_dir, exist_ok=True)

# File name -> text content of each sample document.
sample_docs = {
    "doc1.txt": "SimpleVecDB is a lightweight vector database powered by usearch HNSW indexing. It's designed to be simple and easy to use.",
    "doc2.txt": "Vector databases store embeddings and allow for efficient similarity search. They are essential for RAG applications.",
    "doc3.txt": "LlamaIndex is a data framework for building LLM applications. It provides tools for data ingestion, indexing, and querying.",
}

for file_name, text in sample_docs.items():
    # An explicit encoding keeps the written bytes independent of the
    # platform's default text encoding.
    with open(os.path.join(data_dir, file_name), "w", encoding="utf-8") as f:
        f.write(text)

print(f"✓ Sample documents created in {data_dir}")
print(f"✓ Files in directory: {os.listdir(data_dir)}")

## Initialize SimpleVecDB Vector Store

In [None]:
# Point SimpleVecDB at the database file used by this example
db_path = "./examples/rag/test.db"
db = VectorDB(db_path)

# Obtain the LlamaIndex adapter for the database and register it as the
# vector store of a new storage context
vector_store = db.as_llama_index()
storage_context = StorageContext.from_defaults(vector_store=vector_store)

print("✓ Vector store initialized")

## Load Documents and Create Index

In [None]:
# Load the sample documents from the data directory and build the index.

# Fail early with a clear message if the data directory is missing
# (for example because the setup cell has not been run).
if not os.path.isdir(data_dir):
    raise FileNotFoundError(f"Data directory not found: {data_dir}")

# os.listdir returns names in arbitrary order and also lists sub-directories
# (e.g. a .ipynb_checkpoints folder created by Jupyter). Keep regular files
# only and sort them so the loaded documents come in a reproducible order.
files_in_dir = sorted(
    name
    for name in os.listdir(data_dir)
    if os.path.isfile(os.path.join(data_dir, name))
)
if not files_in_dir:
    raise ValueError(f"No files found in {data_dir}")

print(f"✓ Found files in {data_dir}: {files_in_dir}")

# Pass the files explicitly so exactly the files listed above are read.
documents = SimpleDirectoryReader(
    input_files=[os.path.join(data_dir, name) for name in files_in_dir],
).load_data()

print(f"Loaded {len(documents)} documents from {data_dir}")

# Build the index from the documents, storing vectors through the
# SimpleVecDB-backed storage context.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

print("✓ Index created and documents embedded")

## Query the Index

In [None]:
# Build a query engine on top of the index
query_engine = index.as_query_engine()

# Ask the first question and show the answer
first_question = "What is SimpleVecDB?"
response = query_engine.query(first_question)

print("Response:", response, sep="\n")

In [None]:
# Ask a follow-up question with the same query engine
follow_up_question = "What are vector databases used for?"
response2 = query_engine.query(follow_up_question)

print("Response:", response2, sep="\n")

## Cleanup (Optional)

In [None]:
import glob
import shutil

# Close the database before deleting its files
db.close()

# Delete the generated sample documents if the directory is still present
if os.path.exists(data_dir):
    shutil.rmtree(data_dir)

# Delete the database file together with every file whose name starts with
# the same path (the glob pattern is the database path followed by "*")
db_file = "./examples/rag/test.db"
for leftover_path in glob.glob(db_file + "*"):
    os.remove(leftover_path)

print("✓ Cleanup complete")