In [1]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings

# Step 1: Load the Embedding Model
# This will download the model if you don't have it already.
# Use "cuda" if you have a GPU, "cpu" if you don't.
model = SentenceTransformer('BAAI/bge-large-en-v1.5', device="cpu")

# BGE retrieval instruction. Per the BGE model card this prefix is meant for
# the *query* side of short-query -> passage retrieval only; the passages
# (documents) themselves are embedded without it.
instruction = "Represent this sentence for searching relevant passages: "

# Step 2: Prepare Your Documents (Your Knowledge Base)
documents = [
    "The latest iPhone 15 features a titanium design and a powerful A17 Pro chip.",
    "Penguins are a group of flightless birds living primarily in the Southern Hemisphere.",
    "A popular baking ingredient, yeast helps dough rise by converting sugars into carbon dioxide.",
    "The Treaty of Versailles was signed in 1919, officially ending World War I.",
    "Python is a high-level programming language known for its readability and versatility."
]

# Give unique IDs to each document (can be any IDs you want)
document_ids = [f"doc_{i}" for i in range(len(documents))]

# Step 3: Generate Embeddings and Create a Vector Database

# Initialize a persistent Chroma client. This will create a `chroma_db` directory.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Create a collection. This is like a table in a database.
collection = chroma_client.get_or_create_collection(
    name="my_knowledge_base",
    metadata={"hnsw:space": "cosine"} # Cosine similarity is often a good choice
)

# Only index when the collection is empty, so re-running the cell does not
# re-add the same data.
# NOTE: because of this guard, a `./chroma_db` directory that was populated by
# an earlier run (which embedded the documents with the query instruction
# prepended) is NOT rebuilt automatically; delete the directory to re-index.
if collection.count() == 0:
    print("Indexing documents...")

    # Embed the raw documents in bulk. The instruction prefix is deliberately
    # not added here: it belongs on queries only (see `instruction` above).
    document_embeddings = model.encode(documents, normalize_embeddings=True)

    # Add the documents, their IDs, and their embeddings to the collection.
    collection.add(
        documents=documents,
        ids=document_ids,
        embeddings=document_embeddings.tolist() # Chroma expects a list of lists
    )
    print("Documents indexed successfully!")
else:
    print("Collection already populated.")

# Step 4: Query the System
def retrieve_documents(query, top_k=2):
    """
    Look up the stored documents closest to a query in the vector database.

    Args:
        query (str): The user's question or search term.
        top_k (int): How many results to return.

    Returns:
        The result object produced by `collection.query`. Callers in this
        notebook read the matched texts from `results['documents'][0]`.
    """
    # Prefix the query with the retrieval instruction, then embed it.
    prefixed_query = instruction + query
    query_vector = model.encode(prefixed_query, normalize_embeddings=True)

    # Nearest-neighbour search against the module-level collection.
    return collection.query(
        query_embeddings=query_vector.tolist(),
        n_results=top_k
    )

# Sample questions to run against the knowledge base
queries = [
    "What are iPhones made of?",
    "Tell me about animals that can't fly.",
    "How does bread rise?"
]

for query in queries:
    print(f"\nQuery: '{query}'")
    results = retrieve_documents(query)

    # `results['documents']` holds one list of hits per query sent; a single
    # query was issued, so its hits are at index 0.
    top_documents = results['documents'][0]
    for i, doc in enumerate(top_documents):
        print(f"Result {i+1}: {doc}")

Indexing documents...
Documents indexed successfully!

Query: 'What are iPhones made of?'
Result 1: The latest iPhone 15 features a titanium design and a powerful A17 Pro chip.
Result 2: A popular baking ingredient, yeast helps dough rise by converting sugars into carbon dioxide.

Query: 'Tell me about animals that can't fly.'
Result 1: Penguins are a group of flightless birds living primarily in the Southern Hemisphere.
Result 2: Python is a high-level programming language known for its readability and versatility.

Query: 'How does bread rise?'
Result 1: A popular baking ingredient, yeast helps dough rise by converting sugars into carbon dioxide.
Result 2: The Treaty of Versailles was signed in 1919, officially ending World War I.


In [2]:
import os
from dotenv import load_dotenv
# First, install the openai package: pip install openai
from openai import OpenAI

# Load the Together AI API key from the environment (or a local .env file).
# The OpenAI client below is pointed at Together AI's endpoint, so the key
# must be a Together AI key stored in TOGETHER_API_KEY, not an OpenAI
# platform key.
load_dotenv()
TOGETHER_AI_API_KEY = os.getenv("TOGETHER_API_KEY","")
if not TOGETHER_AI_API_KEY:
    # Surface the misconfiguration here instead of as an opaque API error later.
    print("Warning: TOGETHER_API_KEY is not set; LLM requests will likely be rejected.")

def rag_with_openai(user_query, top_k=2):
    """
    Answer a question with retrieval-augmented generation.

    The question is first used to fetch the `top_k` closest documents via
    `retrieve_documents`; those documents are then placed in a prompt that is
    sent through the OpenAI client to Together AI's chat-completions endpoint.

    Args:
        user_query (str): The question to answer.
        top_k (int): How many retrieved documents to include as context.

    Returns:
        The `message.content` of the first choice in the chat completion.
    """
    # 1. Retrieval: gather the matched passages and join them into one context block.
    retrieved = retrieve_documents(user_query, top_k=top_k)
    passages = retrieved['documents'][0]
    context = "\n\n".join(passages)

    # 2. Prompt: instructs the model to answer from the context only.
    prompt = f"""Based on the following information, answer the user's question. If the answer isn't in the context, say you don't know.

Context:
{context}

User Question: {user_query}

Answer:"""

    # 3. Generation: OpenAI-compatible client aimed at Together AI.
    llm_client = OpenAI(
        base_url="https://api.together.xyz/v1",
        api_key=TOGETHER_AI_API_KEY,
    )
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context."},
        {"role": "user", "content": prompt}
    ]
    completion = llm_client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
        messages=chat_messages,
        max_tokens=150
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# End-to-end check: retrieve context for one question and have the LLM answer it.
user_question = "What is the iPhone 15's design made from?"
answer = rag_with_openai(user_question)

# Emit the question and the answer on consecutive lines.
print(f"\nQuestion: {user_question}", f"Answer: {answer}", sep="\n")


Question: What is the iPhone 15's design made from?
Answer: The iPhone 15's design is made from titanium.


In [3]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings

# Load the embedding model (the same BAAI/bge-large-en-v1.5 checkpoint used
# earlier in this notebook; no device is passed, so the library picks one).
# NOTE(review): this cell rebinds the notebook-level names `model`,
# `collection` and `documents`. `retrieve_documents` reads `model` and
# `collection` as globals, so after this cell runs it will search the
# `tech_docs` collection instead of `my_knowledge_base`.
model = SentenceTransformer('BAAI/bge-large-en-v1.5')

# Initialize ChromaDB client (persisted under ./chroma_metadata_db)
client = chromadb.PersistentClient(path="./chroma_metadata_db")

# Create a collection. We'll specify we want to use cosine similarity.
collection = client.get_or_create_collection(
    name="tech_docs",
    metadata={"hnsw:space": "cosine"}
)

# Define our documents with METADATA
documents = [
    "The iPhone 15 Pro features a new titanium chassis.",
    "The MacBook Pro is powered by the M3 chip for incredible performance.",
    "The iPad Pro has a stunning Liquid Retina XDR display.",
    "Apple Watch Series 9 introduces a new double-tap gesture."
]

# Define metadata for each document (same order as `documents`)
metadatas = [
    {"category": "phone", "release_year": 2023},
    {"category": "laptop", "release_year": 2023},
    {"category": "tablet", "release_year": 2022},
    {"category": "wearable", "release_year": 2023}
]

ids = ["doc1", "doc2", "doc3", "doc4"]

# Chroma can generate embeddings for you, but we provide our own for consistency.
embeddings = model.encode(documents).tolist()

# Upsert rather than add: the store is persistent and the IDs are fixed, so a
# plain `add` on a second run of this cell would try to insert IDs that
# already exist. `upsert` inserts new IDs and overwrites existing ones, which
# keeps the cell safe to re-run and picks up edits to the documents above.
collection.upsert(
    documents=documents,
    embeddings=embeddings, # We provide the embeddings
    metadatas=metadatas,   # We provide the metadata
    ids=ids
)

# Demo searches, run in order. Each entry is
# (banner to print, query text, extra keyword arguments for collection.query).
#   1. Basic semantic search: no extra arguments.
#   2. Semantic search WITH metadata filtering: a `where` clause restricts the
#      candidates to documents whose metadata has category == "laptop".
searches = [
    ("=== Basic Semantic Search ===", "new Apple phone", {}),
    ("=== Search Filtered to Laptops Only ===", "powerful device", {"where": {"category": "laptop"}}),
]

for banner, query_text, extra_kwargs in searches:
    print(banner)
    results = collection.query(
        query_embeddings=model.encode(query_text).tolist(),
        n_results=2,
        **extra_kwargs
    )
    # Index 0 selects the hits for the single query that was sent.
    hit_documents = results['documents'][0]
    hit_metadatas = results['metadatas'][0]
    for doc, meta in zip(hit_documents, hit_metadatas):
        print(f"Document: {doc}")
        print(f"Metadata: {meta}\n")

=== Basic Semantic Search ===
Document: The iPhone 15 Pro features a new titanium chassis.
Metadata: {'category': 'phone', 'release_year': 2023}

Document: Apple Watch Series 9 introduces a new double-tap gesture.
Metadata: {'release_year': 2023, 'category': 'wearable'}

=== Search Filtered to Laptops Only ===
Document: The MacBook Pro is powered by the M3 chip for incredible performance.
Metadata: {'category': 'laptop', 'release_year': 2023}

