In [3]:
import chromadb
from sentence_transformers import SentenceTransformer
import os

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight & fast

# Connect to Chroma (use same dir as container)
client = chromadb.Client()
collection = client.get_or_create_collection(name="docs")

# Example docs — replace with your own!
documents = [
    "The warranty period for product X is two years from the purchase date.",
    "To reset your device, hold the power button for 10 seconds.",
    "Our support team is available 24/7 for premium customers."
]

# Embed and store
embeddings = model.encode(documents).tolist()
ids = [f"id_{i}" for i in range(len(documents))]

collection.add(
    documents=documents,
    embeddings=embeddings,
    ids=ids
)

print("✅ Ingestion complete. Documents added to Chroma!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Ingestion complete. Documents added to Chroma!


In [6]:
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from dotenv import load_dotenv
import os
import httpx
import chromadb
from sentence_transformers import SentenceTransformer

load_dotenv()
LLAMA_API_URL = os.getenv("LLAMA_API_URL")
CHROMA_DB_DIR = os.getenv("CHROMA_DB_DIR")

app = FastAPI(title="GenAI + ChromaDB + Llama")

app.mount("/static", StaticFiles(directory="static"), name="static")

class QueryRequest(BaseModel):
    question: str

# Load Chroma and embedder
client = chromadb.Client()
collection = client.get_or_create_collection(name="docs")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Retrieve
def retrieve_context(question: str) -> str:
    query_emb = embedder.encode([question]).tolist()
    results = collection.query(
        query_embeddings=query_emb,
        n_results=3
    )
    docs = results['documents'][0]
    return "\n".join(docs)

# Llama call
async def generate_answer_llama(question: str, context: str) -> str:
    prompt = f"""
    Answer the question using ONLY this context.
    If unknown, say 'I don't know'.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{LLAMA_API_URL}/api/generate",
            json={"model": "llama3", "prompt": prompt, "stream": False}
        )
        response.raise_for_status()
        data = response.json()
        return data["response"].strip()

@app.post("/api/query")
async def query_api(request: QueryRequest):
    try:
        context = retrieve_context(request.question)
        answer = await generate_answer_llama(request.question, context)
        return JSONResponse(content={"answer": answer, "context": context})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

@app.get("/", response_class=HTMLResponse)
async def serve_home():
    with open("static/index.html") as f:
        return HTMLResponse(f.read())


The error `RuntimeError: Directory 'static' does not exist` occurs because the FastAPI application is configured to serve static files from a directory named `static`, but this directory was not found in the current environment. We need to create this directory.

In [5]:
import os

# Create the static directory if it doesn't exist
if not os.path.exists("static"):
    os.makedirs("static")
    print("Created 'static' directory.")
else:
    print("'static' directory already exists.")

Created 'static' directory.
