### Prepare embeddings

In [None]:
!pip install sentence-transformers chromadb langchain google-genai requests

In [173]:
import os
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pickle


In [174]:
# Sentence-embedding model shared by indexing and querying, so document
# chunks and questions are encoded by the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

In [175]:
# Open an on-disk Chroma store under ./chroma_db. Passing only
# Settings(persist_directory=...) to chromadb.Client leaves persistence switched
# off on current Chroma releases, so the index would live in memory and vanish
# with the kernel; PersistentClient saves to and reloads from the directory.
client = chromadb.PersistentClient(path="./chroma_db")
# Reuse the "docs" collection when it already exists, otherwise create it.
collection = client.get_or_create_collection(name="docs")

In [176]:
# Load the scraped pages from disk. The indexing loop treats this object as a
# dict mapping each page URL to its text content (None or empty when a page
# yielded nothing).
# NOTE(security): pickle.load can execute arbitrary code while deserializing,
# so only open a docs_url.pickle that was produced by a trusted local run.
with open("docs_url.pickle", "rb") as f:
    scraped_pages = pickle.load(f)

In [177]:
# Text splitter applied to every page before embedding, configured with
# chunk_size=800 and chunk_overlap=100.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)

In [178]:
# Index every scraped page: split it into chunks, embed the chunks, and store
# them in Chroma tagged with the URL of the page they came from.
for url, content in scraped_pages.items():
    # Pages that were not scraped (None) or came back empty have nothing to index.
    if not content:
        continue

    chunks = splitter.split_text(content)
    # A page can split into zero chunks (e.g. whitespace-only text); sending an
    # empty batch to Chroma would abort the whole run, so skip such pages.
    if not chunks:
        continue

    embeddings = embedding_model.encode(chunks, show_progress_bar=True)

    # upsert instead of add: the ids are deterministic (URL + chunk index), so
    # re-running this cell refreshes the stored entries rather than colliding
    # with the ones written by a previous run.
    collection.upsert(
        ids=[f"{url}_chunk_{i}" for i in range(len(chunks))],
        documents=chunks,
        embeddings=embeddings.tolist(),
        # One dict per chunk so entries never alias a single shared object.
        metadatas=[{"source": url} for _ in chunks],
    )

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/11 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/20 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/42 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/19 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [179]:
# Retrieval smoke test: embed one sample question and list its closest chunks.
query = "How do I create a simple GET endpoint?"
query_embedding = embedding_model.encode([query])[0]

results = collection.query(n_results=5, query_embeddings=[query_embedding.tolist()])

# Results are nested per query embedding; only one was sent, so take index 0.
print("Top chunks:")
for doc, metadata in zip(results["documents"][0], results["metadatas"][0]):
    # Source line, chunk text and separator, each on its own line.
    print(f"Source: {metadata['source']}", doc, "---", sep="\n")


Top chunks:
Source: https://fastapi.tiangolo.com/reference/apirouter
```
get(path,*,response_model=Default(None),status_code=None,tags=None,dependencies=None,summary=None,description=None,response_description="Successful Response",responses=None,deprecated=None,operation_id=None,response_model_include=None,response_model_exclude=None,response_model_by_alias=True,response_model_exclude_unset=False,response_model_exclude_defaults=False,response_model_exclude_none=False,include_in_schema=True,response_class=Default(JSONResponse),name=None,callbacks=None,openapi_extra=None,generate_unique_id_function=Default(generate_unique_id))
```

Add apath operationusing an HTTP GET operation.
---
Source: https://fastapi.tiangolo.com/alternatives
Requests is one of the most downloaded Python packages of all time

The way you use it is very simple. For example, to do aGETrequest, you would write:

```
GET
```

```
response=requests.get("http://example.com/some/url")
```

```
response=requests.get("http:

In [180]:
# Read the Gemini API key from a local file so the secret is not written into
# the notebook; surrounding whitespace (such as a trailing newline) is removed.
GEMINI_API_KEY = ""
with open("Gemini.key", "r") as key_file:
    GEMINI_API_KEY = key_file.read().strip()

In [181]:
import base64
import os
from google import genai
from google.genai import types
import requests
import json

In [182]:
# Prompt template for answering from retrieved documentation. It has two
# str.format placeholders: {context} for the joined documentation chunks and
# {query} for the user's question.
PROMPT = """
You are a helpful assistant. Use the following documentation excerpts to answer the question, if the context provided does not contain an answer state it:

<context>
{context}
</context>

Question: {query}

Answer:"""

In [183]:
def generate(prompt):
    """Stream a Gemini answer for ``prompt`` to stdout and return the full text.

    Args:
        prompt: Complete prompt text; it is sent as a single user message.

    Returns:
        The concatenation of every text piece received from the stream.
    """
    client = genai.Client(api_key=GEMINI_API_KEY)

    model = "gemini-2.5-flash"
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt)],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
    )

    pieces = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        text = chunk.text
        # A streamed chunk may carry no text; printing it unguarded would emit
        # the literal string "None" into the answer.
        if not text:
            continue
        print(text, end="", flush=True)
        pieces.append(text)
    return "".join(pieces)

In [184]:
def generate_with_ollama(prompt):
    """Stream an answer from the local Ollama server and return the full text.

    The answer is printed to stdout piece by piece as it arrives.

    Args:
        prompt: Complete prompt text to send to the model.

    Returns:
        The concatenation of every ``response`` field received from the stream.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    model = "qwen3:0.6b"

    pieces = []
    # With stream=True the connection stays open until the body is consumed or
    # the response is closed; the with-block releases it even when parsing or
    # printing fails part-way through.
    with requests.post(
        "http://localhost:11434/api/generate",
        json={"prompt": prompt, "model": model, "think": False},
        stream=True,
    ) as response:
        response.raise_for_status()

        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Not a JSON line: show it raw rather than dropping it.
                print(line)
                continue
            # Each JSON line is expected to carry its text under "response";
            # a missing or null field contributes nothing.
            text = data.get("response") or ""
            print(text, end="", flush=True)
            pieces.append(text)
    return "".join(pieces)

In [192]:
# Question to answer in this run; the commented-out lines are alternative
# sample questions that can be swapped in.
#query = "How do I create a simple GET endpoint?"
#query = "How can I create custom exception handlers?"
query = "How can I enable CORS"
# Number of chunks to retrieve from the collection for the prompt context.
top_k = 5

In [193]:
# Retrieval-augmented answer for `query`.
# 1. Embed the question with the same model used for the indexed chunks.
query_embedding = embedding_model.encode([query])[0]

# 2. Fetch the top_k closest chunks from the collection.
results = collection.query(n_results=top_k, query_embeddings=[query_embedding.tolist()])

# 3. Join the hits for the single query (index 0) into one context string.
retrieved_docs = results["documents"][0]
context = "\n\n---\n\n".join(retrieved_docs)

# 4. Fill the prompt template and stream the answer from the local Ollama model.
formatted_prompt = PROMPT.format(context=context, query=query)

# To answer with Gemini instead, call generate(formatted_prompt) here.
response = generate_with_ollama(formatted_prompt)

To enable CORS, you can use the `CORSMiddleware` from Starlette, which is a convenience middleware for FastAPI. Here's how you can enable CORS:

1. Import the `CORSMiddleware` from `fastapi.middleware.cors` using the following import:
   ```python
   from fastapi.middleware.cors import CORSMiddleware
   ```

2. When configuring your FastAPI app, include the `CORSMiddleware` middleware with the appropriate options like `allow_origins`, `allow_methods`, and others. For example:
   ```python
   import fastapi

   app = fastapi.App("app_name")
   app.add_middleware(CORSMiddleware(...))
   ```

This will allow cross-origin requests to be handled by the middleware, which is essential for applications that need to support multiple origins and methods.