In [5]:
pip install --upgrade pip

Collecting pip
  Using cached pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Using cached pip-25.2-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.0
    Uninstalling pip-24.0:
      Successfully uninstalled pip-24.0
Successfully installed pip-25.2
Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install pydantic_settings

Note: you may need to restart the kernel to use updated packages.


In [7]:
from pydantic_settings import BaseSettings

In [9]:
class settings(BaseSettings):
    APP_NAME: str = "llm_genai"
    ENV: str = "dev"
    HOST: str = "0.0.0.0"
    PORT: int = 8000
    LLM_PROVIDER: str = "openai"
    OPENAI_API_KEY: str | None = None
    MODEL_NAME: str = "gpt-3.5-turbo"
    EMB_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    FAISS_INDEX_PATH: str = "./data/index.faiss"
    DOC_STORE: str = "./data/chunk.parquet"
    REDIS_URL: str = "redis://redis:6379/0"
    RATE_LIMIT_RPS: float = 3.0
    PROMETHEUS_ENABLED:bool = True
    OTEL_ENABLED: bool = True
    JWT_SECRET: str = "dev-secret-change"

    class Config:
        env_file = ".env"

settings = settings()

settings



settings(APP_NAME='llm_genai', ENV='dev', HOST='0.0.0.0', PORT=8000, LLM_PROVIDER='openai', OPENAI_API_KEY=None, MODEL_NAME='gpt-3.5-turbo', EMB_MODEL='sentence-transformers/all-MiniLM-L6-v2', FAISS_INDEX_PATH='./data/index.faiss', DOC_STORE='./data/chunk.parquet', REDIS_URL='redis://redis:6379/0', RATE_LIMIT_RPS=3.0, PROMETHEUS_ENABLED=True, OTEL_ENABLED=True, JWT_SECRET='dev-secret-change')

In [11]:
pip install prometheus_client

Collecting prometheus_client
  Downloading prometheus_client-0.22.1-py3-none-any.whl.metadata (1.9 kB)
Downloading prometheus_client-0.22.1-py3-none-any.whl (58 kB)
Installing collected packages: prometheus_client
Successfully installed prometheus_client-0.22.1
Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install fastapi

Collecting fastapi
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting starlette<0.48.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.47.2-py3-none-any.whl.metadata (6.2 kB)
Collecting anyio<5,>=3.6.2 (from starlette<0.48.0,>=0.40.0->fastapi)
  Downloading anyio-4.10.0-py3-none-any.whl.metadata (4.0 kB)
Collecting idna>=2.8 (from anyio<5,>=3.6.2->starlette<0.48.0,>=0.40.0->fastapi)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting sniffio>=1.1 (from anyio<5,>=3.6.2->starlette<0.48.0,>=0.40.0->fastapi)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Downloading fastapi-0.116.1-py3-none-any.whl (95 kB)
Downloading starlette-0.47.2-py3-none-any.whl (72 kB)
Downloading anyio-4.10.0-py3-none-any.whl (107 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Using cached sniffio-1.3.1-py3-none-any.whl (10 kB)
Installing collected packages: sniffio, idna, anyio, starlette, fastapi

   -------- ------------------------------- 1/5 [id

In [13]:
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
from fastapi import APIRouter, Response

latency = Histogram("llm_latency_seconds", "LLM call latency")
errors = Counter("llm_errors_total", "Total LLM errors")
tokens = Counter("llm_tokens_total", "Total tokens used", ["type"])  # prompt/completion

router = APIRouter()

@router.get("/metrics")
def metrics():
    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)

In [17]:
metrics.latency

AttributeError: 'function' object has no attribute 'latency'

In [18]:
import contextlib
import time
# from metrics import latency, errors

@contextlib.contextmanager
def traced_llm(span_name: str = "llm_call"):
    start = time.perf_counter()
    try:
        yield
    except Exception:
        errors.inc()
        raise
    finally:
        latency.observe(time.perf_counter() - start)

In [19]:
import re

BLOCK_PATTERNS = [
    re.compile(r"\b(credit card|ssn|aadhaar|pan number)\b", re.I),
]

def is_blocked(text: str) -> bool:
    return any(p.search(text or "") for p in BLOCK_PATTERNS)

In [20]:
pip install redis

Collecting redis
  Downloading redis-6.4.0-py3-none-any.whl.metadata (10 kB)
Downloading redis-6.4.0-py3-none-any.whl (279 kB)
Installing collected packages: redis
Successfully installed redis-6.4.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
import time
from fastapi import HTTPException
from redis import Redis

class TokenBucket:
    def __init__(self, redis: Redis, key: str, rps: float):
        self.redis, self.key, self.rps = redis, key, rps
        self.capacity = max(1, int(rps * 3))

    def allow(self) -> bool:
        now = time.time()
        with self.redis.pipeline() as p:
            p.zremrangebyscore(self.key, 0, now - 1)
            p.zcard(self.key)
            p.execute()
            count = self.redis.zcard(self.key)
            if count >= self.capacity:
                return False
            self.redis.zadd(self.key, {str(now): now})
            self.redis.expire(self.key, 2)
            return True

def enforce_rate_limit(bucket: TokenBucket):
    if not bucket.allow():
        raise HTTPException(status_code=429, detail="Rate limit exceeded")

In [26]:
from abc import ABC, abstractmethod
from typing import Iterable

class LLMProvider(ABC):
    @abstractmethod
    def stream_chat(self, messages: list[dict]) -> Iterable[str]:
        ...

    @abstractmethod
    def embed(self, texts: list[str]) -> list[list[float]]:
        ...

In [27]:
pip install openai

Collecting openai
  Using cached openai-1.99.9-py3-none-any.whl.metadata (29 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Using cached jiter-0.10.0-cp311-cp311-win_amd64.whl.metadata (5.3 kB)
Collecting tqdm>4 (from openai)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting certifi (from httpx<1,>=0.23.0->openai)
  Using cached certifi-2025.8.3-py3-none-any.whl.metadata (2.4 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Using cached httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)
Collecting h11>=0.16 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Using cached h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)
Using cached openai-1.99.9-py3-none-any.whl (786 kB)
Using cached distro-1.9.0-py3-none-any.whl (20 kB)
Downloading httpx-0.28.1-py3-non

In [28]:
import os
from typing import Iterable
from openai import OpenAI
# from ..config import settings

client = OpenAI(api_key=settings.OPENAI_API_KEY)

class OpenAIProvider:
    def stream_chat(self, messages):
        stream = client.chat.completions.create(
            model=settings.MODEL_NAME,
            messages=messages,
            stream=True,
            temperature=0.2,
        )
        for chunk in stream:
            delta = chunk.choices[0].delta.content or ""
            if delta:
                yield delta

    def embed(self, texts):
        # Use text-embedding-3-small by default
        resp = client.embeddings.create(model="text-embedding-3-small", input=texts)
        return [d.embedding for d in resp.data]

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:
pip install sentence_transformers

In [None]:
def chunk_text(t: str, max_len: int = 512, overlap: int = 64):
    words = t.split()
    i = 0
    while i < len(words):
        j = min(len(words), i + max_len)
        yield " ".join(words[i:j])
        i = j - overlap
        if i < 0:
            i = 0

In [4]:
pip install pandas

Collecting pandas
  Using cached pandas-2.3.1-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas)
  Using cached numpy-2.3.2-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.1-cp311-cp311-win_amd64.whl (11.3 MB)
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
   -------------------------------------

In [6]:
pip install sentence_transformers

Collecting sentence_transformers
  Using cached sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence_transformers)
  Using cached transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
Collecting torch>=1.11.0 (from sentence_transformers)
  Using cached torch-2.8.0-cp311-cp311-win_amd64.whl.metadata (30 kB)
Collecting scikit-learn (from sentence_transformers)
  Using cached scikit_learn-1.7.1-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting scipy (from sentence_transformers)
  Using cached scipy-1.16.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence_transformers)
  Using cached huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting Pillow (from sentence_transformers)
  Using cached pillow-11.3.0-cp311-cp311-win_amd64.whl.metadata (9.2 kB)
Collecting filelock (from transformers<5.0.0,>=4.41.0->sentence_transformers)
  Using cached filelock-3.19.1-py3-none-any.whl.

In [7]:
from pathlib import Path
import pandas as pd
from sentence_transformers import SentenceTransformer
# from ..config import settings
# from ..utils.text import chunk_text

  from .autonotebook import tqdm as notebook_tqdm


In [None]:


model = SentenceTransformer(settings.EMB_MODEL)

def ingest_directory(data_dir: str, out_parquet: str):
    rows = []
    for p in Path(data_dir).rglob("*.txt"):
        text = p.read_text(encoding="utf-8", errors="ignore")
        for i, chunk in enumerate(chunk_text(text, 512)):
            rows.append({"doc": p.name, "chunk_id": i, "text": chunk})
    df = pd.DataFrame(rows)
    df.to_parquet(out_parquet, index=False)

In [9]:
pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp311-cp311-win_amd64.whl.metadata (5.2 kB)
Downloading faiss_cpu-1.12.0-cp311-cp311-win_amd64.whl (18.2 MB)
   ---------------------------------------- 0.0/18.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/18.2 MB ? eta -:--:--
    --------------------------------------- 0.3/18.2 MB ? eta -:--:--
   - -------------------------------------- 0.5/18.2 MB 1.2 MB/s eta 0:00:15
   - -------------------------------------- 0.8/18.2 MB 1.2 MB/s eta 0:00:15
   -- ------------------------------------- 1.0/18.2 MB 1.1 MB/s eta 0:00:16
   -- ------------------------------------- 1.3/18.2 MB 1.2 MB/s eta 0:00:15
   --- ------------------------------------ 1.6/18.2 MB 1.2 MB/s eta 0:00:14
   ---- ----------------------------------- 1.8/18.2 MB 1.3 MB/s eta 0:00:13
   ---- ----------------------------------- 2.1/18.2 MB 1.3 MB/s eta 0:00:13
   ----- ---------------------------------- 2.6/18.2 MB 1.3 MB/s eta 0:00:12
   ------ ---

In [4]:
from src.app.llm.openai_provider import OpenAIProvider

ModuleNotFoundError: No module named 'config'