In [1]:
import yaml
import os
from sentence_transformers import SentenceTransformer
from pathlib import Path
import torch
import structlog

# Notebook-wide structured logger.
logger = structlog.getLogger()

# Pick the compute device once: the type string of the current accelerator
# when torch reports one as available, otherwise plain CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

logger.debug(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


2025-10-24 21:50:50 [debug    ] Using device: cuda


In [2]:
# Both locations sit side by side in the directory two levels above the
# current working directory (the notebook's cwd when the kernel started).
CONFIG_PATH = Path(".").resolve().parents[1].joinpath("config.yml")
# Sibling of config.yml: same parent directory, different leaf name.
CACHE_DIR = CONFIG_PATH.with_name("model_cache")

In [5]:
# Sanity check: show where CONFIG_PATH resolved to, since it depends on the
# kernel's working directory.
print(CONFIG_PATH)

/home/joshuale/coding/github/local-rag/services/embedding_service/config.yml


In [3]:
# Read the YAML config. The `with` block closes the file on exit, so no
# explicit close() call is needed.
with open(CONFIG_PATH, "r") as f:
    cfg = yaml.safe_load(f)

model_name = cfg["model"]["name"]
# NOTE: a `cfg["model"]["device"]` lookup used to live here but is disabled;
# the `device` detected in the first cell is used instead.

# parents=True matches the EmbeddingModel class further down, so both code
# paths create the cache directory the same way.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# Model names may contain "/" (e.g. "org/name"); flatten it so the model
# gets a single directory directly under CACHE_DIR.
local_model_dir = CACHE_DIR / model_name.replace("/", "_")

In [4]:
# The directory existing is not proof of a usable cache: `cache_folder` below
# points the download at this same directory, so an interrupted download
# leaves the directory behind without a saved model. `model.save()` writes
# `modules.json` at the top level, so that file is used as the "save completed"
# marker instead of the bare directory.
if not (local_model_dir / "modules.json").is_file():
    print(f"🔽 Downloading model {model_name} to {local_model_dir}")
    model = SentenceTransformer(model_name, device=device, cache_folder=str(local_model_dir))
    # Persist a self-contained copy so later runs can load straight from disk.
    model.save(str(local_model_dir))
else:
    print(f"✅ Loading model from local cache: {local_model_dir}")
    model = SentenceTransformer(str(local_model_dir), device=device)

✅ Loading model from local cache: /home/joshuale/coding/github/local-rag/services/embedding_service/model_cache/sentence-transformers_all-MiniLM-L12-v2


In [None]:
def encode(texts):
    """Embed one string or a sequence of strings with the notebook-level ``model``.

    A bare string is treated as a batch of one. The value produced by
    ``model.encode`` (called with normalized embeddings, batch size 16 and no
    progress bar) is returned unchanged.
    """
    batch = [texts] if isinstance(texts, str) else texts
    return model.encode(
        batch,
        batch_size=16,
        normalize_embeddings=True,
        show_progress_bar=False,
    )

In [None]:
# Two short sample sentences to smoke-test the encode() helper.
texts = ["I live in a city", "Cat loves dog"]
embeddings = encode(texts)
# Last expression of the cell: display the shape of the result.
embeddings.shape

In [None]:
# Inspect the container type that encode() hands back.
type(embeddings)

In [None]:
# checking the class

DEVICE = (
    torch.accelerator.current_accelerator().type
    if torch.accelerator.is_available()
    else "cpu"
)

class EmbeddingModel:
    """Singleton wrapper for embedding model (with local caching).

    The first ``EmbeddingModel()`` call reads the model name from
    ``CONFIG_PATH``, loads the model from ``CACHE_DIR`` (downloading and
    saving it there first if no saved copy is present) and stores the
    resulting instance on the class. Later calls return that same instance.

    Attributes set on the instance:
        model: the loaded ``SentenceTransformer``.
        model_name: the value of ``cfg["model"]["name"]``.
        device: the module-level ``DEVICE`` string the model was loaded with.
    """

    _instance = None

    @staticmethod
    def _has_saved_model(model_dir) -> bool:
        """Return True when ``model_dir`` contains a completed ``save()`` output.

        ``SentenceTransformer.save()`` writes ``modules.json`` at the top
        level of the target directory, so that file is used as the marker.
        Checking only that the directory exists is not enough: the download
        uses this same directory as its ``cache_folder``, so an interrupted
        download leaves the directory behind without a loadable model.

        Args:
            model_dir: ``str`` or path-like pointing at the model directory.
        """
        return os.path.isfile(os.path.join(model_dir, "modules.json"))

    def __new__(cls):
        """Create the shared instance on first use, then keep returning it."""
        if cls._instance is None:
            with open(CONFIG_PATH, "r") as f:
                cfg = yaml.safe_load(f)

            model_name = cfg["model"]["name"]

            CACHE_DIR.mkdir(parents=True, exist_ok=True)
            # "/" in the model name would create nested directories; flatten it.
            local_model_dir = CACHE_DIR / model_name.replace("/", "_")

            if not cls._has_saved_model(local_model_dir):
                logger.debug(f"🔽 Downloading model {model_name} to {local_model_dir}")
                model = SentenceTransformer(model_name, device=DEVICE, cache_folder=str(local_model_dir))
                # Persist a self-contained copy so later runs load from disk.
                model.save(str(local_model_dir))
            else:
                logger.debug(f"✅ Loading model from local cache: {local_model_dir}")
                model = SentenceTransformer(str(local_model_dir), device=DEVICE)

            # Publish the instance only after the model loaded successfully,
            # so a failed load is retried on the next call.
            instance = super().__new__(cls)
            instance.model = model
            instance.model_name = model_name
            instance.device = DEVICE
            cls._instance = instance

        return cls._instance

    def encode(self, texts: "str | list[str]") -> "list[list[float]]":
        """Embed ``texts`` and return the result as plain Python lists.

        Args:
            texts: a single string (treated as a batch of one) or a list of
                strings.

        Returns:
            ``self.model.encode(...)`` converted with ``.tolist()``. The
            model is called with normalized embeddings, batch size 16 and
            the progress bar disabled.
        """
        if isinstance(texts, str):
            texts = [texts]
        embeddings = self.model.encode(
            texts, normalize_embeddings=True, batch_size=16, show_progress_bar=False
        )
        return embeddings.tolist()

In [None]:
# try calling the class - used in routers/embed.py later
from pydantic import BaseModel

# Response schema: the model's name plus a nested list of floats.
class EmbedResponse(BaseModel):
    model: str
    embeddings: list[list[float]]

# EmbeddingModel() hands back the shared singleton instance; `texts` is the
# sample list defined in the earlier demo cell.
embedder = EmbeddingModel()
vectors = embedder.encode(texts)
# Last expression of the cell: build the response from the encoded vectors and display it.
EmbedResponse(model=embedder.model_name, embeddings=vectors)