<a href="https://colab.research.google.com/github/khajum/ai-playground/blob/main/rag/rag-application-101/EmbeddingManager.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG Pipelines - Embedding Manager

In [None]:
# Install all embedding- and vector-DB-related packages if not already installed
!pip install sentence-transformers faiss-cpu chromadb

In [16]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
class EmbeddingManager:
  """Handles document embedding generation using SentenceTransformer.

  The model named by ``embedding_model_name`` is loaded when the manager is
  constructed and stored on ``self.model``; ``generate_embeddings`` reloads it
  if ``self.model`` has been reset to ``None``.
  """

  def __init__(self, embedding_model_name:str="all-MiniLM-L6-v2") -> None:
    """
    Initialize the EmbeddingManager and load the embedding model.

    Args:
      embedding_model_name (str): Name of the embedding model to use.

    Raises:
      Exception: Whatever the model loader raises is propagated unchanged.
    """
    self.embedding_model_name = embedding_model_name
    self.model = None
    self._load_model()

  def _load_model(self) -> None:
    """
    Load the SentenceTransformer embedding model into ``self.model``.

    Raises:
      Exception: Any loading error is printed and then re-raised.
    """
    try:
      print(f"Loading SentenceTransformer model: {self.embedding_model_name}")
      self.model = SentenceTransformer(self.embedding_model_name)
      print(f"Loaded SentenceTransformer model successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
    except Exception as e:
      print(f"Error loading embedding model: {e}")
      raise

  def generate_embeddings(self, texts:List[str]) -> np.ndarray:
    """
    Generate embeddings for a list of texts.

    Args:
      texts (List[str]): List of texts to generate embeddings for.

    Returns:
      np.ndarray: Array of embeddings, one row per input text. For an empty
        input the result is an empty 2-D array of shape
        ``(0, embedding_dimension)``.

    Raises:
      Exception: Any error raised while encoding is printed and re-raised.
    """
    if self.model is None:
      self._load_model()

    # Short-circuit empty input: skip the encoder entirely and hand back an
    # explicitly 2-D result so callers can rely on ``embeddings.shape[1]``
    # and on stacking/concatenating without special-casing "no documents".
    if len(texts) == 0:
      # The dimension getter may report None for some models; fall back to 0.
      dimension = self.model.get_sentence_embedding_dimension() or 0
      empty_embeddings = np.empty((0, dimension), dtype=np.float32)
      print(f"No texts provided; returning empty embeddings with shape: {empty_embeddings.shape}")
      return empty_embeddings

    try:
      print(f"Generating embeddings for {len(texts)} texts...")
      embeddings = self.model.encode(texts, show_progress_bar=True)
      print(f"Successfully generated embeddings for {len(texts)} texts with shape: {embeddings.shape}")
      return embeddings
    except Exception as e:
      print(f"Error generating embeddings: {e}")
      raise


# Build an embedding manager with its default model, then embed the
# `page_content` of every document in `all_pdf_document`.
# NOTE(review): `all_pdf_document` is not defined in the visible part of this
# notebook; presumably it comes from an earlier document-loading step - confirm.
embedding_manager = EmbeddingManager()
embeddings = embedding_manager.generate_embeddings(
    [pdf_doc.page_content for pdf_doc in all_pdf_document]
)
