In [2]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_core.embeddings import Embeddings
from typing import Optional, List


def load_embeddings_model_hf(model_name: Optional[str]) -> Embeddings:
    """
    load embeddings model.

    Args:
        model_name (str): Model name
    
    Returns:
        Embeddings: Embeddings model
    """
    try:
        # Instantiate embeddings
        embeddings_model = HuggingFaceEmbeddings(model_name=model_name, show_progress=True)
    except Exception as e:
        raise Exception(f"Failed to load embeddings model: {e}")
    
    return embeddings_model

def generate_document_embeddings(documents: List[str], embeddings_model: Embeddings) -> List[List[float]]:
    """
    Embed documents.

    Args:
        documents (List[str]): List of documents
        embeddings_model (Embeddings): Embeddings model
    
    Returns:
        List[List[float]]: List of embeddings
    """
    try:
        # Embed documents
        embeddings = embeddings_model.embed_documents(documents)
    except Exception as e:
        raise Exception(f"Failed to generate document embeddings: {e}")
    return embeddings

def generate_query_embeddings(query: str, embeddings_model: Embeddings) -> List[float]:
    """
    Embed query.

    Args:
        query (str): Query
        embeddings_model (Embeddings): Embeddings model
    
    Returns:
        List[float]: Embedding
    """
    try:
        # Embed query
        embedding = embeddings_model.embed_query(query)
    except Exception as e:
        raise Exception(f"Failed to generate query embeddings: {e}")    

    return embedding


In [None]:
from typing import Optional, Dict, Any, List
from pydantic import BaseModel

class EmbeddingsRequest(BaseModel):
    model: str = "sentence-transformers/all-MiniLM-L6-v2"
    query: Optional[str] = None

class EmbeddingsResponse(BaseModel):
    embeddings: List[float]

In [3]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"

documents = [
    "The quick brown fox jumps over the lazy dog.",
    "A quick brown dog outpaces a quick fox.",
    "The quick brown fox is quick.",
    "The quick brown dog is brown."
]

query = "The quick brown fox jumps over the lazy dog."

# Load embeddings model
embedding_model = load_embeddings_model_hf(model_name)

# Generate document embeddings
document_embeddings = generate_document_embeddings(documents, embedding_model)

# Generate query embeddings
query_embedding = generate_query_embeddings(query, embedding_model)

print("Document embeddings:")
print(document_embeddings)

print("Query embedding:")
print(query_embedding)

  from tqdm.autonotebook import tqdm, trange
Batches: 100%|██████████| 1/1 [00:03<00:00,  3.09s/it]
Batches: 100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

Document embeddings:
[[0.04393358901143074, 0.058934394270181656, 0.048178452998399734, 0.0775480717420578, 0.026744389906525612, -0.0376296192407608, -0.002605091081932187, -0.0599430687725544, -0.0024960567243397236, 0.022072825580835342, 0.04802593216300011, 0.05575532466173172, -0.03894541412591934, -0.02661680243909359, 0.007693421561270952, -0.026237696409225464, -0.03641613945364952, -0.03781614452600479, 0.07407814264297485, -0.049504995346069336, -0.058521732687950134, -0.0636196881532669, 0.03243498131632805, 0.022008512169122696, -0.07106372714042664, -0.03315778821706772, -0.06941041350364685, -0.05003742501139641, 0.07462679594755173, -0.11113377660512924, -0.01230638101696968, 0.0377456359565258, -0.028031302616000175, 0.014535338617861271, -0.0315585732460022, -0.08058365434408188, 0.05835261568427086, 0.0025900916662067175, 0.03928031027317047, 0.02576957829296589, 0.04985060915350914, -0.0017561924178153276, -0.045529771596193314, 0.02926083840429783, -0.10201725363731




In [None]:
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from app.schemas import EmbeddingsRequest, EmbeddingsResponse
from app.utils.embeddings import *


# Instantiate router
router = APIRouter(
    prefix="/api/embeddings", 
    tags=["embeddings"], 
    responses={404: {"description": "Not found"}}
)


@router.post("/", response_model=EmbeddingsResponse)
async def main(request: EmbeddingsRequest):
    """
    Generate embeddings for query.
    """
    # Extract request
    model = request.model
    query = request.query

    try:
        # Load embeddings model
        embeddings_model = load_embeddings_model_hf(model)

        # Generate embeddings
        embeddings = generate_query_embeddings(query, embeddings_model)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate embeddings: {e}")
    
    return {"embeddings": embeddings}
