## Embedding Techniques Using Hugging Face

In [2]:
import os
from dotenv import load_dotenv
# Load environment variables from the .env file so that settings such as HF_TOKEN
# (read in the next cell) do not have to be written into the notebook.
# The call is the cell's last expression, so its return value is displayed as output.
load_dotenv() # Load environment variables from .env file

True

In [3]:
# Export the Hugging Face access token only when one is actually configured.
# os.getenv() returns None for a missing variable and os.environ only accepts
# strings, so assigning the lookup result directly raises TypeError whenever
# HF_TOKEN is absent from both the shell environment and the .env file.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token
else:
    # Best effort: keep the notebook running and tell the reader what is missing.
    # NOTE(review): gated or private models will presumably fail later without a token.
    print("HF_TOKEN is not set; continuing without Hugging Face authentication.")

Hugging Face sentence-transformers is a Python framework for generating state-of-the-art embeddings for sentences, text, and images. LangChain exposes these embedding models through its `HuggingFaceEmbeddings` class. An alias, `SentenceTransformerEmbeddings`, is also provided for users who are more familiar with the original sentence-transformers package.

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

# Build the embedding client backed by the "all-MiniLM-L6-v2" checkpoint;
# every later cell reuses this single `embeddings` object.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [7]:
# Embed one query string and display the resulting vector
# (the bare last expression is what the cell renders).
text = "these is a test document"
query_result = embeddings.embed_query(text)
query_result

[-0.0721929743885994,
 0.1003042683005333,
 -0.04553227126598358,
 0.05173199623823166,
 0.00985720008611679,
 -0.041663311421871185,
 -0.05341549962759018,
 0.04364681988954544,
 -0.0092405891045928,
 0.03647846356034279,
 0.06589924544095993,
 0.03995156288146973,
 -0.010142799466848373,
 0.0010761985322460532,
 -0.06875663250684738,
 -0.03407135233283043,
 -0.02941090427339077,
 -0.038505006581544876,
 -0.0008356176549568772,
 0.09539645165205002,
 0.035659823566675186,
 0.06993426382541656,
 -0.012472042813897133,
 0.006257838569581509,
 -0.0017971477936953306,
 0.05402478575706482,
 -0.07214553654193878,
 0.02240925282239914,
 0.06751727312803268,
 -0.017287179827690125,
 0.03459765017032623,
 0.05195480212569237,
 0.07265202701091766,
 0.051087696105241776,
 0.08393660187721252,
 -0.0024897768162190914,
 0.05297567695379257,
 -0.008533553220331669,
 0.028991328552365303,
 0.029397914186120033,
 -0.011062429286539555,
 -0.11759639531373978,
 0.021160295233130455,
 0.02532777935266

In [8]:
# Number of components in the query embedding, i.e. the vector's dimensionality.
len(query_result)  # Should return the length of the embedding vector

384

In [9]:
# Embed a batch of two documents in one call: the earlier `text` plus a second
# sentence. The result is displayed as a list with one vector per input.
doc_result = embeddings.embed_documents(
    [
        text,
        "This is a not a test document",
    ]
)
doc_result

[[-0.07219294458627701,
  0.10030423104763031,
  -0.04553229734301567,
  0.051731985062360764,
  0.009857196360826492,
  -0.041663285344839096,
  -0.05341549217700958,
  0.043646808713674545,
  -0.009240611456334591,
  0.03647846728563309,
  0.06589923799037933,
  0.03995158150792122,
  -0.010142780840396881,
  0.0010762116871774197,
  -0.06875665485858917,
  -0.03407133370637894,
  -0.02941092476248741,
  -0.038504984229803085,
  -0.0008356372127309442,
  0.09539642184972763,
  0.03565983101725578,
  0.06993428617715836,
  -0.012472082860767841,
  0.006257819011807442,
  -0.0017971635097637773,
  0.054024796932935715,
  -0.0721455067396164,
  0.022409221157431602,
  0.06751727312803268,
  -0.01728714630007744,
  0.03459766507148743,
  0.05195479094982147,
  0.07265203446149826,
  0.05108768865466118,
  0.08393657952547073,
  -0.002489793347194791,
  0.05297563225030899,
  -0.008533555082976818,
  0.02899128757417202,
  0.029397904872894287,
  -0.011062425561249256,
  -0.11759641021490

In [10]:
# Vector for the first document in the batch, which is the same `text` that was
# embedded as a query earlier, so the two vectors can be compared side by side.
doc_result[0]  # Should return the embedding vector for the first document

[-0.07219294458627701,
 0.10030423104763031,
 -0.04553229734301567,
 0.051731985062360764,
 0.009857196360826492,
 -0.041663285344839096,
 -0.05341549217700958,
 0.043646808713674545,
 -0.009240611456334591,
 0.03647846728563309,
 0.06589923799037933,
 0.03995158150792122,
 -0.010142780840396881,
 0.0010762116871774197,
 -0.06875665485858917,
 -0.03407133370637894,
 -0.02941092476248741,
 -0.038504984229803085,
 -0.0008356372127309442,
 0.09539642184972763,
 0.03565983101725578,
 0.06993428617715836,
 -0.012472082860767841,
 0.006257819011807442,
 -0.0017971635097637773,
 0.054024796932935715,
 -0.0721455067396164,
 0.022409221157431602,
 0.06751727312803268,
 -0.01728714630007744,
 0.03459766507148743,
 0.05195479094982147,
 0.07265203446149826,
 0.05108768865466118,
 0.08393657952547073,
 -0.002489793347194791,
 0.05297563225030899,
 -0.008533555082976818,
 0.02899128757417202,
 0.029397904872894287,
 -0.011062425561249256,
 -0.11759641021490097,
 0.021160293370485306,
 0.02532775327