In [6]:
import inspect

# Basics

In [10]:
from langchain.embeddings.base import Embeddings

In [12]:
# All embedding models inherit from the `Embeddings` class; printing its source
# shows the whole interface, which consists of two abstract methods:
#   - `embed_documents` takes a list of texts and returns one vector per text
#   - `embed_query` takes a single query string and returns a single vector
print(inspect.getsource(Embeddings))

class Embeddings(ABC):
    """Interface for embedding models."""

    @abstractmethod
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""

    @abstractmethod
    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""



In [29]:
from langchain.embeddings import OpenAIEmbeddings
import numpy as np

# Create the embedding model that the cells below call `embed_documents` and
# `embed_query` on.
# NOTE(review): presumably requires OpenAI API credentials to be configured in
# the environment before this cell runs — confirm.
embedding_model = OpenAIEmbeddings()

In [30]:
# Embed two unrelated passages in a single `embed_documents` call. The cells
# further down index the result as [0] (word embeddings) and [1] (Barack Obama).
# NOTE: each triple-quoted string is passed exactly as written, so the leading
# newline and the indentation at the start of every line are part of the text
# that gets embedded.
document_embeddings = embedding_model.embed_documents(
    [
        # first paragraph of the Wikipedia article on word embeddings
        """
        In natural language processing (NLP), a word embedding is a representation of a word. 
        The embedding is used in text analysis. Typically, the representation is a real-valued 
        vector that encodes the meaning of the word in such a way that words that are closer in 
        the vector space are expected to be similar in meaning.[1] Word embeddings can be obtained 
        using language modeling and feature learning techniques, where words or phrases from the 
        vocabulary are mapped to vectors of real numbers.
        """,
        # first paragraph of the Wikipedia article on Barack Obama
        """
        Barack Hussein Obama II (born August 4, 1961) is an American politician who served as the 
        44th president of the United States from 2009 to 2017. A member of the Democratic Party, he 
        was the first African-American president of the United States.[2] Obama previously served 
        as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator 
        from 1997 to 2004, and worked as a civil rights lawyer before holding public office.
        """
    ]
)
# Display the raw vectors; the output is a nested list of floats, one inner list
# per input passage.
document_embeddings


[[-0.02607034115969835,
  -0.002031953050019753,
  0.009897784212956123,
  0.0021038383462449618,
  -0.0015822716335932747,
  -0.00119169578875887,
  0.02798727866708033,
  -0.0029856292177984226,
  -0.007712476329983872,
  -0.02834510769717915,
  -0.004718859624218675,
  0.016830706769959694,
  -0.03432594944195551,
  0.012843477387904515,
  -0.014492043122712383,
  0.029648624010106015,
  0.025865869290858438,
  0.0021501641932039,
  0.007623019072459168,
  0.0009872225229351132,
  -0.02218534957470825,
  -0.0021629438014217147,
  -0.01290098562488468,
  -0.018300357989718154,
  -0.011284368910621349,
  0.0025239670350830634,
  0.01869652398182529,
  -0.04334833421728156,
  0.005450490797704733,
  -0.002920133958512762,
  -0.01727799119493809,
  -0.011495231514892732,
  -0.004658156485184054,
  -0.0037699758095216854,
  -0.011060725766809588,
  0.008568707752271063,
  0.0076485782888947975,
  -0.05934836645308329,
  0.02383391530951612,
  -0.00010942514798248422,
  0.0227093116489935

In [31]:
# Embed a single question with `embed_query`. Unlike `embed_documents`, this
# takes one string and returns one flat list of floats rather than a list of
# vectors.
query_embedding = embedding_model.embed_query("How are word embeddings used in natural language processing?")
# Display the raw query vector.
query_embedding

[-0.015196090564131737,
 0.00942157581448555,
 0.018208879977464676,
 -0.01282417867332697,
 0.0014452803879976273,
 0.01723104529082775,
 0.013663267716765404,
 -0.006260128691792488,
 -0.005179884843528271,
 -0.03287640959024429,
 0.003604116151109338,
 0.013471664860844612,
 -0.007545189466327429,
 0.011496173217892647,
 -0.003647061763331294,
 0.026348698884248734,
 0.02835722640156746,
 0.011767059564590454,
 0.011079931631684303,
 -0.010518337599933147,
 -0.020190978422760963,
 -0.0033530504442751408,
 -0.009467825293540955,
 -0.014152185060083866,
 -0.020600613206624985,
 0.0023586975876241922,
 0.03298212215304375,
 -0.03113216534256935,
 -0.015182876028120518,
 -0.012308833189308643,
 0.00778964813798666,
 0.004036874510347843,
 4.9810554628493264e-05,
 -0.0121766934171319,
 -0.02065346948802471,
 0.019305642694234848,
 0.007346979342401028,
 -0.012744895182549953,
 0.013299882411956787,
 -0.002685743849724531,
 0.013795407488942146,
 0.020455259829759598,
 0.00462820054963231

In [32]:
# Dot product of the query embedding and the first document embedding
# (the word-embeddings paragraph, which is on the same topic as the query).
# NOTE(review): presumably read as a similarity score, where a larger value
# means the two texts are closer in meaning — confirm.
np.dot(query_embedding, document_embeddings[0])

0.885833900139056

In [33]:
# Dot product of the query embedding and the second document embedding
# (the Barack Obama paragraph, which is unrelated to the query), computed the
# same way as for the first document so the two scores can be compared.
np.dot(query_embedding, document_embeddings[1])

0.6771809486274986