In [1]:
## embeddings

from langchain_openai import OpenAIEmbeddings
# Initialize the embeddings model
embeddings_model = OpenAIEmbeddings()

# Create embeddings for the original example sentences
text1 = "The cat sat on the mat"
text2 = "A feline rested on the carpet"
text3 = "Python is a programming language"

embeddings = embeddings_model.embed_documents([text1, text2, text3])

print(f"Embedding for document 1 {embeddings[0]}")
print(f"dimensions embedding {len(embeddings[0])}")

Embedding for document 1 [0.004307975061237812, 0.00400943448767066, -0.015408555045723915, -0.007614394649863243, -0.011325288563966751, 0.007864783518016338, -0.008346300572156906, -0.038495708256959915, -0.003816827666014433, -0.011890268884599209, 0.044376641511917114, 0.011055638082325459, 0.010111864656209946, -0.00034207795397378504, -0.0201466865837574, -0.010798829607665539, 0.031279366463422775, 0.0015785744180902839, 0.016859527677297592, -0.01856730878353119, -0.01422723289579153, 0.009899997152388096, -0.010227428749203682, -0.006192313041538, -0.007993187755346298, 0.00868657324463129, -0.0014493672642856836, -0.020454857498407364, -0.003592119552195072, 0.00583278015255928, 0.004108948167413473, -0.006702721584588289, -0.01872139424085617, -0.025809330865740776, -0.031099600717425346, -0.007896884344518185, 0.002529571298509836, 0.0123910466209054, -0.008217896334826946, -0.018939683213829994, 0.014214392751455307, 0.008140853606164455, -0.017347466200590134, -8.09049452

In [2]:
# Vector store interface in LangChain

from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_chroma import Chroma

# Initialize with an embedding model
embeddings = OpenAIEmbeddings()
vector_store = Chroma(embedding_function=embeddings)

# Adding documents
docs = ["The cat sat on the mat", "A feline rested on the carpet", "Python is a programming language"]
docs = [Document(page_content=doc) for doc in docs]
ids = vector_store.add_documents(docs)
ids

['98979ab1-256d-4263-8637-1da4ef00ae64',
 'af6ceccb-c338-42e3-9971-5b00dca56b69',
 '8e87f85d-0a52-4fe5-846a-57fb5101f706']

In [3]:
# Similarity search
results = vector_store.similarity_search("A cat on a mat", k=3)
results

[Document(id='98979ab1-256d-4263-8637-1da4ef00ae64', metadata={}, page_content='The cat sat on the mat'),
 Document(id='af6ceccb-c338-42e3-9971-5b00dca56b69', metadata={}, page_content='A feline rested on the carpet'),
 Document(id='8e87f85d-0a52-4fe5-846a-57fb5101f706', metadata={}, page_content='Python is a programming language')]

In [4]:
vector_store.delete(ids=[ids[0]])

In [5]:
results = vector_store.similarity_search("A cat on a mat", k=3)
results

[Document(id='af6ceccb-c338-42e3-9971-5b00dca56b69', metadata={}, page_content='A feline rested on the carpet'),
 Document(id='8e87f85d-0a52-4fe5-846a-57fb5101f706', metadata={}, page_content='Python is a programming language')]

In [6]:
# Maximum marginal relevance search
# Find relevant BUT diverse documents (reduce redundancy)

results = vector_store.max_marginal_relevance_search("A cat on a mat", k=3, fetch_k=5, lambda_mult=0.5)
results

[Document(id='af6ceccb-c338-42e3-9971-5b00dca56b69', metadata={}, page_content='A feline rested on the carpet'),
 Document(id='8e87f85d-0a52-4fe5-846a-57fb5101f706', metadata={}, page_content='Python is a programming language')]