**Make sure you load the API keys for cloud providers!**

You can set the API keys as environment variables yourself or load them with a script. Please note that since the keys are private, they are not included in the repository.

In [1]:
# Set up the API keys before any cloud-provider model is used.
import sys
import os

# Put the parent directory first on the import path so the `config` import
# below can be resolved (presumably config.py lives one level up — verify
# against the repository layout).
sys.path.insert(0, os.path.abspath('..'))

from config import set_environment
# Loads the keys, as explained early in chapter 2.
# NOTE(review): presumably this exports the keys as environment variables;
# confirm in config.py, which is not part of the repository's public files.
set_environment()

# Basic Embeddings Usage

In [2]:
from langchain_openai import OpenAIEmbeddings

# Three sample sentences: the first two are paraphrases of each other,
# the third is about an unrelated topic.
text1 = "The cat sat on the mat"
text2 = "A feline rested on the carpet"
text3 = "Python is a programming language"

# Embedding model, created with the library's default settings
embeddings_model = OpenAIEmbeddings()

# Embed all three sentences in a single batch call through LangChain;
# the result holds one vector per input sentence.
embeddings = embeddings_model.embed_documents([text1, text2, text3])

# Bind each vector to a name matching its sentence (text1 -> embedding1, ...)
embedding1, embedding2, embedding3 = embeddings

# Report how many vectors came back and how long each one is
# (typically 1536 dimensions with OpenAI's embeddings)
print(f"Number of documents: {len(embeddings)}")
print(f"Dimensions per embedding: {len(embeddings[0])}")

Number of documents: 3
Dimensions per embedding: 1536


# Vector Store Setup

In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

# Embedding model handed to the vector store as its embedding function
embeddings = OpenAIEmbeddings()

# Sample documents; each one carries its identifier in metadata["id"]
docs = [
    Document(page_content="Content about language models", metadata={"id": "doc_1"}),
    Document(page_content="Information about vector databases", metadata={"id": "doc_2"}),
    Document(page_content="Details about retrieval systems", metadata={"id": "doc_3"})
]

# Create the vector store
vector_store = Chroma(embedding_function=embeddings)

# Add the documents under their explicit IDs. The IDs must be passed via
# `ids=`; an ID that only sits in the metadata is not used as the store ID.
# NOTE(review): with stable IDs, re-running this cell should overwrite the
# same three entries instead of adding duplicates — confirm for the installed
# langchain_chroma version.
vector_store.add_documents(docs, ids=[doc.metadata["id"] for doc in docs])

# Plain similarity search; k stays below the number of stored documents
results = vector_store.similarity_search("How do language models work?", k=2)

# Check that the store returns something before running MMR
found_docs = vector_store.similarity_search("retrieval", k=1)
print(f"Found documents: {len(found_docs)}")

# Guard on the search result from above (the original tested an undefined
# name here, which raises NameError on a fresh kernel).
if found_docs:
    # With fetch_k=1 there is only a single candidate, so no diversity
    # re-ranking can happen; raise k and fetch_k (up to the number of stored
    # documents) to see the effect of lambda_mult.
    mmr_results = vector_store.max_marginal_relevance_search(
        "retrieval systems",
        k=1,  # Only request what's available
        fetch_k=1,  # Only fetch what's available
        lambda_mult=0.5
    )

Found documents: 1
