In [None]:
# Install/upgrade the OpenAI and Ollama LangChain integrations plus ChromaDB.
# NOTE(review): later cells also import `langchain`, `langchain_community` and
# `langchain_text_splitters`, which are not listed here — presumably they are
# expected to be pre-installed; confirm and pin versions if this must run on a
# fresh environment.
%pip install --upgrade --quiet langchain-openai langchain-ollama chromadb

In [None]:
# Install chroma-hnswlib explicitly, separately from the chromadb install.
# NOTE(review): presumably a workaround for chromadb's native HNSW dependency
# failing to install on some platforms — confirm it is still required.
%pip install chroma-hnswlib

In [4]:
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter


In [5]:
from langchain_ollama import OllamaEmbeddings

# Embedding model that the cache wrapper delegates to on a cache miss.
# Alternative model that was tried here: "nomic-embed-text:latest".
ollama_settings = {
    "model": "llama3.2:latest",
    "base_url": "http://localhost:11434",  # point this at your own Ollama server
}
underlying_embeddings = OllamaEmbeddings(**ollama_settings)


In [6]:
# File-backed byte store rooted at ./cache/ that holds the cached vectors.
store = LocalFileStore("./cache/")

# Wrap the underlying embedder so computed vectors are written to `store`.
# The namespace is passed explicitly; it mirrors the model name with
# characters such as "." and ":" replaced by underscores.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=underlying_embeddings,
    document_embedding_cache=store,
    namespace="llama3_2_latest",
)

In [None]:
# Snapshot the cache contents before any embedding has been requested.
keys_before = list(store.yield_keys())
print("Keys in cache before loading:", keys_before)



In [8]:
# Read the example text file into LangChain document objects.
file_path = "../../00-example_data/state_of_the_union.txt"
loader = TextLoader(file_path)
raw_documents = loader.load()

In [9]:
# Break the loaded text into chunks configured with chunk_size=1000 and
# chunk_overlap=0.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
documents = text_splitter.split_documents(raw_documents)

In [None]:
# Generate embeddings and verify caching without using a vector store.
#
# CacheBackedEmbeddings only writes to the document cache from
# embed_documents(); embed_query() bypasses the cache unless a separate
# query_embedding_cache is configured. Calling embed_documents() here is what
# actually populates `store`, and it also lets the wrapper batch the texts
# instead of issuing one call per chunk.
print("Generating and caching embeddings...")
chunk_texts = [doc.page_content for doc in documents]
chunk_embeddings = cached_embedder.embed_documents(chunk_texts)
for i, _ in enumerate(chunk_embeddings, start=1):
    print(f"Document {i}: Embedding cached.")

In [20]:
# `langchain.vectorstores` is a deprecated import location; the Chroma
# integration lives in langchain_community, which this notebook already uses
# for TextLoader.
from langchain_community.vectorstores import Chroma

# Build a Chroma vector store from the chunks. Embeddings are requested through
# the cache-backed embedder, so chunks already present in the cache are not
# recomputed by the underlying model.
vector_store = Chroma.from_documents(documents, cached_embedder)

In [None]:
# List the cache keys again now that embeddings have been generated.
keys_after = list(store.yield_keys())
print("Keys in cache after embedding:", keys_after)