In [None]:
# Install the LangChain packages this notebook relies on into the running
# kernel's environment. NOTE(review): versions are unpinned, so a fresh install
# picks up whatever releases are current — consider pinning for reproducibility.
%pip install langchain langchain_community langchain_ollama langchain_text_splitters

In [7]:
from langchain_ollama import OllamaEmbeddings

# Embedding backend settings. Swap EMBEDDING_MODEL_NAME for another locally
# available model tag (e.g. "llama3.2:latest") to experiment.
EMBEDDING_MODEL_NAME = "nomic-embed-text:latest"
# Ensure Ollama is running locally and reachable at this address.
OLLAMA_BASE_URL = "http://localhost:11434"

# Embedding client reused by the cells below.
embedding_model = OllamaEmbeddings(
    base_url=OLLAMA_BASE_URL,
    model=EMBEDDING_MODEL_NAME,
)

In [None]:
from pathlib import Path

from langchain_community.document_loaders import TextLoader

# Load a text document
file_path = "../../00-example_data/state_of_the_union.txt"

# The path is relative to the kernel's working directory; fail early and show
# the resolved location so a wrong working directory is easy to diagnose.
if not Path(file_path).is_file():
    raise FileNotFoundError(
        f"Example document not found: {Path(file_path).resolve()}"
    )

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, which differs between operating systems.
document_loader = TextLoader(file_path, encoding="utf-8")
raw_documents = document_loader.load()

print("Number of documents loaded:", len(raw_documents))
print("Sample Document:", raw_documents[0].page_content[:200])

In [None]:
from langchain_text_splitters import CharacterTextSplitter

# Chunking parameters handed to the splitter (both measured in characters).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Break the loaded documents into smaller, overlapping pieces.
text_splitter = CharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)
chunks = text_splitter.split_documents(raw_documents)

print("Number of chunks created:", len(chunks))
# Preview only the first 200 characters of the first chunk.
first_chunk_preview = chunks[0].page_content[:200]
print("Sample Chunk:", first_chunk_preview)

In [None]:
# Generate embeddings for each chunk.
# Use the batch document API (`embed_documents`) rather than calling the query
# API (`embed_query`) once per chunk: it is the interface intended for corpus
# text and hands the whole list to the embedding model in one call.
chunk_texts = [chunk.page_content for chunk in chunks]
chunk_embeddings = embedding_model.embed_documents(chunk_texts)

# Sanity check: exactly one embedding vector per chunk.
assert len(chunk_embeddings) == len(chunks), "embedding count does not match chunk count"

print("Number of embeddings generated:", len(chunk_embeddings))
print("First Chunk Embedding (first 5 values):", chunk_embeddings[0][:5])

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
# Import Document from langchain_core: the legacy `langchain.schema` path is
# deprecated and not available in newer `langchain` releases.
from langchain_core.documents import Document

# Convert chunks to LangChain Documents, carrying each chunk's metadata along
# (copied so later edits to one object do not affect the other).
documents = [
    Document(page_content=chunk.page_content, metadata=dict(chunk.metadata))
    for chunk in chunks
]

# Create an in-memory vector store; the documents are embedded with
# `embedding_model` as part of building the store.
vector_store = InMemoryVectorStore.from_documents(documents, embedding_model)

print("Number of documents in vector store:", len(vector_store.store))

In [None]:
# Question to look up in the vector store, and how many matches to return.
query = "What is the state of the union?"
TOP_K = 3

# Retrieve the chunks most similar to the query.
similar_docs = vector_store.similarity_search(query, k=TOP_K)

separator = "-" * 50
print("Most similar documents:")
for rank, match in enumerate(similar_docs, start=1):
    preview = match.page_content[:200]  # first 200 characters only
    print(f"Result {rank}:")
    print(preview)
    print(separator)