# Retrieval-Augmented Generation (RAG) with LangChain, Ollama, and Chroma

## 1. Make sure the following packages are installed in your environment

In [None]:
# Uncomment and run once if the pinned packages below are not yet installed.
# In a notebook, `%pip` is generally preferred over `!pip` because it targets
# the running kernel's environment.
#!pip install langchain==0.3.25 langchain-community==0.3.25 langchain-ollama==0.3.3 chromadb==1.0.12

## 2. Load your documents

In [None]:
# Import the loaders from langchain_community directly: the
# `langchain.document_loaders` path is only a deprecated forwarding shim in
# LangChain 0.3.x, and langchain-community is already a pinned dependency.
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Recursively pick up every .txt file under ./documents and read it as UTF-8.
loader = DirectoryLoader(
    "./documents",
    glob="**/*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
)
docs = loader.load()

# Stop here with an actionable message when nothing matched, instead of a bare
# IndexError from docs[0] (the following cells cannot work without documents).
if not docs:
    raise FileNotFoundError("No .txt files found under ./documents")

# Show the first loaded document as a sanity check.
print(docs[0])

ModuleNotFoundError: No module named 'langchain'

## 3. Chunk your documents

In [2]:
# Break the loaded documents into overlapping chunks; the overlap lets text
# near a chunk boundary appear in both neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=128,
)
chunks = splitter.split_documents(docs)

# Show the first chunk as a sanity check.
print(chunks[0])

NameError: name 'RecursiveCharacterTextSplitter' is not defined

## 4. Embed and index chunks in a vector database
Make sure to execute `ollama pull all-minilm` in the terminal before running the following snippet.

In [None]:
# Chroma is imported from langchain_community directly: the
# `langchain.vectorstores` path is only a deprecated forwarding shim in
# LangChain 0.3.x, and langchain-community is already a pinned dependency.
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings

# Embedding model served by Ollama; swap in one of the alternatives below
# (after pulling it with `ollama pull <name>`) to trade speed for model size.
model = "all-minilm"            # 23M parameters
# model = "nomic-embed-text"    # 137M parameters
# model = "mxbai-embed-large"   # 334M parameters

# The Ollama server is expected to be listening locally on port 11434.
embed_model = OllamaEmbeddings(model=model, base_url="http://127.0.0.1:11434")

# Embed every chunk and index the vectors in a Chroma vector store.
vector_store = Chroma.from_documents(chunks, embed_model)

# NOTE(review): `_collection` is an underscore-prefixed (private) attribute of
# the LangChain wrapper; it is kept because the next cell reads `collection`,
# but it may change between releases.
collection = vector_store._collection
print("Total embeddings:", collection.count())

In [None]:
# Fetch the stored chunk texts together with their embedding vectors.
all_data = collection.get(include=["documents", "embeddings"])

# Print the second stored entry (index 1): its text, then its embedding vector.
print(
    all_data['documents'][1],
    all_data['embeddings'][1],
    sep="\n",
)

In [None]:
# Wrap the vector store as a retriever, passing k=4 through its search kwargs.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 4},
)
print(retriever)

## 5. Create a Retrieval-QA chain
Make sure to execute `ollama pull gemma3:1b` in the terminal before running the following snippet.

In [None]:
from langchain.chains import RetrievalQA
from langchain_ollama import OllamaLLM

# Generation model served by Ollama; pull the chosen one with `ollama pull`
# before switching to it.
model = "gemma3:1b"     # 1B parameters
# model = "llama3.2"    # 3B parameters
# model = "mistral"     # 7B parameters

# The Ollama server is expected to be listening locally on port 11434.
llm = OllamaLLM(base_url="http://127.0.0.1:11434", model=model)

# NOTE(review): RetrievalQA appears to be marked deprecated in recent LangChain
# releases in favour of `create_retrieval_chain` — confirm before upgrading.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",            # or "map_reduce", "refine", etc.
    llm=llm,
    retriever=retriever,
    return_source_documents=True,  # if you want the source chunks back
)

# Show the configured LLM as a sanity check.
print(llm)