In [2]:
# Cell 1: Import necessary modules
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings

In [3]:
# Cell 2: Load documents
print("Loading documents")
loader = DirectoryLoader("./bible", glob="**/*.docx")
books = loader.load()
print(len(books))

Loading documents
1


In [4]:
# Cell 3: Splitting into chunks
print("Splitting into chunks")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(books)
print(type(all_splits))
print(len(all_splits))

Splitting into chunks
<class 'list'>
48


In [5]:
# Cell 4: Storing in ChromaDB
print("Storing in ChromaDB")
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OllamaEmbeddings(model="llama3", show_progress=True),
    persist_directory="./chroma_db",
)

Storing in ChromaDB


OllamaEmbeddings: 100%|██████████| 48/48 [00:11<00:00,  4.17it/s]


In [7]:
question = "Who is Elizabeth?"
docs = vectorstore.similarity_search(question)
docs

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  5.20it/s]


[Document(page_content='sitting among the teachers, listening to them and asking them questions.', metadata={'source': 'bible/luke123.docx'}),
 Document(page_content='sitting among the teachers, listening to them and asking them questions.', metadata={'source': 'bible/luke123.docx'}),
 Document(page_content='5 He went there to register with Mary, who was pledged to be married to him and was expecting a child. 6 While they were there, the time came for the baby to be born, 7 and she gave birth to her firstborn, a son. She wrapped him in cloths and placed him in a manger, because there was no guest room available for them.', metadata={'source': 'bible/luke123.docx'}),
 Document(page_content='5 He went there to register with Mary, who was pledged to be married to him and was expecting a child. 6 While they were there, the time came for the baby to be born, 7 and she gave birth to her firstborn, a son. She wrapped him in cloths and placed him in a manger, because there was no guest room av

In [8]:
from langchain import hub
from langchain_community.llms import Ollama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

llm = Ollama(model="llama3")

retriever = vectorstore.as_retriever()


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_prompt = hub.pull("rlm/rag-prompt")
qa_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [16]:
question = "Who is Zechariah?"
qa_chain.invoke(question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  4.22it/s]


"Zechariah is an old man who questioned the angel Gabriel about having a child with his wife, despite their advanced age. He was then instructed by Gabriel to remain silent until the appointed time, indicating that he did not believe Gabriel's words initially."

In [21]:
question = "What is the story of Luke 3"
qa_chain.invoke(question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:01<00:00,  1.14s/it]


