In [1]:
from langchain_openai import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_core.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [2]:
# --- Ingest the PDF and expose it through a similarity retriever ---

# Read the source PDF into LangChain documents.
pdf_loader = PyPDFLoader("Bridgewater-Associates-Ray-Dalio-Principles.pdf")
pdf = pdf_loader.load()

# Break the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=100) so each piece can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
pdf_split = text_splitter.split_documents(pdf)

# Embed every chunk with OpenAI embeddings and keep the vectors in an
# in-memory store (nothing is persisted; the index is rebuilt on each run).
embeddings = OpenAIEmbeddings()
vector_db = DocArrayInMemorySearch.from_documents(pdf_split, embeddings)

# Retriever that returns the 3 most similar chunks for a query.
vector_db_retriever = vector_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)



In [5]:
# --- Answer a question over the retrieved chunks ---

# Chat model used to generate the answer (temperature pinned to 0.0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Prompt text, spelled out line by line so the embedded newlines and the
# four-space indentation that are sent to the model are explicit.
# {context} receives the retrieved documents, {input} the user question.
prompt_template = (
    "\n"
    "    Use this context to answer the following question using at most 3 sentences: {context}\n"
    "    Question: {input}\n"
    "    "
)
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "input"],
)

# First combine the LLM and prompt into a documents chain, then put the
# retriever in front of it to form the full retrieval chain.
combine_docs_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(vector_db_retriever, combine_docs_chain)

# Run the chain; the result is a dict carrying the original "input",
# the retrieved "context" documents and the generated "answer".
question = "what is the most meaningful passage in this text?"
result = chain.invoke({"input": question})
result

{'input': 'what is the most meaningful passage in this text?',
 'context': [Document(metadata={'source': 'Bridgewater-Associates-Ray-Dalio-Principles.pdf', 'page': 2}, page_content='Introduction'),
  Document(metadata={'source': 'Bridgewater-Associates-Ray-Dalio-Principles.pdf', 'page': 8}, page_content='Part 2:  \nMy Most Fundamental Life Principles'),
  Document(metadata={'source': 'Bridgewater-Associates-Ray-Dalio-Principles.pdf', 'page': 0}, page_content='beginning of Part 3 (through the Summary and Table of Principles) which will give you \nnearly the whole picture. It’s only about 55 pages of a normal size book.\nAbove all else, I want you to think for yourself—to decide 1) what you want, 2) what is \ntrue, and 3) what to do about it. I want you to do that in a clear-headed, thoughtful way, \nso that you get what you want. I wrote this book to help you do that. I am going to ask \nonly two things of you—1) that you be open-minded and 2) that you honestly answer some \nquestions a

In [6]:
# Display only the generated answer text from the chain result.
result["answer"]

"The most meaningful passage in this text is the emphasis on the importance of thinking for oneself, specifically the need to determine what one wants, what is true, and what actions to take. This highlights the core message of the book: that personal clarity and open-mindedness are essential for achieving one's goals and overcoming obstacles in life. By encouraging readers to engage in self-reflection and honest inquiry, the author aims to empower them to take control of their own lives."