In [1]:
from dotenv import load_dotenv, find_dotenv

# Locate a .env file and load its entries into the process environment.
# Presumably this supplies the API key used by the Google clients in later
# cells -- TODO confirm which variables the .env file is expected to define.
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

In [2]:
from langchain.prompts import ChatPromptTemplate

# HyDE document generation.
# The prompt asks the model to write a hypothetical "scientific paper passage"
# that answers the question; a later cell feeds that passage (rather than the
# raw question) to the retriever.
template = (
    "Please write a scientific paper passage to answer the question\n"
    "Question: {question}\n"
    "Passage:"
)
prompt_hyde = ChatPromptTemplate.from_template(template)

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

# Chat model used by both the HyDE chain below and the final RAG chain.
# NOTE(review): this is a dated preview model id -- confirm it is still served.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash-preview-05-20",
    temperature=0,
)

# HyDE chain: fill the prompt, call the model, reduce the reply to a plain string.
generate_docs_for_retrieval = prompt_hyde | llm | StrOutputParser()

# User question; reused unchanged by the retrieval and RAG cells.
question = "What is the name of the writter of the book and what the book says about ?"

# Display the hypothetical passage produced for this question.
generate_docs_for_retrieval.invoke({"question": question})

"To accurately identify the author of a book and provide a comprehensive summary of its content, the specific title of the literary work must first be provided. Without this foundational information, a precise answer remains unattainable.\n\nHowever, the general approach to addressing such a query within a scientific or academic context would involve two distinct components:\n\n1.  **Author Identification:** The author is the individual or collective entity credited with the creation of the text. This information is typically found on the title page of the book, in its bibliographic data, and within academic databases. For instance, if the book in question were *On the Origin of Species*, the author would be identified as Charles Darwin.\n\n2.  **Content Summary:** A concise summary of the book's content would delineate its central thesis, primary arguments, scope, and key findings or contributions. This involves distilling the core message and the principal topics explored. Continuing

In [5]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Build the retriever: load the PDF, split it into chunks, embed and index them.
doc_path = "dev-data/Be_Good.pdf"
loader = PyPDFLoader(doc_path)

# `doc` is a list of Documents (the recorded metadata carries per-page fields).
doc = loader.load()

# 500-character chunks with a 50-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
doc_splits = text_splitter.split_documents(doc)

# Stable, position-based ids make this cell idempotent. Without explicit ids
# every run assigns fresh random ids, and because the in-memory Chroma
# collection outlives the cell within a kernel, re-running it would index each
# chunk again and the retriever would start returning duplicates.
# NOTE(review): if the chunking parameters change, restart the kernel so chunks
# from the previous split do not linger under higher indices.
chunk_ids = [f"{doc_path}:{idx}" for idx in range(len(doc_splits))]

# NOTE(review): the name `chromadb` shadows the chromadb package if that is
# ever imported in this notebook; kept here because later cells may use it.
chromadb = Chroma.from_documents(
    documents=doc_splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/text-embedding-004"),
    ids=chunk_ids,
)
retriever = chromadb.as_retriever()

In [6]:
# Retrieve.
# The HyDE chain runs first; its generated passage (not the raw question)
# becomes the query that is handed to the vector-store retriever.
retrieval_chain = generate_docs_for_retrieval | retriever

retireved_docs = retrieval_chain.invoke({"question": question})

# Show the retrieved chunks as the cell output.
retireved_docs

[Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'title': 'Be Good - Essay by Paul Graham', 'page_label': '3', 'page': 2, 'author': 'Paul Graham', 'source': 'dev-data/Be_Good.pdf', 'creationdate': 'D:20240613143635', 'total_pages': 11}, page_content="know when I come across a powerful one.One way to guess how far an idea extends is to ask\nyourself at what\npoint you'd bet against it.  The thought of betting against benevolence\nis alarming in the same way as saying that something is technically\nPage 3"),
 Document(metadata={'page': 9, 'title': 'Be Good - Essay by Paul Graham', 'page_label': '10', 'creator': 'PyPDF', 'total_pages': 11, 'creationdate': 'D:20240613143635', 'author': 'Paul Graham', 'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'source': 'dev-data/Be_Good.pdf'}, page_content="either.  And starting an organic farm, though it's at least\nstraightforwardly benevolent, doesn't help people on the scale that\nPage 10"

In [7]:
# RAG: answer the original question using the chunks retrieved via HyDE.
template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

# Fill the prompt, call the model, reduce the reply to a plain string.
final_rag_chain = prompt | llm | StrOutputParser()

# The retrieved Document list is passed as-is, so the prompt receives its
# string form -- presumably including each document's metadata as well as its
# text (the recorded answer names the author, which appears only in metadata).
final_rag_chain.invoke({"context": retireved_docs, "question": question})

'The writer of the essay is **Paul Graham**.\n\nThe essay, titled "Be Good," discusses the concept of **benevolence** and what it means to "be good," including the implications of telling people to be good.'