In [12]:
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())

In [13]:
from langchain.prompts import ChatPromptTemplate

# Prompt asking the model to break one input question into sub-questions.
# The only template variable is {question}.
# The literal is not a raw string, so every "\n" escape below becomes a real
# newline in addition to the physical line breaks inside the triple quotes.
# NOTE(review): the prompt text contains typos ("can be answers", "Ouput").
# It is sent to the model verbatim, so correct it deliberately and re-check the
# generated output afterwards.
template = """You are a helpful assistant that generates multiple sub-questions related to an input 
question.\n The goal is to break down the input into a set of sub-problems / sub-questions that can be 
answers in isolation.\n Generate multiple search queries related to : {question}\n Ouput (3 queries):
"""

# Wrap the raw string as a chat prompt template so it can be piped into a model.
prompt_decompositions = ChatPromptTemplate.from_template(template)

In [23]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

# Model used to generate the sub-questions; no temperature is passed here.
llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-05-20")


def _split_lines(text):
    """Split the model's reply on newline characters, one list item per line."""
    return text.split("\n")


# Pipeline: prompt -> chat model -> plain string -> list of lines.
generate_queries_decompositions_chain = (
    prompt_decompositions | llm | StrOutputParser() | _split_lines
)

question = "What is the name of the writter of the book and what the book says about ?"
questions = generate_queries_decompositions_chain.invoke({"question": question})

In [24]:
questions

['Here are three search queries related to your question, assuming you will replace `[Book Title]` with the actual name of the book you are interested in:',
 '',
 '1.  `[Book Title] author`',
 '2.  `[Book Title] summary`',
 '3.  `who wrote [Book Title] and what is it about`']

In [16]:
# Prompt used to answer a single sub-question.
# Template variables: {question} (appears twice), {q_a_pairs} and {context}.
# The literal is not a raw string, so each "\n" escape below is a real newline
# in the rendered prompt, on top of the physical line breaks.
decomposition_template = """Here is the question you need to answer:
\n-----\n {question} \n-----\n

Here is any available background question + answer pairs: 

\n-----\n {q_a_pairs} \n-------\n

Here is additional context relevant to the questions:

\n-----\n {context} \n--------\n

Use the above context and any background question + answer pairs to answer the question: \n {question}"""


# Wrap the raw string as a chat prompt template so it can be piped into a model.
decomposition_prompt = ChatPromptTemplate.from_template(decomposition_template)

In [17]:
from operator import itemgetter 
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Return one "Question: ... / Answer: ..." block as a single string.

    The question and answer are interpolated on separate lines and the
    result is stripped of leading and trailing whitespace.
    """
    return f"Question: {question}\nAnswer: {answer}\n\n".strip()

In [18]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Location of the source PDF (relative path).
docs_path = "dev-data/Be_Good.pdf"

# Read the PDF into a list of documents.
loader = PyPDFLoader(docs_path)
docs = loader.load()

# Cut the documents into chunks of 1000 characters with a 50-character overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
splits = splitter.split_documents(docs)

# Embed the chunks and index them in a Chroma vector store.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Retriever interface over the store, used by the answering chain.
retriever = vectorstore.as_retriever()


In [21]:
# Answering model; temperature=0 is passed explicitly for this stage.
llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-05-20", temperature=0)

# The chain does not depend on the loop variable, so it is built once:
# retrieve context for the question, fill the prompt, call the model and
# return the reply as a plain string.
rag_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Pick the sub-questions out of the raw model lines without mutating
# `questions`, so re-running this cell always starts from the same input.
# The previous `questions = questions[2:]` dropped two more entries on every
# re-run and assumed exactly one preamble line plus one blank line.
# Numbered lines ("1. ...", "2. ...") are treated as the sub-questions.
sub_questions = [line.strip() for line in questions if line.strip()[:1].isdigit()]
if not sub_questions:
    # The model did not number its reply: fall back to every non-blank line.
    sub_questions = [line.strip() for line in questions if line.strip()]

q_a_pairs = ""
# Defined up front so the name exists even when there is nothing to answer.
answer = None

for q in sub_questions:
    # Each sub-question sees the question/answer pairs accumulated so far.
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n----\n" + q_a_pair

In [22]:
answer

'The essay "Be Good" was written by Paul Graham.\n\nIn this essay, Paul Graham argues that being "good" is not merely a statement of values but a practical and effective strategy. He suggests that this approach can serve as a guide for strategy and even a design specification for software, emphasizing that it "works." Graham encourages going beyond simply "not being evil" to actively "being good," citing examples like Craigslist as successful entities that operate with a "good" ethos.'