In [16]:
import getpass
import os
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chat_models import init_chat_model
from langchain_community.vectorstores import Chroma
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The Google GenAI chat model and embeddings used further down need an API key.
# If GOOGLE_API_KEY is not set, ask for it interactively instead of failing on the
# assignment below with "TypeError: str expected, not NoneType".
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

# Mirror the Google key under the OpenAI-compatible variable name.
# NOTE(review): the previous comment mentioned Groq, but the value copied here is
# GOOGLE_API_KEY — confirm which provider OPENAI_API_KEY is actually meant for.
os.environ["OPENAI_API_KEY"] = os.environ["GOOGLE_API_KEY"]

# Identify this app in outgoing HTTP requests (presumably picked up by the web
# loader used in the next cell — TODO confirm).
os.environ["USER_AGENT"] = "MyAppName/1.0"

In [7]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
# Download the blog post and keep only the elements whose CSS class is one of
# post-content / post-title / post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Save the extracted text to reference.txt, one document per line block.
# - encoding="utf-8" matches how the file is read back later; relying on the
#   platform default can corrupt or reject non-ASCII characters.
# - the `with` block closes the file even if a write fails part-way through.
with open("reference.txt", "w", encoding="utf-8") as file_pointer:
    for doc in blog_docs:
        file_pointer.write(doc.page_content + "\n")

In [12]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings


# 1. Read the saved reference text back from disk
with open("reference.txt", mode="r", encoding="utf-8") as file:
    file_content = file.read()

# 2. Wrap the raw text in a single Document so the splitter can consume it
document = Document(page_content=file_content)
docs = [document]

# 3. Split into chunks with the tiktoken-based splitter (size 300, overlap 50)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(docs)

# 4. Embed every chunk with the Google embedding model and index it in Chroma
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# 5. Expose the store as a retriever that returns only the single best match (k=1)
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})


In [None]:
# Multi-query retrieval: ask the LLM to rephrase the user question several ways,
# run each rephrasing through the retriever, then merge and de-duplicate the hits.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""

prompt = ChatPromptTemplate.from_template(template)
llm_model = init_chat_model("gemini-2.0-flash", model_provider="google_genai")
chain = prompt | llm_model | StrOutputParser()

response = chain.invoke({"question": "What is task decomposition for llm models"})
respone = response  # legacy misspelled name, kept in case other cells still reference it
print(response)

# One query per non-empty line of the model output, surrounding whitespace removed.
questions = [line.strip() for line in response.splitlines() if line.strip()]

# Collect the documents retrieved for every generated query.
all_results = []
for q in questions:
    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated in langchain-core.
    docs = retriever.invoke(q)
    all_results.extend(docs)

# De-duplicate by page_content. Materialised as a list so the result can be
# indexed and reused, not just iterated once as a dict view.
unique_results = list({doc.page_content: doc for doc in all_results}.values())

# Show a short preview (first 300 characters) of each unique document.
for doc in unique_results:
    print(doc.page_content[:300])




How can large language models break down complex problems into smaller steps?
What are the methods used to decompose a task when working with large language models?
Explain the concept of task decomposition in the context of large language models.
What are the benefits of using task decomposition with large language models?
What are some examples of task decomposition strategies employed by large language models?
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructe
