In [1]:
import os
from dotenv import load_dotenv

# Populate os.environ from a local .env file before any key is read below.
load_dotenv()

# Credentials are read from the environment only; either may be None when unset.
google_api_key = os.getenv("GOOGLE_API_KEY")
langsmith = os.getenv("LANGCHAIN_API_KEY")

from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Chat model used by the cells below as the generation step of the chain.
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                               google_api_key=google_api_key)

# Set USER_AGENT before the web loader is imported and used in the next cell.
os.environ['USER_AGENT'] = 'my-app/1.0'

# Enable LangSmith tracing only when a key is actually configured; forcing it
# on without credentials makes each chain run attempt unauthenticated uploads.
# NOTE(review): LANGSMITH_API_KEY is checked as well because newer LangSmith
# SDKs accept that spelling - confirm against the installed version.
if langsmith or os.getenv("LANGSMITH_API_KEY"):
    os.environ['LANGCHAIN_TRACING_V2'] = "true"



In [2]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter



# Load the blog post, keeping only the elements whose CSS class marks the
# post title, header or body (everything else on the page is discarded).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Stop here with a clear message if nothing usable was scraped (error page,
# changed page layout, ...) instead of indexing empty text in the next cell.
if not any(doc.page_content.strip() for doc in docs):
    raise ValueError(
        "No text was extracted from the blog post; check the URL and the "
        "CSS classes passed to SoupStrainer."
    )


In [3]:

# Split the loaded documents into overlapping chunks sized for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Pass the same explicit API key the chat model receives, rather than relying
# on the embeddings client discovering GOOGLE_API_KEY on its own.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=google_api_key,
)

# Position-based ids keep this cell idempotent: re-running it in the same
# kernel overwrites the existing entries instead of adding a second copy.
# NOTE(review): assumes the in-memory Chroma collection outlives a single cell
# run and that writes are upserts - confirm for the installed versions. Entries
# left over from an earlier run that produced more chunks are not removed.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=[f"chunk-{index}" for index in range(len(splits))],
)

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")



In [4]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each item in ``docs``.

    The texts are joined in iteration order with one blank line between
    consecutive entries; an empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Inputs for the prompt: "context" is the retrieved documents rendered by
# format_docs, "question" is the caller's input passed through unchanged.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Pipeline: build prompt inputs -> fill the prompt -> call the model -> parse
# the model output with StrOutputParser.
rag_chain = chain_inputs | prompt | model | StrOutputParser()

question = "What is Task Decomposition?"
rag_chain.invoke(question)

'Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done by using simple prompts, task-specific instructions, or human inputs. This process helps LLMs reason through complex tasks and can be used to improve model performance. \n'