In [1]:
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file (if one exists) into the
# process environment. Without this call the imported `load_dotenv` is never
# used and the lookups below only see variables exported by the shell.
load_dotenv()

# Re-export the credentials/settings used by OpenAI and LangSmith. A missing
# variable is skipped instead of being assigned: `os.environ[...] = None`
# raises "TypeError: str expected, not NoneType", which hides the real cause.
for _name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    _value = os.getenv(_name)
    if _value is not None:
        os.environ[_name] = _value

# Turn on LangChain tracing for every run in this kernel.
os.environ['LANGCHAIN_TRACING_V2'] = "true"

# Groq key is passed explicitly to the chat model; None when GROQ_API is unset.
groq_api_key = os.getenv('GROQ_API')

In [2]:
from langchain_groq import ChatGroq

# Chat model served through Groq, authenticated with the key held in
# `groq_api_key`.
model = ChatGroq(
    model="Gemma2-9b-It",
    groq_api_key=groq_api_key,
)

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model handed to the vector store when indexing document chunks.
embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


In [6]:
import bs4
# Restrict HTML parsing to elements carrying one of these CSS classes.
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header"),
)

loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)

# Fetch the page and keep the loaded documents for the splitting step.
documents = loader.load()

In [8]:
# Split the loaded documents into overlapping chunks, because the LLM can
# only take a limited sequence length.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(documents)

# Index the chunks in a Chroma vector store and expose it as a retriever.
vectorStore = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
)
retriever = vectorStore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7f9fcc48f8b0>)

In [9]:
# System prompt template. Adjacent string literals are concatenated with no
# separator, so every fragment except the last of a paragraph must end with
# an explicit space; otherwise the words run together in the text sent to
# the LLM. "{context}" is the slot filled with the retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat prompt: the system message carries `system_prompt`,
# the human message carries the user's "{input}".
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [10]:
# create_stuff_documents_chain combines the documents, inserts them into the
# prompt and sends the prompt to the LLM.
question_answer_chain = create_stuff_documents_chain(
    llm=model,
    prompt=prompt,
)

# Retrieval chain built from the retriever and the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [11]:
# Run the RAG chain on a first, self-contained question and display the
# full result.
first_query = {"input": "What is self-reflection?"}
response = rag_chain.invoke(first_query)
response

{'input': 'What is self-reflection?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Memory stream: is a long-term memor

In [12]:
# Show only the generated answer from the chain's result.
response["answer"]

'Self-reflection is a process that allows autonomous agents to learn and improve by analyzing past actions and decisions. \n\nIt involves identifying mistakes, understanding the reasons behind them, and using that knowledge to refine future actions. Self-reflection is crucial for agents operating in complex, real-world environments where trial and error is common. \n\n\n\n'

In [14]:
# Follow-up question sent on its own; no chat history is passed to the chain.
rag_chain.invoke(
    {"input": "How do we achieve it?"}
)

{'input': 'How do we achieve it?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated 

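The second question depends on the first one ("it" refers to self-reflection), but the chain has no memory of the earlier exchange. To handle follow-up questions like this, we add chat history.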

# Chat History

In [None]:
# The retriever also needs to know the chat history
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder 

<!-- CHAT history -->