In [33]:
!pip install langchain-community
!pip install bs4
!pip install requests
!pip install langchain
!pip install chromadb
!pip install huggingface_hub
!pip install sentence_transformers
!pip install unidecode





## Import necessary requirements

In [34]:
from langchain_community.document_loaders import WebBaseLoader
import bs4
import requests
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceEndpoint


from pathlib import Path
import chromadb
from unidecode import unidecode


## Loading the data

In [35]:
# Restrict HTML parsing to the elements carrying the post's title, header and body.
article_sections = bs4.SoupStrainer(
    class_=("post-title", "post-content", "post-header")
)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": article_sections},
)

In [36]:
# Fetch the page configured on `loader` and parse it into LangChain documents.
text_documents = loader.load()

## Splitting the data into chunks

In [37]:
# Break the loaded page into overlapping chunks; the overlap keeps context
# that straddles a chunk boundary available in both neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(text_documents)

In [38]:
# Preview the first five chunks to sanity-check the split.
documents[:5]

[Document(page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n

In [39]:
# The Hugging Face token is a secret and must never be stored in the notebook.
# Use the value already present in the environment; otherwise prompt for it
# without echoing, so it does not end up in the saved cell source or output.
# NOTE: the token that was previously hardcoded in this cell has been exposed
# and should be revoked in the Hugging Face account settings.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

## Converting the text into vectors

In [40]:
# Sentence-transformers model used to turn each chunk (and each query) into a vector.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [41]:
from langchain_community.vectorstores import Chroma

# Embed every chunk and index the vectors in a Chroma store.
db = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)

## User query

In [42]:
query = "what is ReAct"

# Retrieve the chunks closest to the query in embedding space and show the
# text of the best match.
result = db.similarity_search(query=query)
best_match = result[0]
best_match.page_content

'},\n  {\n    "role": "user",\n    "content": "We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\\n"\n  },\n  {\n    "role": "assistant",\n    "content": "Summary of areas that need clarification:\\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\\n2. Details about the MVC components (e.g. which components are in each file)\\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\\n\\nClarifying question:\\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?"\n  },\n  {\n    "role": "user",'

## Retriever and chain with LangChain

In [43]:
from langchain_community.llms import HuggingFaceHub

# Generation settings forwarded to the hosted model.
generation_kwargs = {"temperature": 0.5, "max_new_tokens": 250}

# Hosted FLAN-T5 model used to answer questions from the retrieved context.
llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    model_kwargs=generation_kwargs,
)


In [44]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text, kept verbatim. It exposes two placeholders: {context} for the
# retrieved document text and {input} for the user's question.
qa_template = """
              answer the following question based on provides context
              <context>
              {context}
              </context>
              question : {input}"""

prompt = ChatPromptTemplate.from_template(qa_template)

In [45]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chain that formats the supplied documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [46]:
# Expose the Chroma store through the retriever interface so it can be plugged
# into a retrieval chain. The spelling `retriver` is kept because a later cell
# references this name.
retriver = db.as_retriever()
retriver

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7ef29e8b62f0>)

In [47]:
from langchain.chains import create_retrieval_chain

# End-to-end pipeline: fetch relevant chunks with the retriever, then pass
# them to the document chain to produce the answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=document_chain,
)

In [48]:
# Ask a question; the chain expects it under the "input" key.
question = "what is task decomposition"
response = retrieval_chain.invoke({"input": question})


In [49]:
# Display only the generated answer from the chain's response.
response["answer"]

'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote. Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline. for writing a novel, or (3) with human inputs.'