In [2]:
# Data Ingestion

from langchain_community.document_loaders import TextLoader
# Read the local text file into LangChain Document objects and display them.
source_file = "ronaldo.txt"
loader = TextLoader(source_file)
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'ronaldo.txt'}, page_content='Cristiano Ronaldo dos Santos Aveiro, commonly known as CR7, is a Portuguese professional footballer widely regarded as one of the greatest players in the history of the sport. Born on February 5, 1985, in Funchal, Madeira, Portugal, Ronaldo has achieved unparalleled success both individually and with his teams. Here’s a brief overview of his life and career:\n\nEarly Life and Career:\nRonaldo grew up in a humble family in Madeira and began his football journey at Andorinha, a local club, before moving to Nacional and eventually joining the Sporting CP academy.\nHis immense talent became evident at a young age, leading to his first-team debut for Sporting CP at 17.\nClub Career Highlights:\nManchester United (2003–2009):\n\nSigned by Sir Alex Ferguson in 2003 for a then-record fee for a teenager.\nHelped Manchester United win 3 Premier League titles, a UEFA Champions League (2008), and the FIFA Club World Cup (2008).\nWon his f

# Web-based Loader

In [4]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

## Load the blog post, parsing only the elements whose CSS class marks the
## post title, header or content.
post_sections = bs4.SoupStrainer(
    class_=("post-title", "post-content", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

text_documents = loader.load()
text_documents

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

# FROM PDF

In [6]:
from langchain_community.document_loaders import PyPDFLoader
import os

# Assemble the PDF location from its path components.
pdf_parts = ("../bot1", "data", "monopoly.pdf")
pdf_path = os.path.join(*pdf_parts)

# Load the PDF into Document objects and display them.
loader = PyPDFLoader(pdf_path)
docs = loader.load()
docs

[Document(metadata={'source': '../bot1\\data\\monopoly.pdf', 'page': 0}, page_content='MONOPOLY \nProperty Trading Game from Parker Brothers" \nAGES 8+ \n2 to 8 Players \nContents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance \nand Community Chest cards, Title Deed cards, play money and a Banker\'s tray. \nNow there\'s a faster way to play MONOPOLY. Choose to play by \nthe classic rules for buying, renting and selling properties or use the \nSpeed Die to get into the action faster. If you\'ve never played the classic \nMONOPOLY game, refer to the Classic Rules beginning on the next page. \nIf you already know how to play and want to use the Speed Die, just \nread the section below for the additional Speed Die rules. \nSPEED DIE RULES \nLearnins how to Play with the S~eed Die IS as \n/ \n fast as playing with i\'t. \n1. When starting the game, hand out an extra $1,000 to each player \n(two $5005 should work). The game moves fast and you\'ll need \nthe extra cash to buy and bu

# Load Data into chunks

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk_size=1000 with chunk_overlap=200, so neighbouring
# chunks share some text.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Chunk whatever `text_documents` currently holds; pass `docs` instead to
# chunk the PDF pages.
documents = text_splitter.split_documents(text_documents)
documents[:5]


# Split the loaded data into overlapping chunks.


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

In [8]:
# Display every chunk produced by the splitter (the whole list, not a preview).
documents

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

# Converting the chunks into vectors

In [11]:
# ## Vector Embedding and Vector Store

# from langchain_community.embeddings import OpenAIEmbeddings
# from langchain_community.vectorstores import Chroma

# db = Chroma.from_documents(documents[:20], OpenAIEmbeddings())


In [17]:
# # from sentence_transformers import SentenceTransformer
# # from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.vectorstores import FAISS
# from langchain_community.embeddings import OpenAIEmbeddings

# db1 = FAISS.from_documents(documents[:20],OpenAIEmbeddings())


In [18]:
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Name of the sentence-transformers checkpoint passed to the embedding wrapper.
model = "sentence-transformers/all-mpnet-base-v2"

# Configure the embedding model: run on CPU and leave vectors un-normalised.
hf = HuggingFaceEmbeddings(
    model_name=model,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=False),
)

  from tqdm.autonotebook import tqdm, trange
  hf = HuggingFaceEmbeddings(


In [19]:
# Create the FAISS index.
import faiss

# Embed a throwaway query to learn the vector length produced by `hf`,
# then size a flat L2 index to match it.
embedding_dim = len(hf.embed_query("Hello World"))
index = faiss.IndexFlatL2(embedding_dim)

In [20]:
from langchain_community.docstore.in_memory import InMemoryDocstore

# Assemble the vector store from the FAISS index and the embedding model.
# Documents are kept in an in-memory docstore, and the index-position -> doc-id
# mapping starts out empty.
docstore = InMemoryDocstore()
vector_store = FAISS(
    index=index,
    embedding_function=hf,
    index_to_docstore_id=dict(),
    docstore=docstore,
)

In [21]:
## FAISS vector database - populated through the store's embedding function (`hf`)
# Add every chunk in `documents` to the store; the call returns the ids
# assigned to the stored chunks, which are displayed below.

added_ids = vector_store.add_documents(documents=documents)
added_ids

['e533cc2f-bb3f-4150-ac93-2f33dee4db66',
 '7c6701ba-b282-4f75-8a6a-f47378bb326c',
 'cf080da3-b249-465c-8163-a0ce87646d0e',
 '7be7683c-9ef4-4f6a-a9b9-0a60ac4e82be',
 '68e2ee05-2433-4186-a5b5-2ad26089c07c',
 'dbcd0922-c6b0-41a6-9cf6-95d96080fbba',
 '0d6c20e6-bd33-4443-a1c3-c144b46a3e2a',
 '3e8d75da-5b2f-420e-9faf-d85ececd127b',
 '7aa4dc85-c6d6-47c2-9bc7-4277df8428cf',
 '8f18a6c1-df21-4ed2-8e35-ff390c915a50',
 '72e4214b-88b9-41f4-bff0-36a96f65d648',
 '6d07bd3d-6f3b-4ff3-9137-2e4e46def036',
 '27e12074-4d04-4e0b-ad8f-5ed5d6c53abe',
 'aacc7206-eee4-4240-948e-54da13cd71e4',
 'bfa9170c-a80d-426c-a096-621b4314f212',
 '42ce6116-b803-4eec-8832-076118f60ef0',
 'd1729dc8-589b-4e1b-ba88-6d48bc3a24d9',
 'ec2657d2-d526-455f-b747-a58f4359d309',
 'cfe3eb2b-1a9f-44fa-9299-b04c67399270',
 'e35b6ffc-7f93-470a-bb7a-d3bb35e4eacb',
 'f778d406-d384-411f-a5f2-f7b0bee7abed',
 'd02a2af0-dd10-4efd-8185-6edcd047fbb3',
 '6eae9883-7d5d-48fa-ac5a-4e947155c9d2',
 'd0d380af-7c7d-46ce-b3fb-154c1a4c1954',
 'f88f9778-f890-

In [24]:
# Run a similarity search to fetch the chunks most relevant to the query.

query = " Overview of a LLM-powered autonomous agent system."
retireved_results = vector_store.similarity_search(query, k=5)

# Print the text of the first (top-ranked) hit.
top_hit = retireved_results[0]
print(top_hit.page_content)

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.


In [25]:
# Inspect the text of the third hit (index 2) from the similarity search.
retireved_results[2].page_content

'}\n]\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:'

In [28]:
from langchain.embeddings import OpenAIEmbeddings  # or your specific embedding model

# Reuse the embedding model the index was built with (`hf`). Reloading the
# store with a different model (e.g. OpenAIEmbeddings) would embed queries in a
# different vector space/dimension than the stored vectors, so searches on the
# reloaded store would fail or return meaningless neighbours.
embeddings = hf

# Save the vector store to the local "FAISS_STORE" folder.
vector_store.save_local("FAISS_STORE")

# Reload it via the FAISS classmethod instead of calling it on an instance.
# NOTE: allow_dangerous_deserialization=True permits unpickling the saved
# data. That is acceptable here only because the folder was written by the
# line above; never enable it for files from an untrusted source.
loaded_vector_store = FAISS.load_local(
    "FAISS_STORE", embeddings=embeddings, allow_dangerous_deserialization=True
)
