Load Documents

In [1]:
import bs4
from langchain_community.document_loaders import WebBaseLoader


bs4_strainer = bs4.SoupStrainer(class_=('post-title',"post-header", "post-content"))

loader = WebBaseLoader(web_paths = ("https://lilianweng.github.io/posts/2023-06-23-agent/",), bs_kwargs = {"parse_only": bs4_strainer})

docs = loader.load()

assert len(docs) == 1
print(len(docs[0].page_content))

USER_AGENT environment variable not set, consider setting it to identify your requests.


43130


Splitting Documents

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200,
    add_start_index = True,
)

all_splits = text_splitter.split_documents(docs)

print(len(all_splits))

66


Storing Documents

In [3]:
import getpass
import os

if not os.environ.get("MISTRAL_API_KEY"):
  os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for MistralAI: ")

from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")

Enter API key for MistralAI:  ········




In [4]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("mistral-large-latest", model_provider="mistralai")

from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [5]:
document_ids = vector_store.add_documents(documents=all_splits)
print(document_ids)

['996ef128-4746-4afd-8652-36b690311fc6', 'a76b2168-1c0c-4117-ae6b-eb9b09e4d95a', 'db1a478e-285d-477d-833e-ac79ebc2c82b', '90e96abb-0371-4f0c-aa71-f3e522c97861', '3f9c9170-911c-4afd-ae9f-bc89ddeae6a0', '366634bc-f3ab-4692-9f32-b43be0865075', '55dbf4ac-54a1-4a19-a3b2-84e85eb86d9d', '0f2d5fb6-3fc4-4f0e-8fac-96c252a12e90', 'fee75257-2fd2-4c45-a187-536432698411', 'e3c21d99-39a0-4911-80b8-c1597c7fe966', '10afab59-dd7c-4ece-b9d5-147c0bb25a90', 'f88a430d-f5f0-420c-af20-417a2b0aa0bb', '0ad9f98e-486e-4407-a29e-1c13602e915e', 'b1580dad-792d-4efe-abef-6247a97ff4e6', '830c70bb-59d4-4079-81a7-3b9c3a59bae6', '50ab5583-4574-4a9b-9101-f082a936ac57', 'a5981465-a8f4-4efa-9410-814110dcbc7b', '8bd38de7-8e83-4219-8e33-0699d2f2b90a', 'c42c59aa-3140-4ee3-9114-33f2d0008fc4', '77f72b3e-7aec-44ce-b17a-efbd61496257', '43681a42-e99c-4a33-a741-aa031402f22b', 'c56df5e7-3613-4a60-90d8-271cb8bce341', 'a6118ce3-2301-4e08-93c3-7dc7f0cc3e4e', '5f86ad7e-9ee8-438c-b820-8b83e46761aa', '1ba4e6d5-b033-4230-938d-9e6798b863c2',

Retrieval and Generation

In [6]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_message =  prompt.invoke({
    "context": "(context goes here)",
    "question": "(question goes here)",
}).to_messages()

print(example_message)

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: (question goes here) \nContext: (context goes here) \nAnswer:", additional_kwargs={}, response_metadata={})]




In [7]:
question = "what is self reflection?"

retrieved_docs = vector_store.similarity_search(question)
docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
print(docs_content)
answer = llm.invoke(prompt.invoke({"question": question, "context": docs_content}))
answer.content

Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)
The heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.
Self-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.

Memory stream: is a long-term memory module (external database) that records a comprehensive list of agents’ experience in natural language.

Each element is an observation, an event directly provided by the agent.
- Inter-agent communication can trigger new natural language statements.


Re

"Self-reflection is a process where autonomous agents improve by refining past actions and correcting mistakes. It involves showing two-shot examples to a language model, each consisting of a failed trajectory and an ideal reflection for future planning. These reflections are then added to the agent's working memory to guide future behavior."

In [8]:
answer.content

"Self-reflection is a process where autonomous agents improve by refining past actions and correcting mistakes. It involves showing two-shot examples to a language model, each consisting of a failed trajectory and an ideal reflection for future planning. These reflections are then added to the agent's working memory to guide future behavior."

Query Analysis

In [9]:
total_documents = len(all_splits)

third = total_documents // 3

for i, doc in enumerate(all_splits):
    if i < third:
        doc.metadata["section"] = "beginning"
    elif i < 2 * third:
        doc.metadata["section"] = "middle"
    else:
        doc.metadata["section"] = "end"


all_splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 8,
 'section': 'beginning'}

In [10]:
vector_store = InMemoryVectorStore(embeddings)
_ = vector_store.add_documents(all_splits)

Using LangGraph

In [11]:
from typing_extensions import TypedDict, List, Annotated, Literal
from langchain_core.documents import Document



# search
class Search(TypedDict):
    query: Annotated[str, ..., "Search query to run"]
    section: Annotated[Literal["beginning", "middle", "end"], ..., "Section of the document to search in"]

# state
class State(TypedDict):
    question: str
    context: List[Document]
    query: Search
    answer: str

# steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["query"]["query"],
                                                    filter=lambda doc: doc.metadata.get("section") == state["query"]["section"])
    return {"context": retrieved_docs}

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    answer = llm.invoke(prompt.invoke({"question": state["question"], "context": docs_content}))
    return {"answer": answer.content}

def analyze_query(state: State):
    structured_llm = llm.with_structured_output(Search)
    query = structured_llm.invoke(state["question"])
    return {"query": query}


# control flow
from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence([analyze_query,retrieve, generate])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

In [12]:
for step in graph.stream({"question": "what is self reflection?"}, stream_mode="updates"):
    print(step)

HTTPStatusError: Error response 429 while fetching https://api.mistral.ai/v1/chat/completions: {"message":"Requests rate limit exceeded"}

Playground

In [None]:
import requests

response = requests.get(
    "https://api.github.com/repos/hashicorp/terraform-provider-aws/contents/website/docs/r"
)

doc_files = response.json()

In [None]:
doc_files

In [None]:
file = doc_files[0]
if file["name"].endswith(".markdown"):
    file_response = requests.get(file["download_url"])
    doc = file_response.text
    print(doc)

In [None]:
import frontmatter

resource = frontmatter.loads(doc)
resource.metadata

In [None]:
import re


chunks = re.split(r"(##\s+.+)", doc)

print(chunks)

results = []
for i in range(1, len(chunks), 2):
    header = chunks[i].strip()
    body = chunks[i+1].strip()
    results.append({"header": header, "body": body})
    print({"header": header, "body": body})



In [None]:
metadata = dict(resource.metadata)

# Extract main content
main_content = resource.content

# Separate sections
sections = {}
current_section = "main"
sections[current_section] = []

for line in main_content.split('\n'):
        if line.startswith('## '):
            current_section = line.replace('## ', '').strip()
            sections[current_section] = []
        else:
            sections[current_section].append(line)

structured_content = {
        "title": metadata.get("page_title", ""),
        "description": metadata.get("description", ""),
        "resource_name": re.search(r'# Resource: (.*)', main_content).group(1) if re.search(r'# Resource: (.*)', main_content) else "",
        "frontmatter": metadata,
        "sections": {k: '\n'.join(v) for k, v in sections.items()},
        "full_content": main_content
    }

# for k,v in structured_content['sections'].items():
#     print(f"{k}:",v)  

print(structured_content['full_content'])

In [None]:
for section_name, content in structured_content.get("sections", {}).items():
    # Extract code blocks and surrounding text
    code_blocks = re.findall(r"```terraform\n(.*?)\n```", content, re.DOTALL)
    text = re.sub(r"```terraform\n(.*?)\n```", "", content, flags=re.DOTALL).strip()

print(code_blocks)
print(text)