In [1]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file (if present) into the process environment.
_=load_dotenv()

# Fail fast with a clear message when the key is missing. Assigning the result
# of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is not set.
_openai_api_key = os.getenv('OPENAI_API_KEY')
if not _openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# Set the OpenAI API key environment variable
os.environ["OPENAI_API_KEY"] = _openai_api_key

In [4]:
# Model identifiers kept together so they are easy to swap.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
CHAT_MODEL_NAME = 'gpt-3.5-turbo-0125'

# Embeddings client (used when building the vector store) and the chat model
# shared by the grading, generation and highlighting chains.
embedding_model = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)
llm = ChatOpenAI(model=CHAT_MODEL_NAME)

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
import bs4 

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [6]:
# Docs to index
# Every article lives under the same "the-batch" path and carries the same
# `ref` query string, so build the list from a template plus the article slugs.
_BATCH_URL_TEMPLATE = "https://www.deeplearning.ai/the-batch/{slug}/?ref=dl-staging-website.ghost.io"
_ARTICLE_SLUGS = (
    "how-agents-can-improve-llm-performance",
    "agentic-design-patterns-part-2-reflection",
    "agentic-design-patterns-part-3-tool-use",
    "agentic-design-patterns-part-4-planning",
    "agentic-design-patterns-part-5-multi-agent-collaboration",
)
urls = [_BATCH_URL_TEMPLATE.format(slug=slug) for slug in _ARTICLE_SLUGS]

In [7]:
# One WebBaseLoader per URL; each .load() result is its own list, so `docs`
# is a list of per-URL lists here (it is flattened into `docs_list` afterwards).
docs = [WebBaseLoader(url).load() for url in urls]

In [10]:
# Flatten the per-URL lists into one flat list of documents.
docs_list = []
for per_url_docs in docs:
    docs_list.extend(per_url_docs)

In [11]:
# Inspect the loaded documents (the repr includes the full page text, so the output is long).
docs_list

[Document(metadata={'source': 'https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io', 'title': 'Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance', 'description': 'I think AI agent workflows will drive massive AI progress this year — perhaps even more than the next generation of foundation models. This is an important...', 'language': 'en'}, page_content='Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance✨ New course! Enroll in Design, Develop, and Deploy Multi-Agent Systems with CrewAIExplore CoursesAI NewsletterThe BatchAndrew\'s LetterData PointsML ResearchBlog✨ AI Dev x NYCCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesMembershipStart LearningWeekly IssuesAndrew\'s LettersData PointsML ResearchBusinessScienceCultureHardwareAI CareersAboutSubscribeThe BatchLettersArticleAgentic Design Patterns Part 1 Four AI agent strategies that improve GPT-4 and GPT-3.5 performanceLettersTechni

In [13]:
# Split
# Chunking parameters handed to the tiktoken-based splitter.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break every loaded document into chunks for indexing.
doc_splits = text_splitter.split_documents(docs_list)

In [14]:
# Inspect the chunks produced by the splitter (long output).
doc_splits

[Document(metadata={'source': 'https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io', 'title': 'Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance', 'description': 'I think AI agent workflows will drive massive AI progress this year — perhaps even more than the next generation of foundation models. This is an important...', 'language': 'en'}, page_content="Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance✨ New course! Enroll in Design, Develop, and Deploy Multi-Agent Systems with CrewAIExplore CoursesAI NewsletterThe BatchAndrew's LetterData PointsML ResearchBlog✨ AI Dev x NYCCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesMembershipStart LearningWeekly IssuesAndrew's LettersData PointsML ResearchBusinessScienceCultureHardwareAI CareersAboutSubscribeThe BatchLettersArticleAgentic Design Patterns Part 1 Four AI agent strategies that improve GPT-4 and GPT-3.5 performanceLettersTechnica

In [22]:
# splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# chunks = splitter.split_documents(docs_list)
# chunks

In [23]:
# Embed every chunk with `embedding_model` and index it in a Chroma collection.
COLLECTION_NAME = "rag"
vectorstore = Chroma.from_documents(
    doc_splits,
    embedding=embedding_model,
    collection_name=COLLECTION_NAME,
)

In [24]:
TOP_K = 4  # number of documents to retrieve

# Similarity-search retriever over the vector store.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={'k': TOP_K})

In [25]:
# The user query that drives retrieval, grading, generation and highlighting below.
question = "what are the different kind of agentic design patterns?"

In [26]:
# NOTE: this rebinds `docs`. Earlier it held the per-URL lists returned by the
# loaders; from here on it is the flat list of chunks retrieved for `question`.
# Re-running the flattening cell after this one would operate on the wrong data.
docs = retriever.invoke(question)

In [28]:
# Show the first retrieved chunk: its title, source URL and text.
top_doc = docs[0]
print(
    f"Title: {top_doc.metadata['title']}\n\n"
    f"Source: {top_doc.metadata['source']}\n\n"
    f"Content: {top_doc.page_content}\n"
)

Title: Agentic Design Patterns Part 5, Multi-Agent Collaboration

Source: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io

Content: above emphasized clear, efficient code as opposed to, say, scalable and highly secure code. By decomposing the overall task into subtasks, we can optimize the subtasks better.Perhaps most important, the multi-agent design pattern gives us, as developers, a framework for breaking down complex tasks into subtasks. When writing code to run on a single CPU, we often break our program up into different processes or threads. This is a useful abstraction that lets us decompose a task, like implementing a web browser, into subtasks that are easier to code. I find thinking through multi-agent roles to be a useful abstraction as well.In many companies, managers routinely decide what roles to hire, and then how to split complex projects — like writing a large piece of software or preparin

In [30]:
from pydantic import BaseModel, Field

In [43]:
# Data model for the relevance grader's structured output.
# The class docstring and the Field description become part of the schema that
# is sent to the model, so they are written as instructions for the LLM.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Free-form string expected to be 'yes' or 'no'; required (no default).
    binary_score: str = Field(
        ...,
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

In [44]:
# Wrap the chat model so its replies are returned in the GradeDocuments shape.
# NOTE: this name is reassigned further down for the hallucination grader.
structured_llm_grader = llm.with_structured_output(GradeDocuments)



In [34]:
# Prompt
# System instructions for the relevance grader: a deliberately lenient yes/no
# check meant to filter out erroneous retrievals.
# NOTE: `system` is reused for other prompts later in the notebook.
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

In [35]:
from langchain_core.prompts import ChatPromptTemplate
# Human turn: the chunk under review followed by the user's question.
_GRADE_HUMAN_TEMPLATE = "Retrieved document: \n\n {document} \n\n User question: {question}"

# Two-message chat prompt: grader instructions, then the document/question pair.
grade_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", _GRADE_HUMAN_TEMPLATE)]
)

In [36]:
# Relevance-grading chain: fills the grading prompt ("document", "question")
# and sends it to the structured-output model; the result exposes `binary_score`.
retrieval_grader = grade_prompt | structured_llm_grader

In [38]:
# Grade every retrieved chunk and keep only those judged relevant.
docs_to_use = []
for doc in docs:
    print(doc.page_content, '\n', '-'*50)
    res = retrieval_grader.invoke({"question": question, "document": doc.page_content})
    print(res,'\n')
    # `binary_score` is a free-form str in the schema, so the model may answer
    # "Yes" or " yes"; normalise before comparing so relevant chunks are not
    # silently dropped.
    if res.binary_score.strip().lower() == 'yes':
        docs_to_use.append(doc)

above emphasized clear, efficient code as opposed to, say, scalable and highly secure code. By decomposing the overall task into subtasks, we can optimize the subtasks better.Perhaps most important, the multi-agent design pattern gives us, as developers, a framework for breaking down complex tasks into subtasks. When writing code to run on a single CPU, we often break our program up into different processes or threads. This is a useful abstraction that lets us decompose a task, like implementing a web browser, into subtasks that are easier to code. I find thinking through multi-agent roles to be a useful abstraction as well.In many companies, managers routinely decide what roles to hire, and then how to split complex projects — like writing a large piece of software or preparing a research report — into smaller tasks to assign to employees with different specialties. Using multiple agents is analogous. Each agent implements its own workflow, has its own memory (itself a rapidly evolvin

In [39]:
from langchain_core.output_parsers import StrOutputParser

# Prompt
# Generation prompt. The answer must come from the retrieved documents (they are
# supplied in the human turn); the previous wording told the model to answer
# from its own knowledge, which works against the grounding check done later.
# NOTE: `system` and `prompt` are reused for other prompts later in the notebook.
system = """You are an assistant for question-answering tasks. Answer the question using only the retrieved documents provided by the user.
If the documents do not contain the answer, say that you don't know.
Use three-to-five sentences maximum and keep the answer concise."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved documents: \n\n <docs>{documents}</docs> \n\n User question: <question>{question}</question>"),
    ]
)

In [40]:
# Post-processing
def format_docs(docs):
    """Render documents as numbered ``<docN>`` blocks for inclusion in a prompt.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping and a
            ``page_content`` attribute.

    Returns:
        A single string in which document *i* (1-based) is wrapped in
        ``<doc{i}>:`` / ``</doc{i}>`` and lists its title, source and content.
        Consecutive blocks are separated by a blank line; an empty input
        yields ``""``.
    """
    def _render(position, doc):
        # Metadata keys are not guaranteed to be present on every document, so
        # fall back to a placeholder instead of raising KeyError.
        title = doc.metadata.get('title', 'Unknown')
        source = doc.metadata.get('source', 'Unknown')
        return (
            f"<doc{position}>:\n"
            f"Title:{title}\n"
            f"Source:{source}\n"
            f"Content:{doc.page_content}\n"
            f"</doc{position}>\n"
        )

    return "\n".join(_render(i, doc) for i, doc in enumerate(docs, start=1))

In [41]:
# Chain
# Generation chain: prompt -> chat model -> plain string.
# NOTE: `prompt` must be the generation prompt when this cell runs; the same
# name is reassigned later for the highlighting prompt, so re-running this cell
# out of order would build the chain with the wrong prompt.
rag_chain = prompt | llm | StrOutputParser()

In [42]:
# Run
# Generate the answer from the chunks that passed the relevance grader.
generation = rag_chain.invoke(
    {
        "documents": format_docs(docs_to_use),
        "question": question,
    }
)
print(generation)

The different kinds of agentic design patterns discussed in the retrieved documents are:
1. **Planning:** Involves using a large language model to autonomously decide on a sequence of steps to accomplish a task.
2. **Reflection:** Focuses on enabling large language models to reflect on their behavior and improve their responses based on feedback.
3. **Multi-Agent Collaboration:** Involves breaking down complex tasks into subtasks performed by different roles or agents, with the aim of achieving more effective outcomes through collaboration.


In [45]:
#Check for Hallucinations
# Data model
# Structured-output schema for the hallucination grader. The docstring and the
# Field description are part of the schema text, so they are left as written.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in 'generation' answer."""

    # Required free-form string; expected to be 'yes' (grounded) or 'no'.
    binary_score: str = Field(
        ...,
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

In [46]:
# NOTE: rebinds `structured_llm_grader`, which previously wrapped GradeDocuments.
# `retrieval_grader` was composed with the earlier object, but re-running that
# cell after this one would silently switch it to the hallucination schema.
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

In [47]:
# Prompt
# Grader instructions: is the generated answer supported by the retrieved facts?
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""

# Human turn carrying the facts and the answer under review.
_HALLUCINATION_HUMAN_TEMPLATE = "Set of facts: \n\n <facts>{documents}</facts> \n\n LLM generation: <generation>{generation}</generation>"

hallucination_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", _HALLUCINATION_HUMAN_TEMPLATE)]
)

In [48]:
# Hallucination-grading chain: fills the prompt ("documents", "generation")
# and sends it to the structured-output model.
hallucination_grader = hallucination_prompt | structured_llm_grader

In [49]:
# Check the generated answer against the same documents it was generated from.
# The verdict is only printed here; nothing in the cells shown acts on a 'no'.
response = hallucination_grader.invoke({"documents": format_docs(docs_to_use), "generation": generation})
print(response)

binary_score='yes'


In [50]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

In [51]:
# Data model
# Output schema for the highlighting step. The four fields are parallel lists:
# the printing loop zips them, so entry i of each list describes the same segment.
# The docstring and Field descriptions feed the parser's format instructions,
# so their wording is left untouched.
class HighlightDocuments(BaseModel):
    """Return the specific part of a document used for answering the question."""

    # Document identifiers (the sample output shows values such as "doc3").
    id: List[str] = Field(
        ...,
        description="List of id of docs used to answers the question"
    )

    # Title of the document each segment was taken from.
    title: List[str] = Field(
        ...,
        description="List of titles used to answers the question"
    )

    # Source URL of the document each segment was taken from.
    source: List[str] = Field(
        ...,
        description="List of sources used to answers the question"
    )

    # Text snippets quoted from the documents.
    segment: List[str] = Field(
        ...,
        description="List of direct segements from used documents that answers the question"
    )

In [52]:
# parser
# Parses the model's text reply into a HighlightDocuments instance and supplies
# the matching format instructions for the prompt.
parser = PydanticOutputParser(pydantic_object=HighlightDocuments)

In [53]:
# Prompt
# Template for the highlighting step: asks the model to quote, verbatim, the
# document segments that support the generated answer.
# Placeholders: {documents}, {question}, {generation}, {format_instructions}.
# NOTE: rebinds `system` (and `prompt` below), which earlier cells also used.
system = """You are an advanced assistant for document search and retrieval. You are provided with the following:
1. A question.
2. A generated answer based on the question.
3. A set of documents that were referenced in generating the answer.

Your task is to identify and extract the exact inline segments from the provided documents that directly correspond to the content used to 
generate the given answer. The extracted segments must be verbatim snippets from the documents, ensuring a word-for-word match with the text 
in the provided documents.

Ensure that:
- (Important) Each segment is an exact match to a part of the document and is fully contained within the document text.
- The relevance of each segment to the generated answer is clear and directly supports the answer provided.
- (Important) If you didn't used the specific document don't mention it.

Used documents: <docs>{documents}</docs> \n\n User question: <question>{question}</question> \n\n Generated answer: <answer>{generation}</answer>

<format_instruction>
{format_instructions}
</format_instruction>
"""


# The format instructions are bound once from the parser; the other three
# variables are supplied when the chain is invoked.
prompt = PromptTemplate(
    template= system,
    input_variables=["documents", "question", "generation"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [54]:
# Chain
# Highlighting chain: prompt -> chat model -> HighlightDocuments parser.
doc_lookup = prompt | llm | parser

# Run it on the same documents, question and answer used above.
lookup_inputs = {
    "documents": format_docs(docs_to_use),
    "question": question,
    "generation": generation,
}
lookup_response = doc_lookup.invoke(lookup_inputs)

In [55]:

# Print each highlighted segment next to the document it came from.
# The loop variable is `doc_id` rather than `id`: in a notebook the loop
# variable stays in the global namespace and would shadow the builtin `id()`.
highlight_rows = zip(
    lookup_response.id,
    lookup_response.title,
    lookup_response.source,
    lookup_response.segment,
)
for doc_id, title, source, segment in highlight_rows:
    print(f"ID: {doc_id}\nTitle: {title}\nSource: {source}\nText Segment: {segment}\n")

ID: doc3
Title: Agentic Design Patterns Part 4: Planning
Source: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io
Text Segment: Planning is a key agentic AI design pattern in which we use a large language model (LLM) to autonomously decide on what sequence of steps to execute to accomplish a larger task.

ID: doc4
Title: Agentic Design Patterns Part 2: Reflection
Source: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/?ref=dl-staging-website.ghost.io
Text Segment: Last week, I described four design patterns for AI agentic workflows that I believe will drive significant progress this year: Reflection, Tool Use, Planning and Multi-agent collaboration.

ID: doc1
Title: Agentic Design Patterns Part 5, Multi-Agent Collaboration
Source: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io
Text Segment: Perhaps most important,