In [3]:
from llm_providers import GoogleAIModel
import os
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the process environment.
load_dotenv()

# Fail fast with an actionable message instead of silently handing `None`
# to the model wrapper when the key is missing or empty.
key = os.environ.get('GOOGLE_API_KEY')
if not key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# LLM client shared by the cells below.
llm = GoogleAIModel(api_key=key)

In [1]:
from multi_agent import SyntheticDataGenerator
from langchain.retrievers import BM25Retriever
from utils import pdf_parser

# Parse the source book; the result is what the retriever cell indexes.
book_path = './test_pdf/book.pdf'
pdf_files = pdf_parser(book_path)

INFO:__name__:Calling pdf_parser
INFO:__name__:Extracted 535 pages. First page preview: Chip Huyen
 AI Engineering
Building Applications  
with Foundation Models
INFO:__name__:Loading 535 pieces of context!
INFO:__name__:Split successful. First chunk preview: Chip Huyen
 AI Engineering
Building Applications  
with Foundation Models...


In [4]:
# BM25 retriever built over the parsed book documents.
bm25retriever = BM25Retriever.from_documents(pdf_files)

# Gather the generator's settings in one place, then build the agent from them.
generator_settings = {
    'llm': llm,
    'retriever': bm25retriever,
    'output_path': './output',
    'buffer_size': 10,
}
agent = SyntheticDataGenerator(**generator_settings)

In [5]:
from tasks import Task

# Describe the generation request: an SFT-style task asking for 5 English
# examples, localized to the 'AI Agents' topic.
task_fields = {
    "task_name": "sft",
    "localization": 'AI Agents',
    "task_description": "Generate training examples for an AI Engineering LLM!",
    "num_of_data": 5,
    "language": "English",
}
example_task = Task(**task_fields)

In [6]:
import uuid

# A fresh random thread id for this run; the same config is passed again
# when the run is resumed in a later cell.
run_thread_id = str(uuid.uuid4())
thread_config = {'configurable': {'thread_id': run_thread_id}}

# Initial graph input: the task to work on, with no human feedback yet.
initial_state = {"task": example_task, "human_feedback": None}

# Stream the flow and print each update as it arrives.
update_stream = agent.agent_flow.stream(
    initial_state,
    config=thread_config,
    stream_mode="updates",
)
for step_update in update_stream:
    print(step_update)

INFO:multi_agent:Total number of conversation turns up to this point: 0
INFO:multi_agent:Prompt:[{'role': 'system', 'content': '\nYou are an advanced synthetic data generator, engineered to produce high-quality, task-specific synthetic datasets. Your mission is to generate data samples in formats that precisely adhere to the requirements provided.\n'}, {'role': 'user', 'content': 'You are tasked to help me generate a dataset of 5 rows entirely in English, based entirely on the following context:\npage_content=\'Figure 6-16. The hierarchy of information for an agent.\nMemory is essential for humans to operate. As AI applications have evolved, develop‐\ners have quickly realized that memory is important for AI models, too. Many mem‐\nory management tools for AI models have been developed, and many model\nproviders have incorporated external memory. Augmenting an AI model with a\nmemory system has many benefits. Here are just a few of them:\nManage information overflow within a session\nD

{'retrieve': {'retrieved_documents': [Document(metadata={}, page_content='Figure 6-16. The hierarchy of information for an agent.\nMemory is essential for humans to operate. As AI applications have evolved, develop‐\ners have quickly realized that memory is important for AI models, too. Many mem‐\nory management tools for AI models have been developed, and many model\nproviders have incorporated external memory. Augmenting an AI model with a\nmemory system has many benefits. Here are just a few of them:\nManage information overflow within a session\nDuring the process of executing a task, an agent acquires a lot of new informa‐\ntion, which can exceed the agent’s maximum context length. The excess informa‐\ntion can be stored in a memory system with long-term memories.\nPersist information between sessions\nAn AI coach is practically useless if every time you want the coach’s advice, you\nhave to explain your whole life story. An AI assistant would be annoying to use if\nit keeps forge

INFO:root:AFC remote call 1 is done.
INFO:multi_agent:Added to conversation memory: ```json
[
  {
    "prompt": "Explain the importance of memory in AI agents, drawing from the provided text.",
    "completion": "Memory is crucial for AI agents, mirroring its importance for humans. As AI models have advanced, developers have recognized the necessity of memory management. Augmenting AI models with memory systems offers several benefits, including managing information overflow within a session, persisting information between sessions, and boosting a model's consistency. Without memory, agents would struggle to retain context, personalize interactions, or provide consistent responses."
  },
  {
    "prompt": "Describe how memory systems help AI agents manage information overflow during a session.",
    "completion": "During task execution, an AI agent can acquire a large amount of new information that exceeds its maximum context length. A memory system, particularly one with long-term mem

{'generate': {'response': '```json\n[\n  {\n    "prompt": "Explain the importance of memory in AI agents, drawing from the provided text.",\n    "completion": "Memory is crucial for AI agents, mirroring its importance for humans. As AI models have advanced, developers have recognized the necessity of memory management. Augmenting AI models with memory systems offers several benefits, including managing information overflow within a session, persisting information between sessions, and boosting a model\'s consistency. Without memory, agents would struggle to retain context, personalize interactions, or provide consistent responses."\n  },\n  {\n    "prompt": "Describe how memory systems help AI agents manage information overflow during a session.",\n    "completion": "During task execution, an AI agent can acquire a large amount of new information that exceeds its maximum context length. A memory system, particularly one with long-term memory capabilities, allows the agent to store this

  Expected `str` but got `Document` with value `Document(metadata={}, pag...pter 6: RAG and Agents')` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


In [7]:
from langgraph.types import Command
# Continue the run on the same thread (same `thread_config` as the streaming
# cell); each Command(resume=...) supplies one human reply to the flow.
# NOTE(review): presumably 'no' answers a first approval prompt and the second
# string is the free-form feedback (it is the value logged as
# "Human in the loop Feedback") — confirm against the graph's interrupt points.
# Only the second call's return value is displayed, since it is the cell's
# last expression.
agent.agent_flow.invoke(Command(resume='no'), config=thread_config)
agent.agent_flow.invoke(Command(resume='Sounds Good! But ask really weird questions instead, like youre a crazy scientist or something haha!'), config=thread_config)

INFO:multi_agent:Human in the loop Feedback: Sounds Good! But ask really weird questions instead, like youre a crazy scientist or something haha!
  Expected `str` but got `dict` with value `{'id': None, 'metadata': ...ts', 'type': 'Document'}` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(
INFO:multi_agent:Total number of conversation turns up to this point: 4
INFO:root:AFC is enabled with max remote calls: 10.
INFO:root:AFC remote call 1 is done.
INFO:root:AFC is enabled with max remote calls: 10.
INFO:root:AFC remote call 1 is done.
INFO:multi_agent:Added to conversation memory: ```json
[
  {
    "prompt": "If we could surgically implant the memory of a goldfish into an AI agent, would it dream of electric seaweed?",
    "completion": "Hypothetically, imbuing an AI agent with a goldfish's memory, while scientifically improbable with current technology, raises fascinating questions. The AI's 'dreams,' if it were capable of such a phenomenon,

{'task': Task(task_name='sft', localization='AI Agents', grounded_knowledge=Document(metadata={}, page_content='Figure 6-16. The hierarchy of information for an agent.\nMemory is essential for humans to operate. As AI applications have evolved, develop‐\ners have quickly realized that memory is important for AI models, too. Many mem‐\nory management tools for AI models have been developed, and many model\nproviders have incorporated external memory. Augmenting an AI model with a\nmemory system has many benefits. Here are just a few of them:\nManage information overflow within a session\nDuring the process of executing a task, an agent acquires a lot of new informa‐\ntion, which can exceed the agent’s maximum context length. The excess informa‐\ntion can be stored in a memory system with long-term memories.\nPersist information between sessions\nAn AI coach is practically useless if every time you want the coach’s advice, you\nhave to explain your whole life story. An AI assistant would

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for 512-unit chunks with a 40-unit overlap between
# neighbouring chunks.
splitter_options = {'chunk_size': 512, 'chunk_overlap': 40}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Sample input for the splitter: one long string made of a feedback log line
# followed by the same sentence repeated 11 times.
feedback_prefix = 'INFO:multi_agent:Human in the loop Feedback: It sounds a bit generic, can you act like you a client with Leo placement to ask question?'
leo_sentence = 'Leo energy embodies the fiery, bold, and expressive nature of the fifth sign of the zodiac.'
docs = [feedback_prefix + leo_sentence * 11]

In [None]:
# `docs` holds plain strings, not Document objects, so `split_documents`
# (which reads `.page_content` from each item) would fail here.
# `create_documents` accepts raw strings and returns the split chunks as
# Document objects.
texts = text_splitter.create_documents(docs)