# Building the Guestbook Tool

## Step 1: Load and Prepare the Dataset

In [1]:
import datasets
from langchain.docstore.document import Document

# Load the "train" split of the invitee dataset; each record is one guest.
guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

# Reference only: an explicit way to build the Document objects with
# capitalized field labels. It is disabled because the next cell builds `docs`
# generically from every column of each record.
# docs = [
#     Document(
#         page_content="\n".join([
#             f"Name: {guest['name']}",
#             f"Relation: {guest['relation']}",
#             f"Description: {guest['description']}",
#             f"Email: {guest['email']}"
#         ]),
#         metadata={"name": guest["name"]}
#     )
#     for guest in guest_dataset
# ]

In [2]:
def _guest_to_document(guest):
    """Build one Document from a guest record.

    The page content lists every column of the record as a ``key: value`` line
    (each terminated by a newline); the guest's name is stored as metadata.
    """
    field_lines = [f"{key}: {value}\n" for key, value in guest.items()]
    return Document(
        page_content="".join(field_lines),
        metadata={"name": guest["name"]},
    )


# One Document per guest record, in dataset order.
docs = [_guest_to_document(guest) for guest in guest_dataset]

print(docs)

[Document(metadata={'name': 'Ada Lovelace'}, page_content="name: Ada Lovelace\nrelation: best friend\ndescription: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.\nemail: ada.lovelace@example.com\n"), Document(metadata={'name': 'Dr. Nikola Tesla'}, page_content="name: Dr. Nikola Tesla\nrelation: old friend from university days\ndescription: Dr. Nikola Tesla is an old friend from your university days. He's recently patented a new wireless energy transmission system and would be delighted to discuss it with you. Just remember he's passionate about pigeons, so that might make for good small talk.\nemail: nikola.tesla@gmail.com\n"), Document(metadata={'name': 'Marie Curie'}, page_content='name: Marie Curie\nrelation: no relation\ndescription: Marie Curie was a groundbreaking physicis

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the guest Documents into smaller chunks before embedding them.
# chunk_size / chunk_overlap use the splitter's default length function
# (no custom one is passed here); the separators are listed from the coarsest
# (blank line) to the finest (single character).
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
    separators=["\n\n", "\n", ".", " ", ""]
)
# `splits` is what gets embedded when a new vector store is created below.
splits = r_splitter.split_documents(docs)
# Sanity check: number of chunks produced.
print(len(splits))

7


In [4]:
import os
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

persist_directory = 'docs/chroma_guest/'
# Alternative embedding model that was tried: "BAAI/bge-small-en-v1.5"
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# The store counts as present only when the directory exists and is non-empty.
# NOTE(review): an existing store is reused as-is, so it is not rebuilt when
# `splits` or the embedding model change; delete the directory to re-index.
store_missing = not (os.path.exists(persist_directory) and os.listdir(persist_directory))

if store_missing:
    print(f"Creating new vector store in {persist_directory}")
    # Embed the chunks and persist the resulting collection to disk.
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
else:
    print(f"Loading existing vector store from {persist_directory}")
    # Re-open the persisted collection with the same embedding function.
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )

# Sanity check: number of entries in the underlying collection.
print(vectordb._collection.count())


Loading existing vector store from docs/chroma_guest/


  vectordb = Chroma(


7


## Step 2: Create the Retriever Tool

In [5]:
# from langchain_community.retrievers import BM25Retriever
# from langchain.tools import Tool
from langchain_core.tools import tool

# bm25_retriever = BM25Retriever.from_documents(docs)

# def extract_text(query: str) -> str:
#     """Retrieves detailed information about gala guests based on their name or relation."""
#     results = bm25_retriever.invoke(query)
#     if results:
#         return "\n\n".join([doc.page_content for doc in results[:3]])
#     else:
#         return "No matching guest information found."
@tool
def extract_text(query: str) -> str:
    """Retrieves detailed information about gala guests based on their name or relation."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool's
    # description, so it is runtime text rather than plain documentation.
    # Look up the three chunks most similar to the query in the vector store.
    matches = vectordb.similarity_search(query, k=3)
    if not matches:
        return "No matching guest information found."
    # Chunks are returned as one string, separated by blank lines.
    return "\n\n".join(doc.page_content for doc in matches)

# guest_info_tool = Tool(
    # name="guest_info_retriever",
    # func=extract_text,
    # description="Retrieves detailed information about gala guests based on their name or relation."
# )

In [6]:
extract_text.invoke("Tell me about the guest named Ada")  # Example query

"name: Ada Lovelace\nrelation: best friend\n\nemail: ada.lovelace@example.com\n\nrelation: best friend\ndescription: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine."

### Tool to Access the Internet

In [7]:
from langchain_community.tools import DuckDuckGoSearchResults

@tool
def extract_web(query: str) -> str:
    """Retrieves latest news about gala guests from the internet."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool's
    # description. A new search client, capped at two results, is built on
    # every call.
    hits = DuckDuckGoSearchResults(output_format="list", max_results=2).run(query)
    # NOTE(review): with output_format="list" the hits are presumably a list
    # rather than the annotated `str` - confirm before relying on the type.
    return hits if hits else "No matching guest information found online."

# guest_info_web_tool = Tool(
    # name="guest_news_web_retriever",
    # func=extract_web,
    # description="Retrieves latest news about gala guests from the internet."
# )

In [8]:
# extract_web.invoke("Tell me about Dr. Nikola Tesla")  # Example query

In [9]:
from langchain_tavily import TavilySearch

# Web search tool handed to the agent (it is the entry used in the `tools`
# list further down). Only max_results, topic and time_range are set; the
# commented-out keyword arguments are left at the library defaults.
# NOTE(review): time_range="week" presumably restricts hits to the last seven
# days - confirm against the Tavily documentation.
search_tool = TavilySearch(
    max_results=2,
    topic="general",
    # include_answer=False,
    # include_raw_content=False,
    # include_images=False,
    # include_image_descriptions=False,
    # search_depth="basic",
    time_range="week",
    # include_domains=None,
    # exclude_domains=None
)

# from tavily import TavilyClient
# def extract_web(query: str) -> str:
#     """Retrieves latest news about gala guests from the internet."""
    
#     client = TavilyClient(os.environ["TAVILY_API_KEY"])
    
#     results = client.search(
#         query=query,
#         max_results=2,
#         time_range="week"
#     )
#     if results:
#         return results
#     else:
#         return "No matching guest information found online."
    
# guest_info_web_tool = Tool(
#     name="guest_news_web_retriever",
#     func=extract_web,
#     description="Retrieves latest news about gala guests from the internet."
# )


In [10]:
search_tool.invoke("Who's the current President of France?")
# extract_web("Tell me about Dr. Nikola Tesla")  # Example query

{'query': "Who's the current President of France?",
 'follow_up_questions': None,
 'answer': None,
 'images': [],
 'results': [{'title': "What is French President's two-hour TV interview about?",
   'url': 'https://www.connexionfrance.com/news/what-is-french-presidents-two-hour-tv-interview-about/723622',
   'content': 'French President Emmanuel Macron is widely expected to make a return to domestic politics during a two-hour televised debate tonight (May 13). The president, who has focused on international issues such as the war in Ukraine and the election of US president Donald Trump in recent weeks, will be interviewed from 20:10 on the national TF1 channel.',
   'score': 0.7570233,
   'raw_content': None},
  {'title': 'List of prime ministers of France - Wikipedia',
   'url': 'https://en.wikipedia.org/wiki/List_of_prime_ministers_of_France',
   'content': 'The head of the government of France has been called the prime minister of France (French: Premier ministre) since 1959, when M

### Weather tool

In [11]:
import random

@tool
def get_weather_info(location: str) -> str:
    """Fetches weather information for a given location."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool's
    # description. The data is a placeholder: one of three canned
    # (condition, temperature in Celsius) reports is drawn at random, and the
    # location is only echoed back in the result.
    canned_reports = [
        ("Rainy", 15),
        ("Clear", 25),
        ("Windy", 20),
    ]
    condition, temp_c = random.choice(canned_reports)
    return f"Weather in {location}: {condition}, {temp_c}°C"

# Initialize the tool
# weather_info_tool = Tool(
    # name="get_weather_info",
    # func=get_weather_info,
    # description="Fetches dummy weather information for a given location."
# )

### Hub Stats Tool for Influential AI Builders

In [12]:
from huggingface_hub import list_models

@tool
def get_hub_stats(author: str) -> str:
    """Fetches the most downloaded/popular model from a given author on the Hugging Face Hub."""
    # The docstring above is kept verbatim: `@tool` exposes it as the tool's
    # description.
    try:
        # Request a single model for this author, sorted by downloads.
        top_models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))
        if not top_models:
            return f"No models found for author {author}."

        top = top_models[0]
        return f"The most downloaded model by {author} is {top.id} with {top.downloads:,} downloads."
    except Exception as err:
        # Best effort: any failure is reported back to the caller as text so
        # the agent can read the error instead of the tool raising.
        return f"Error fetching models for {author}: {err}"

# Initialize the tool
# hub_stats_tool = Tool(
    # name="get_hub_stats",
    # func=get_hub_stats,
    # description="Fetches the most downloaded model from a specific author on the Hugging Face Hub."
# )

# Example usage
print(get_hub_stats.invoke("facebook")) # Example: Get the most downloaded model by Facebook

The most downloaded model by facebook is facebook/esmfold_v1 with 21,000,492 downloads.


## Step 3: Integrate the Tool with Alfred

In [17]:
from typing import TypedDict, Annotated, Optional
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph, END
from langgraph.prebuilt import tools_condition
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.memory import MemorySaver
import uuid
from IPython.display import Image, display
import json

# Model used by the assistant node. Alternatives that were tried:
# "gemma-3-27b-it", "gemini-2.0-flash-lite".
llm_model = "gemini-2.0-flash"

# Main chat model; temperature=0 is requested for both models in this cell.
llm = ChatGoogleGenerativeAI(
    model=llm_model,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Second model, used only by the `critique` function below. The critique node
# is currently not added to the graph, so this model is not called.
llm_critique = ChatGoogleGenerativeAI(
    model="gemma-3-27b-it",
    temperature=0,
    # max_tokens=None,
    # timeout=None,
    # max_retries=2,
)

# Previous version, built from explicit `Tool(...)` wrappers:
# tools = [guest_info_tool, search_tool, weather_info_tool, hub_stats_tool]
# Tools exposed to the agent. Web search goes through the Tavily
# `search_tool`; the DuckDuckGo-based `extract_web` is defined above but is
# not registered here.
tools = [extract_text, search_tool, get_weather_info, get_hub_stats]
# The same list is also given to ToolNode when the graph is built.
chat_with_tools = llm.bind_tools(tools)

# Generate the AgentState and Agent graph
class AgentState(TypedDict):
    """State passed between the nodes of the Alfred graph."""

    # Conversation history; `add_messages` is the reducer attached to this key.
    messages: Annotated[list[AnyMessage], add_messages]
    # Content of the most recent AI message, recorded by `critique`.
    last_ai_message: Optional[str]
    # Content of the most recent human message, recorded by `init`.
    human_message: Optional[str]
    # Improvement proposed by the critique model when it rejects an answer.
    critique_suggestion: Optional[str]
    # Flag produced by `critique` and read by `critique_condition`.
    refine: Optional[bool]

def init(state: AgentState):
    """Record the content of the most recent human message in the state.

    Scans ``state["messages"]`` from newest to oldest and picks the first
    ``HumanMessage`` it meets.

    Args:
        state: Current graph state; only ``messages`` is read.

    Returns:
        A partial state update ``{"human_message": content}``. The value is
        ``None`` when the history contains no human message.
    """
    last_human = next(
        (msg for msg in reversed(state["messages"]) if isinstance(msg, HumanMessage)),
        None,
    )
    # A history without a human turn must not crash the graph with an
    # AttributeError on `None.content`.
    return {
        "human_message": last_human.content if last_human is not None else None,
    }

def assistant(state: AgentState):
    """Ask the tool-enabled chat model for the next message.

    The whole message history is sent to ``chat_with_tools``; the single reply
    is returned as a one-element ``messages`` update.
    """
    reply = chat_with_tools.invoke(state["messages"])
    return {"messages": [reply]}


def _extract_json_payload(text: str) -> str:
    """Return ``text`` without surrounding whitespace or a Markdown code fence."""
    payload = text.strip()
    if payload.startswith("```"):
        # Remove the opening fence and an optional "json" language tag.
        payload = payload[3:]
        if payload.lower().startswith("json"):
            payload = payload[4:]
        # Remove the closing fence only when it is actually there.
        payload = payload.rstrip()
        if payload.endswith("```"):
            payload = payload[:-3]
    return payload.strip()


def critique(state: AgentState):
    """Ask the critique model whether the last AI answer addresses the host's question.

    The most recent ``AIMessage`` in ``state["messages"]`` and the stored
    ``human_message`` are sent to ``llm_critique`` together with the names and
    descriptions of the available tools. The model is asked to reply with a
    JSON object holding ``answer`` (boolean), ``reason`` and ``suggestion``.

    Args:
        state: Current graph state; ``messages`` and ``human_message`` are read.
            The state is not mutated.

    Returns:
        A partial state update. ``refine`` is always present. When an AI
        message was found, ``last_ai_message`` is included. When the critique
        rejects the answer, ``critique_suggestion`` and a one-element
        ``messages`` update (a ``HumanMessage`` carrying the suggestion) are
        included and ``refine`` is ``True``. If the reply cannot be parsed, the
        error is printed and ``refine`` is ``False``.
    """
    last_ai = next(
        (msg for msg in reversed(state["messages"]) if isinstance(msg, AIMessage)),
        None,
    )
    if last_ai is None:
        # Nothing to critique yet.
        return {"refine": False}

    tool_lines = "\n".join(f"- {t.name}: {t.description}" for t in tools)
    # Every fragment ends with a newline so that questions, tool list and
    # format instructions do not run into each other. The booleans are spelled
    # the JSON way (true/false) because the reply is parsed with json.loads.
    prompt = (
        f"Party Host: {state.get('human_message')}\n"
        f"Host Assistant: {last_ai.content}\n"
        "Does Host's Assistant answer Party Host's question?\n"
        "If no, can the assistant use the provided tools?\n"
        "The provided tools are:\n"
        f"{tool_lines}\n"
        "Answer in JSON format:\n"
        "{\n"
        '  "answer": true or false (boolean),\n'
        '  "reason": "explain why",\n'
        '  "suggestion": "suggest a possible improvement"\n'
        "}"
    )
    response = llm_critique.invoke(prompt)

    # Updates are returned instead of written into `state`, so they go through
    # the graph's normal state handling (including the `messages` reducer).
    update = {"last_ai_message": last_ai.content, "refine": False}

    try:
        result = json.loads(_extract_json_payload(response.content))
    except (ValueError, TypeError, AttributeError) as err:
        # Covers malformed JSON and non-string reply content; treat it as
        # "no refinement requested" rather than failing the run.
        print("Critique parsing error:", err)
        return update

    if isinstance(result, dict) and not result.get("answer", True):
        suggestion = result.get("suggestion", "No suggestion provided.")
        update["critique_suggestion"] = suggestion
        update["messages"] = [
            HumanMessage(content=f"🛠️ Suggested Improvement to your answer: {suggestion}")
        ]
        update["refine"] = True

    return update

def critique_condition(state: AgentState):
    """Choose the next step after the critique node.

    Args:
        state: Current graph state; only ``refine`` is read.

    Returns:
        ``"assistant"`` when ``refine`` is truthy, i.e. the critique asked for
        another attempt; ``END`` otherwise, including when the flag is missing
        or ``None``.
    """
    # Only an explicit request loops back to the assistant. An unset flag ends
    # the run instead of raising KeyError or re-entering the assistant.
    if state.get("refine"):
        return "assistant"
    return END


## The graph
# Flow as wired below: START -> init -> assistant, then assistant <-> tools
# until the assistant's reply no longer routes to "tools".
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("init", init)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
# The critique node is disabled; `critique` and `critique_condition` are
# therefore defined but not part of the compiled graph.
# builder.add_node("critique", critique)

# Define edges: these determine how the control flow moves
builder.add_edge(START, "init")
builder.add_edge("init", "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message requires a tool, route to tools
    # Otherwise, provide a direct response
    tools_condition,
    # Disabled mapping that would send finished answers to the critique node:
    # {"tools": "tools", "__end__": "critique"},
)
# Tool results always go back to the assistant.
builder.add_edge("tools", "assistant")
# Disabled routing out of the critique node (see `critique_condition`):
# builder.add_conditional_edges(
#     "critique",
#     critique_condition,
# )


# Adding memory in langgraph!
# The checkpointer is attached at compile time; calls are then made with a
# config that carries a thread id.
memory = MemorySaver()

alfred = builder.compile(checkpointer=memory)
# Alternative rendering as an image:
# display(Image(alfred.get_graph(xray=True).draw_mermaid_png()))
# Print the graph as Mermaid source text.
mermaid_code = alfred.get_graph(xray=True).draw_mermaid()
print(mermaid_code)


---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	init(init)
	assistant(assistant)
	tools(tools)
	__end__([<p>__end__</p>]):::last
	__start__ --> init;
	init --> assistant;
	tools --> assistant;
	assistant -.-> tools;
	assistant -.-> __end__;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [18]:
# One thread id per conversation. It is stored as a string, the form used for
# `thread_id` in checkpointer configs; every later call that passes this same
# `config` continues the same conversation.
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id": thread_id}}

messages = [
    # SystemMessage(content="You are Alfred, a helpful assistant. You are hosting a gala dinner and need to answer questions about the guests or guest's question. You always answer the question objectively and not in first person. You have access to guest database. You can also search the web for information. You have access to a weather tool and a Hugging Face Hub stats tool to get information about author's model. Make sure to try the various tools before saying you don't know."),
    HumanMessage(content="Tell me about 'Lady Ada Lovelace'.")]
response = alfred.invoke({"messages": messages}, config)
# Keep the full history locally for the cells that append to `messages`.
# The graph itself was compiled with a checkpointer, so later cells that send
# only a new message with the same `config` still see the earlier turns.
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Ada Lovelace was a mathematician and is considered a best friend. She is renowned for her pioneering work in mathematics and computing, and is often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine. Her email is ada.lovelace@example.com.


In [19]:
response = alfred.invoke({"messages": "What's the weather like in Paris tonight? Will it be suitable for our fireworks display?"}, config)

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
The weather in Paris is rainy and the temperature is 15°C. This weather is not suitable for a fireworks display.


In [20]:
messages = [HumanMessage(content="Who is Facebook and what's their most popular model on Hugging Face?")]
response = alfred.invoke({"messages": messages}, config)

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
I cannot directly provide information about Facebook using the available tools. However, I can search for information about Facebook using the tavily_search tool and for their most popular model on Hugging Face using the get_hub_stats tool, if you provide me with their Hugging Face author name. Would you like me to do that?


In [21]:
# Now let's ask a follow-up question
messages.append(HumanMessage(content="What was my first question?"))
response = alfred.invoke({"messages": messages}, config,)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Your first question was: "Tell me about 'Lady Ada Lovelace'."


In [22]:
messages.append(HumanMessage(content="Who is Tesla?"))
response = alfred.invoke({"messages": messages}, config,)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Tesla is Dr. Nikola Tesla, an old friend from your university days. He recently patented a new wireless energy transmission system and would be delighted to discuss it with you. He's passionate about pigeons. His email is nikola.tesla@gmail.com.


In [None]:
messages.append(HumanMessage(content="Tell me the latest news about Tesla?"))
response = alfred.invoke({"messages": messages}, config)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Here's the latest news about Tesla:

*   Elon Musk confirmed that Tesla's Robotaxi platform would eventually come to Saudi Arabia.
*   Musk shared a video of Tesla's humanoid robot, Optimus, dancing with improved flexibility and control.
*   Tesla introduced a cheaper, rear-wheel drive option for the Model Y, signaling efforts to find new consumers. However, the refresh to the best-selling Model Y SUV starts on a rocky road.


In [25]:
response = alfred.invoke({"messages": "One of our guests is from Qwen. What can you tell me about their most popular model?"}, config)

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
I need the author's Hugging Face Hub username to find their most popular model. Could you please provide that?


In [26]:
response = alfred.invoke({"messages":"I need to speak with 'Dr. Nikola Tesla' about recent advancements in wireless energy. Can you help me prepare for this conversation?"}, config)

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Okay, here's some information to help you prepare for your conversation with Dr. Tesla about recent advancements in wireless energy:

*   **Recent advancements in wideband rectennas for RF energy harvesting:** Research is focusing on design strategies, impedance matching, and efficiency enhancement.
*   **Miniaturized Wireless Communication Systems:** There have been significant developments in mmWave technology for 5G networks and increased adoption of AI and machine learning in miniaturized wireless systems.

Also, remember that Dr. Tesla is passionate about pigeons, so that might make for good small talk. Good luck with your conversation!
