# Building the Guestbook Tool

## Step 1: Load and Prepare the Dataset

In [1]:
import datasets
from langchain.docstore.document import Document

# Load the dataset
guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

# Convert dataset entries into Document objects
# docs = [
#     Document(
#         page_content="\n".join([
#             f"Name: {guest['name']}",
#             f"Relation: {guest['relation']}",
#             f"Description: {guest['description']}",
#             f"Email: {guest['email']}"
#         ]),
#         metadata={"name": guest["name"]}
#     )
#     for guest in guest_dataset
# ]

In [2]:
docs = [Document(
            page_content="".join(f"{key}: {value}\n" for key, value in guest.items()), 
            metadata={"name": guest["name"]}
        ) 
        for guest in guest_dataset]

print(docs)

[Document(metadata={'name': 'Ada Lovelace'}, page_content="name: Ada Lovelace\nrelation: best friend\ndescription: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.\nemail: ada.lovelace@example.com\n"), Document(metadata={'name': 'Dr. Nikola Tesla'}, page_content="name: Dr. Nikola Tesla\nrelation: old friend from university days\ndescription: Dr. Nikola Tesla is an old friend from your university days. He's recently patented a new wireless energy transmission system and would be delighted to discuss it with you. Just remember he's passionate about pigeons, so that might make for good small talk.\nemail: nikola.tesla@gmail.com\n"), Document(metadata={'name': 'Marie Curie'}, page_content='name: Marie Curie\nrelation: no relation\ndescription: Marie Curie was a groundbreaking physicis

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
    separators=["\n\n", "\n", ".", " ", ""]
)
splits = r_splitter.split_documents(docs)
print(len(splits))

7


In [4]:
import os
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

persist_directory = 'docs/chroma_guest/'
# embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Check if the vector store already exists
if os.path.exists(persist_directory) and os.listdir(persist_directory):
    print(f"Loading existing vector store from {persist_directory}")
    # Load the existing vector store
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings
    )
else:
    print(f"Creating new vector store in {persist_directory}")
    # Create a new vector store from documents
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory=persist_directory
    )

print(vectordb._collection.count())


Loading existing vector store from docs/chroma_guest/


  vectordb = Chroma(


7


## Step 2: Create the Retriever Tool

In [5]:
# from langchain_community.retrievers import BM25Retriever
from langchain.tools import Tool

# bm25_retriever = BM25Retriever.from_documents(docs)

# def extract_text(query: str) -> str:
#     """Retrieves detailed information about gala guests based on their name or relation."""
#     results = bm25_retriever.invoke(query)
#     if results:
#         return "\n\n".join([doc.page_content for doc in results[:3]])
#     else:
#         return "No matching guest information found."
    
def extract_text(query: str) -> str:
    """Retrieves detailed information about gala guests based on their name or relation."""
    # results = bm25_retriever.invoke(query)
    results = vectordb.similarity_search(query, k=3)
    if results:
        return "\n\n".join([doc.page_content for doc in results])
    else:
        return "No matching guest information found."

guest_info_tool = Tool(
    name="guest_info_retriever",
    func=extract_text,
    description="Retrieves detailed information about gala guests based on their name or relation."
)

In [6]:
extract_text("Tell me about the guest named Ada")  # Example query

"name: Ada Lovelace\nrelation: best friend\n\nemail: ada.lovelace@example.com\n\nrelation: best friend\ndescription: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine."

### Tool to Access the internet

In [6]:
from langchain_community.tools import DuckDuckGoSearchResults

def extract_web(query: str) -> str:
    """Retrieves latest news about gala guests from the internet."""
    
    search = DuckDuckGoSearchResults(output_format="list", max_results=2)
    results = search.run(query)
    if results:
        return results
    else:
        return "No matching guest information found online."

guest_info_web_tool = Tool(
    name="guest_news_web_retriever",
    func=extract_web,
    description="Retrieves latest news about gala guests from the internet."
)

In [None]:
extract_web("Tell me about Dr. Nikola Tesla")  # Example query

In [7]:
from langchain_tavily import TavilySearch

search_tool = TavilySearch(
    max_results=2,
    topic="general",
    # include_answer=False,
    # include_raw_content=False,
    # include_images=False,
    # include_image_descriptions=False,
    # search_depth="basic",
    time_range="week",
    # include_domains=None,
    # exclude_domains=None
)

# from tavily import TavilyClient
# def extract_web(query: str) -> str:
#     """Retrieves latest news about gala guests from the internet."""
    
#     client = TavilyClient(os.environ["TAVILY_API_KEY"])
    
#     results = client.search(
#         query=query,
#         max_results=2,
#         time_range="week"
#     )
#     if results:
#         return results
#     else:
#         return "No matching guest information found online."
    
# guest_info_web_tool = Tool(
#     name="guest_news_web_retriever",
#     func=extract_web,
#     description="Retrieves latest news about gala guests from the internet."
# )


In [64]:
search_tool.invoke("Who's the current President of France?")
# extract_web("Tell me about Dr. Nikola Tesla")  # Example query

{'query': "Who's the current President of France?",
 'follow_up_questions': None,
 'answer': None,
 'images': [],
 'results': [{'title': 'Emmanuel Macron | Biography, Political Party, Age, Presidency, & Facts ...',
   'url': 'https://www.britannica.com/biography/Emmanuel-Macron',
   'content': "Emmanuel Macron is a French banker and politician who was elected president of France in 2017. Macron was the first person in the history of the Fifth Republic to win the presidency without the backing of either the Socialists or the Gaullists, and he was France's youngest head of state since Napoleon.",
   'score': 0.7781275,
   'raw_content': None},
  {'title': 'Macron, Merz tout renewed Franco-German engine to lead Europe',
   'url': 'https://www.politico.eu/article/france-macron-germany-merz-tout-shared-leadership-ukraine-competition-paris/',
   'content': "PARIS — French President Emmanuel Macron and newly minted German Chancellor Friedrich Merz presented a shared vision for a well-armed an

### Weather tool

In [8]:
import random

def get_weather_info(location: str) -> str:
    """Fetches dummy weather information for a given location."""
    # Dummy weather data
    weather_conditions = [
        {"condition": "Rainy", "temp_c": 15},
        {"condition": "Clear", "temp_c": 25},
        {"condition": "Windy", "temp_c": 20}
    ]
    # Randomly select a weather condition
    data = random.choice(weather_conditions)
    return f"Weather in {location}: {data['condition']}, {data['temp_c']}°C"

# Initialize the tool
weather_info_tool = Tool(
    name="get_weather_info",
    func=get_weather_info,
    description="Fetches dummy weather information for a given location."
)

### Hub Stats Tool for Influential AI Builders

In [9]:
from huggingface_hub import list_models

def get_hub_stats(author: str) -> str:
    """Fetches the most downloaded model from a specific author on the Hugging Face Hub."""
    try:
        # List models from the specified author, sorted by downloads
        models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))

        if models:
            model = models[0]
            return f"The most downloaded model by {author} is {model.id} with {model.downloads:,} downloads."
        else:
            return f"No models found for author {author}."
    except Exception as e:
        return f"Error fetching models for {author}: {str(e)}"

# Initialize the tool
hub_stats_tool = Tool(
    name="get_hub_stats",
    func=get_hub_stats,
    description="Fetches the most downloaded model from a specific author on the Hugging Face Hub."
)

# Example usage
print(hub_stats_tool("facebook")) # Example: Get the most downloaded model by Facebook

  print(hub_stats_tool("facebook")) # Example: Get the most downloaded model by Facebook


The most downloaded model by facebook is facebook/esmfold_v1 with 20,933,175 downloads.


## Step 3: Integrate the Tool with Alfred

In [60]:
from typing import TypedDict, Annotated, Optional
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph, END
from langgraph.prebuilt import tools_condition
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.checkpoint.memory import MemorySaver
import uuid
from IPython.display import Image, display, SVG
import json

llm_model = "gemini-2.0-flash-lite" # "gemma-3-27b-it" # 

llm = ChatGoogleGenerativeAI(
    model=llm_model,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

llm_critique = ChatGoogleGenerativeAI(
    model="gemma-3-27b-it",
    temperature=0,
    # max_tokens=None,
    # timeout=None,
    # max_retries=2,
)

tools = [guest_info_tool, search_tool, weather_info_tool, hub_stats_tool]
chat_with_tools = llm.bind_tools(tools)

# Generate the AgentState and Agent graph
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    last_ai_message: Optional[str]
    human_message: Optional[str]
    critique_suggestion: Optional[str]
    refine: Optional[bool]

def init(state: AgentState):
    last_human = None
    for msg in reversed(state["messages"]):
        if last_human is None and isinstance(msg, HumanMessage):
            last_human = msg
            break
    return {
        "human_message": last_human.content,
    }

def assistant(state: AgentState):
    return {
        "messages": [chat_with_tools.invoke(state["messages"])],
    }


def critique(state: AgentState):
    last_ai = None

    # Reverse loop to find the most recent ones
    for msg in reversed(state["messages"]):
        if last_ai is None and isinstance(msg, AIMessage):
            last_ai = msg
            break
    
    state["last_ai_message"] = last_ai.content

    response = llm_critique.invoke(f"Party Host: {state['human_message']}\n" 
                        + f"Host Assistant: {state['last_ai_message']}\n"
                        + "Does Host's Assistant answer Party Host's question?"
                        + "If no, has the assistant used the provided tools?"
                        + "the provided tools are:\n"
                        + "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
                        + "Answer in JSON format:\n"
                        + """{
                            "answer": True or False -> boolean,
                            "reason": "explain why",
                            "suggestion": "suggest a possible improvement"
                            }"""
    )
    # Parse the response content safely
    try:
        json_str = response.content.strip()
        # Optional: handle if response is wrapped in markdown/code block
        if json_str.startswith("```json"):
            json_str = json_str[7:-3].strip()
        result = json.loads(json_str)
        
        if not result.get("answer", True):
            state["critique_suggestion"] = result.get("suggestion", "No suggestion provided.")
            state["messages"].append(HumanMessage(content=f"🛠️ Suggested Improvement to your answer: {result.get("suggestion", "No suggestion provided.")}")),
            refine = True
        else:
            refine= False
    except Exception as e:
        print("Critique parsing error:", e)
        refine = False
        # return END  # Default to ending if JSON parsing fails
    # if state.get("critique_suggestion"):
    #     suggestion = state["critique_suggestion"]
    #     state["messages"].append(
    #         HumanMessage(content=f"🛠️ Suggested Improvement: {suggestion}")
    #     )
    #     # Clear it after use to avoid repeating
    #     state["critique_suggestion"] = None
    # Could log here, or use for routing logic only
    # return state
    return {
        "refine": refine,
        # "messages": state["messages"]
        # no need to add last_ai_message or last_human_message if you don't need them later
    }

def critique_condition(state: AgentState):
    if state["refine"] == False:
        # If the critique is not needed, end the conversation
        return END
    else:
        # If the critique is needed, continue the conversation
        return "assistant"


## The graph
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("init", init)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_node("critique", critique)

# Define edges: these determine how the control flow moves
builder.add_edge(START, "init")
builder.add_edge("init", "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message requires a tool, route to tools
    # Otherwise, provide a direct response
    tools_condition,
    {"tools": "tools", "__end__": "critique"},
)
builder.add_edge("tools", "assistant")
builder.add_conditional_edges(
    "critique",
    critique_condition,
)


# Adding memory in langgraph!
memory = MemorySaver()

alfred = builder.compile(checkpointer=memory)
# display(Image(alfred.get_graph(xray=True).draw_mermaid_png()))
mermaid_code = alfred.get_graph(xray=True).draw_mermaid()
print(mermaid_code)


---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	init(init)
	assistant(assistant)
	tools(tools)
	critique(critique)
	__end__([<p>__end__</p>]):::last
	__start__ --> init;
	init --> assistant;
	tools --> assistant;
	assistant -.-> tools;
	assistant -. &nbsp;__end__&nbsp; .-> critique;
	critique -.-> init;
	critique -.-> assistant;
	critique -.-> tools;
	critique -.-> __end__;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [61]:
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}

messages = [
    SystemMessage(content="You are Alfred, a helpful assistant. You are hosting a gala dinner and need to answer questions about the guests or guest's question. You have access to guest database. You can also search the web for information. You have access to a weather tool and a Hugging Face Hub stats tool."),
    HumanMessage(content="Tell me about our guest named 'Lady Ada Lovelace'.")]
response = alfred.invoke({"messages": messages}, config)
messages = response["messages"]  # 🧠 Carry memory forward

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.


In [69]:
# Now let's ask a follow-up question
messages.append(HumanMessage(content="What was my first question?"))
response = alfred.invoke({"messages": messages}, config,)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
You asked me to tell you about our guest named 'Lady Ada Lovelace'.


In [9]:
messages.append(HumanMessage(content="Who is Tesla?"))
response = alfred.invoke({"messages": messages}, config,)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Dr. Nikola Tesla is an old friend from your university days. He's recently patented a new wireless energy transmission system and would be delighted to discuss it with you. Just remember he's passionate about pigeons, so that might make for good small talk.


In [62]:
messages = [HumanMessage(content="Who is Facebook and what's their most popular model?")]
response = alfred.invoke({"messages": messages}, config)

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
Understood. I will use the `tavily_search` tool with the queries "Who is Facebook?" and "What is Facebook's most popular model?" and present the results to you.


In [61]:
messages.append(HumanMessage(content="Tell me the latest news about Tesla?"))
response = alfred.invoke({"messages": messages}, config,)
messages = response["messages"]

print("🎩 Alfred's Response:")
print(response['messages'][-1].content)

🎩 Alfred's Response:
I am sorry, I cannot fulfill this request. I do not have access to information about Tesla.
