In [None]:
import asyncio
import json
import os
from contextlib import AsyncExitStack
from typing import Optional

import tiktoken
from dotenv import load_dotenv
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from openai import OpenAI

load_dotenv()  # load environment variables from .env


True

In [2]:
# Generic agent reminders (persistence, tool calling, planning) that are
# interpolated into the task prompt below.
# NOTE(review): the TOOL CALLING reminder talks about "file content or codebase
# structure", while the task in `prompt` is browser automation — confirm the
# wording is intentional.
reminders = """

    ## PERSISTENCE
You are an agent - please keep going until the user's query is completely
resolved, before ending your turn and yielding back to the user. Only
terminate your turn when you are sure that the problem is solved.

## TOOL CALLING
If you are not sure about file content or codebase structure pertaining to
the user's request, use your tools to read files and gather the relevant
information: do NOT guess or make up an answer.

## PLANNING
You MUST plan extensively before each function call, and reflect
extensively on the outcomes of the previous function calls. DO NOT do this
entire process by making function calls only, as this can impair your
ability to solve the problem and think insightfully.


    """

# Step-by-step instruction appended at the end of the task prompt.
# NOTE(review): this asks for document TITLEs and IDs, which does not match the
# shopping task in `prompt`; it looks carried over from a document-retrieval
# prompt — confirm or reword.
cot = "First, think carefully step by step about what documents are needed to answer the query. Then, print out the TITLE and ID of each document. Then, format the IDs into a list."

# Full task prompt: agent framing, the reminders, the shopping task (including
# the delivery address to use), then the `cot` instruction.
prompt = f"""

You're an agent. You can use a MCP server to handle browser interactions.
Here are some reminders to help you:
{reminders}

You should go to the website mon-marche.fr and buy the ingredient to make an apple pie. The website is in french. If you need an address, use '39 boulevard des capucines 75002 Paris'.
{cot}
"""

# Setup MCP

In [3]:
class MCPClient:
    """Async client for a single MCP server reached over stdio.

    After `connect_to_server()` succeeds, `available_tools` holds one
    function-tool dict per tool advertised by the server, in the shape this
    notebook passes as `tools=` to the OpenAI Responses API.
    """

    def __init__(self):
        # Initialize session and client objects
        self.mcpSession: Optional[ClientSession] = None
        self.exit_stack = AsyncExitStack()
        self.available_tools = []
        self.stdio = None
        self.write = None

    @staticmethod
    def _to_openai_tool(tool) -> dict:
        """Convert one MCP tool description into a strict function-tool dict.

        Every declared property is listed under "required". A schema that has
        no "properties" entry yields an empty "required" list instead of
        raising KeyError.
        """
        schema = dict(tool.inputSchema or {})
        properties = schema.get("properties") or {}
        return {
            "strict": True,
            "type": "function",
            "name": tool.name,
            "description": tool.description,
            "parameters": {**schema, "required": list(properties.keys())},
        }

    async def connect_to_server(self, server_script_path: str):
        """Connect to an MCP server

        Spawns the server script with `python` or `node`, opens a client
        session on its stdio streams, and fills `available_tools` from the
        server's tool list.

        Args:
            server_script_path: Path to the server script (.py or .js)

        Raises:
            ValueError: If the path ends with neither ".py" nor ".js".
        """
        is_python = server_script_path.endswith(".py")
        is_js = server_script_path.endswith(".js")
        if not (is_python or is_js):
            raise ValueError("Server script must be a .py or .js file")

        command = "python" if is_python else "node"
        server_params = StdioServerParameters(
            command=command, args=[server_script_path], env=None
        )

        # Both context managers are parked on the exit stack so that they stay
        # open after this method returns; cleanup() releases them.
        stdio_transport = await self.exit_stack.enter_async_context(
            stdio_client(server_params)
        )
        self.stdio, self.write = stdio_transport
        self.mcpSession = await self.exit_stack.enter_async_context(
            ClientSession(self.stdio, self.write)
        )

        await self.mcpSession.initialize()

        # List available tools
        response = await self.mcpSession.list_tools()
        tools = response.tools
        # Rebuild the list instead of appending so that connecting a second
        # time does not leave duplicate tool entries behind.
        self.available_tools = [self._to_openai_tool(tool) for tool in tools]
        print("\nConnected to server with tools:", [tool.name for tool in tools])

    async def call_tool(self, tool_name: str, tool_args: dict):
        """Call a tool with the given name and arguments.

        Args:
            tool_name: Name of the tool to call
            tool_args: Arguments for the tool

        Returns:
            Whatever the session's `call_tool` returns for this invocation.

        Raises:
            ValueError: If no session or stdio transport has been set up yet.
        """
        if self.mcpSession is None:
            raise ValueError("MCP session is not initialized")
        if self.stdio is None or self.write is None:
            raise ValueError("Stdio transport is not initialized")

        # Call the tool
        result = await self.mcpSession.call_tool(tool_name, tool_args)
        return result

    async def cleanup(self):
        """Close everything opened by `connect_to_server()`.

        Safe to call on a client that never connected.
        NOTE(review): presumably this should be awaited from the same task
        that connected — confirm against the MCP SDK.
        """
        await self.exit_stack.aclose()
        self.mcpSession = None
        self.stdio = None
        self.write = None

In [4]:
server_path = "/Users/Morgan/.nvm/versions/node/v20.13.0/lib/node_modules/@playwright/mcp/cli.js"
mcpClient = MCPClient()
await mcpClient.connect_to_server(server_path)
print(mcpClient.available_tools)

Sending request: SessionMessage(message=JSONRPCMessage(root=JSONRPCRequest(method='initialize', params={'protocolVersion': '2025-03-26', 'capabilities': {'sampling': {}, 'roots': {'listChanged': True}}, 'clientInfo': {'name': 'mcp', 'version': '0.1.0'}}, jsonrpc='2.0', id=0)), metadata=None)
Sending request: SessionMessage(message=JSONRPCMessage(root=JSONRPCRequest(method='tools/list', params=None, jsonrpc='2.0', id=1)), metadata=None)

Connected to server with tools: ['browser_close', 'browser_wait', 'browser_resize', 'browser_console_messages', 'browser_handle_dialog', 'browser_file_upload', 'browser_install', 'browser_press_key', 'browser_navigate', 'browser_navigate_back', 'browser_navigate_forward', 'browser_network_requests', 'browser_pdf_save', 'browser_snapshot', 'browser_click', 'browser_drag', 'browser_hover', 'browser_type', 'browser_select_option', 'browser_take_screenshot', 'browser_tab_list', 'browser_tab_new', 'browser_tab_select', 'browser_tab_close', 'browser_generate_

# LLM helpers

In [None]:
# Model name passed to the Responses API by get_response().
model ="gpt-4.1"
# API client; constructed without explicit arguments.
openai = OpenAI()
# Shared conversation history; the helper cells below append to it and it is
# sent as the model input.
messages = []
# Tokenizer used by the token-count cell.
# NOTE(review): looked up for "gpt-4o" while `model` is "gpt-4.1" — presumably
# both use the same tokenizer; confirm.
encoding = tiktoken.encoding_for_model("gpt-4o")

### Count tokens

In [30]:
# Token estimate for the conversation: this tokenizes the Python repr of
# `messages`, not the payload actually sent to the API.
len(encoding.encode(str(messages)))

51709

### Append user message

In [7]:
def add_user(message):
    """Append `message` to the shared `messages` list as a user turn."""
    user_turn = {"role": "user", "content": message}
    messages.append(user_turn)


# Seed the conversation with the task prompt.
add_user(prompt)

### Append assistant message

In [41]:
def add_assistant(message):
    """Append `message` to the shared `messages` list as an assistant turn."""
    assistant_turn = {"role": "assistant", "content": message}
    messages.append(assistant_turn)


# NOTE(review): in the visible notebook `llm_response` is first assigned in the
# later "Get response" cell, so this line only works after that cell has run.
add_assistant(llm_response.output_text)

### Append message

In [58]:
# Feed the tool round-trip back to the model. The function_call item the model
# emitted (kept as `toolcall_message` by the "Run tool" cell) has to be part of
# the input together with its function_call_output; the two are matched by
# call_id, so the call goes in first and its result right after.
messages.append(toolcall_message)
messages.append(result_message)

In [62]:
# Print the character length of the stringified history, then display the
# history itself.
# NOTE(review): the bare `messages` expression renders the whole conversation
# (the recorded length is about 149k characters); consider showing a slice.
print(len(str(messages)))
messages

148820


[{'role': 'user',
  'content': "\n\nYou're an agent. You can use a MCP server to handle browser interactions.\nHere are some reminders to help you:\n\n\n    ## PERSISTENCE\nYou are an agent - please keep going until the user's query is completely\nresolved, before ending your turn and yielding back to the user. Only\nterminate your turn when you are sure that the problem is solved.\n\n## TOOL CALLING\nIf you are not sure about file content or codebase structure pertaining to\nthe user's request, use your tools to read files and gather the relevant\ninformation: do NOT guess or make up an answer.\n\n## PLANNING\nYou MUST plan extensively before each function call, and reflect\nextensively on the outcomes of the previous function calls. DO NOT do this\nentire process by making function calls only, as this can impair your\nability to solve the problem and think insightfully.\n\n\n    \n\nYou should go to the website mon-marche.fr and buy the ingredient to make an apple pie. The website is

### Get response

In [43]:
# Quick size check: character count of the stringified history.
len(str(messages))

6280

In [63]:
def get_response(input):
    """Send `input` to the Responses API and return the response object.

    The request uses the notebook-level `model` and offers the model every
    tool currently listed in `mcpClient.available_tools`.
    """
    request = dict(model=model, input=input, tools=mcpClient.available_tools)
    return openai.responses.create(**request)

# Ask the model for its next step given the conversation so far.
llm_response = get_response(messages)
print(llm_response.output_text)
# Type of each output item (for example "function_call").
print([item.type for item in llm_response.output])

Here is the first collected document for the process of finding and buying apple pie ingredients on mon-marche.fr:

---
### Document Details

- **TITLE:** Livraison de courses à domicile | Courses en ligne | Mon Marché  
- **ID:** https://www.mon-marche.fr/

---

To continue, I will need to collect similar information (TITLE, ID) for each major step in the shopping process for ALL required ingredients:

1. Search & product listing pages for:
   - pomme (apple)
   - farine (flour)
   - sucre (sugar)
   - beurre (butter)
   - oeuf (egg)
   - cannelle (cinnamon)

2. The detail/product pages selected from the above search results
3. The cart/review page
4. The checkout/address/payment pages

As of now, my formatted list of IDs is:

```json
[
  "https://www.mon-marche.fr/"
]
```

I will proceed now to search for "pomme" (apple) and collect the TITLE and ID for the search results or product listing page. After that, I’ll repeat for the remaining ingredients.

Would you like to see the proces

### Compress history

In [None]:
def compress(history=None):
    """Ask the model to condense a conversation so it can be resumed later.

    Args:
        history: Conversation items to condense. Defaults to the notebook's
            shared `messages` list.

    Returns:
        The Responses API response object; the condensed context is its
        `output_text`.
    """
    if history is None:
        history = messages

    # Plain template (not an f-string); the {history} placeholder is filled in
    # just below.
    template = """
You're an helpful agent.
This are all the messages between a user and an AI agent. Compress these messages for the AI to have context when contiue its request.
Your output will be used like this :

next_promp = '[the user request will go here]

This is some context about the previous conversation:
[the resumed context you have to produce will go here]
'

The history of the messages :
{history}
"""
    # replace() rather than format(): the stringified history can itself
    # contain braces, and only this one placeholder must be substituted.
    prompt = template.replace("{history}", str(history))

    # Send the built prompt as a single user message. No tools are offered
    # here: this request should come back as text, not as a browser action.
    llm_response = openai.responses.create(
        model=model,
        input=[{"role": "user", "content": prompt}],
    )
    return llm_response


# Run the compression request and keep the raw response for inspection.
compressResponse = compress()

In [50]:
# Display the raw output items of the current `llm_response`.
llm_response.output

[ResponseFunctionToolCall(arguments='{}', call_id='call_d7NtP9EqPtuhaAlj4sQLm9hp', name='browser_snapshot', type='function_call', id='fc_682f8230c9d88191b574cfb17f20691c0767d77c0e7e916a', status='completed')]

### Run tool

In [51]:
# Keep only the output items whose type marks them as function calls.
tool_calls = [item for item in llm_response.output if item.type == "function_call"]
tool_calls

[ResponseFunctionToolCall(arguments='{}', call_id='call_d7NtP9EqPtuhaAlj4sQLm9hp', name='browser_snapshot', type='function_call', id='fc_682f8230c9d88191b574cfb17f20691c0767d77c0e7e916a', status='completed')]

In [None]:
async def run_tool(tool_call):
    """Execute one model-requested function call through the MCP client.

    `tool_call.arguments` is a JSON string; it is decoded and forwarded,
    together with `tool_call.name`, to `mcpClient.call_tool`, whose result is
    returned unchanged.
    """
    arguments = json.loads(tool_call.arguments)
    return await mcpClient.call_tool(tool_call.name, arguments)


tool_call = tool_calls[0]

toolcall_message = tool_call.to_dict()
result = await run_tool(tool_call)
result_message = {
    "type": "function_call_output",
    "call_id": tool_call.call_id,
    "output": str(result),
}

print("Tool call result:", result)
print(len(str(result)))

Sending request: SessionMessage(message=JSONRPCMessage(root=JSONRPCRequest(method='tools/call', params={'name': 'browser_snapshot', 'arguments': {}}, jsonrpc='2.0', id=7)), metadata=None)
Tool call result: meta=None content=[TextContent(type='text', text='- Ran Playwright code:\n```js\n// <internal code to capture accessibility snapshot>\n```\n\n- Page URL: https://www.mon-marche.fr/\n- Page Title: Livraison de courses à domicile | Courses en ligne | Mon Marché\n- Page Snapshot\n```yaml\n- banner [ref=s2e6]:\n  - button "burger-menu" [ref=s2e9]:\n    - img "burger-menu" [ref=s2e11]: \ue937\n  - generic [ref=s2e12]:\n    - generic:\n      - generic:\n        - generic:\n          - generic:\n            - generic:\n              - generic: Recherche\n              - button "cross":\n                - generic:\n                  - img "cross": \ue95b\n            - generic:\n              - img: \ue918\n              - textbox "Rechercher un produit ou une recette"\n    - link "mon march

# Testing

In [None]:
# Rebuild the function_call_output item from the current `tool_call` and
# `result`, then display it.
result_message = dict(
    type="function_call_output",
    call_id=tool_call.call_id,
    output=str(result),
)
result_message

{'type': 'function_call_output',
 'call_id': 'call_4Y7hDaW2B3wB9cxqGZS8xN5f',
 'output': 'meta=None content=[TextContent(type=\'text\', text=\'- Ran Playwright code:\\n```js\\n// Navigate to https://www.mon-marche.fr\\nawait page.goto(\\\'https://www.mon-marche.fr\\\');\\n```\\n\\n- Page URL: https://www.mon-marche.fr/\\n- Page Title: Livraison de courses à domicile | Courses en ligne | Mon Marché\\n- Page Snapshot\\n```yaml\\n- img [ref=s1e8]: \\ue941\\n- generic [ref=s1e10]:\\n  - text: "Créneau expiré. Nouveau créneau sélectionné :"\\n  - generic [ref=s1e11]: Jeu 22 - 07:00 - 09:00\\n- button "Modifier" [ref=s1e12]:\\n  - generic [ref=s1e14]: Modifier\\n- banner [ref=s1e18]:\\n  - button "burger-menu" [ref=s1e21]:\\n    - img "burger-menu" [ref=s1e23]: \\ue937\\n  - generic [ref=s1e24]:\\n    - generic:\\n      - generic:\\n        - generic:\\n          - generic:\\n            - generic:\\n              - generic: Recherche\\n              - button "cross":\\n                - gen