In [1]:
import os
from termios import BSDLY

In [2]:
from strands import Agent

In [3]:
from strands.models.anthropic import AnthropicModel

In [4]:
from browser import BrowserTool

In [12]:
from markdownify import markdownify as md

In [23]:
from pydantic import BaseModel, Field

In [5]:
import nest_asyncio
# Apply nest_asyncio's patch before any agent/browser call is made.
# NOTE(review): presumably needed so async tool code can run inside the
# notebook's already-running event loop — confirm it is still required.
nest_asyncio.apply()

In [6]:
# Claude model that backs the agent.
# The API key is read from the standard ANTHROPIC_API_KEY environment variable;
# the previous name ("ANTHROP_API_KEY") was a typo, so os.getenv most likely
# returned None and the key was only picked up implicitly, if at all.
model = AnthropicModel(
    client_args={
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
    },
    max_tokens=4096,  # upper bound on tokens generated per response
    model_id="claude-sonnet-4-5",
)

In [7]:
# Browser tool shared by the agent and by the direct calls further down.
# Saved artifacts go under ../working_dir; headless mode is off.
browser = BrowserTool(
    headless=False,
    dst_dir="../working_dir",
)

In [8]:
# Crawling agent: the Anthropic model plus the browser tool methods it may call.
# Each tool is registered exactly once — the original list passed
# `browser.save_html` twice. Order of the remaining tools is unchanged.
# The system prompt text (including its embedded indentation) is kept verbatim.
agent = Agent(
    model=model,
    tools=[
        browser.navigate,
        browser.save_as_pdf,
        browser.save_html,
        browser.save_current_page_as_pdf,
        browser.click,
        browser.fill_form,
        browser.get_text,
        browser.get_current_url,
    ],
    system_prompt="""You are a web crawling agent.
              Your aim is to traverse a given website as per instructions that you will receive.
              Along with that, you are to use the tools at your disposal. You will click on any popups in an appropriate manner to remove it as efficiently as possible. You will also click on any cookie requests to ensure that the main content of the page is visible without any obstruction.
              """,
)

In [9]:
# Start the session by sending the agent to the Strands Agents landing page.
resp = agent(
    "Navigate to https://strandsagents.com/latest/"
)

I'll navigate to that URL for you.
Tool #1: navigate
Successfully navigated to https://strandsagents.com/latest/. The page has loaded with a status code of 200. Is there anything specific you'd like me to do on this page, such as extract information, save it as a PDF, or interact with any elements?

In [10]:
# Ask the agent to dismiss the cookie-consent banner on the current page.
resp = agent(
    "There is a request for a cookie consent. "
    "Can you click on accept?"
)

I'll look for the cookie consent button and click on accept.
Tool #2: click
Let me try a different selector for the accept button:
Tool #3: click
Perfect! I've successfully clicked the Accept button for the cookie consent. The cookies have been accepted and the page should now be clear of the consent popup.

In [11]:
# Have the agent explain which selectors it tried for the consent button.
resp = agent(
    "How did you click on the cookie consent? "
    "Tell me your methodology."
)

Great question! Let me explain my methodology for clicking the cookie consent:

## My Approach:

### First Attempt:
I initially tried a more specific selector:
```css
button[aria-label="Accept cookies"]
```

This targeted a button element with an aria-label attribute containing "Accept cookies". This is a common pattern for accessible cookie consent buttons. However, this attempt **failed with a timeout**, meaning the element either didn't exist or wasn't visible/clickable within 30 seconds.

### Second Attempt (Successful):
When the first selector failed, I switched to a more flexible approach:
```css
button:has-text("Accept")
```

This is a **text-based selector** that looks for any button element containing the text "Accept". This approach is:
- More flexible (doesn't rely on specific attributes)
- Language-agnostic to some degree (as long as the button says "Accept")
- Simpler and catches various implementations of accept buttons

### Why This Works:
Cookie consent dialogs vary gre

In [11]:
# Let the agent pick and open a documentation-style page on the site.
# The prompt text is reproduced exactly, including its trailing ", ".
resp = agent(
    "Navigate to any documentation page. "
    "Documentation pages can be anything along the lines of "
    "user guides, resources, APIs, "
)

I'll look for and navigate to a documentation page. Let me first check the current page to find links to documentation, user guides, resources, or API pages.
Tool #14: click
Perfect! I've successfully navigated to a documentation page at https://strandsagents.com/latest/documentation/docs/. This appears to be the main documentation section of the Strands Agents website.
Tool #15: get_current_url
Great! I'm now on the documentation page titled "Welcome - Strands Agents" at https://strandsagents.com/latest/documentation/docs/. This is the documentation section of the website. Would you like me to explore specific sections, save this page, or navigate to any particular documentation topic?

In [25]:
# Structured-output schema for a page-to-Markdown extraction: the page URL
# plus the Markdown text. Passed to the agent as `structured_output_model`.
# NOTE(review): documented with `#` comments rather than a docstring so the
# model class itself is left untouched.
class MarkDownModel(BaseModel):
    # Address of the page that was converted.
    url: str = Field(description="URL which is being converted to markdown")
    # Markdown rendering of that page's content.
    markdown: str = Field(description="Markdown content of the url")

In [26]:
# Request the current page as Markdown, returned in the MarkDownModel schema.
resp = agent(
    "Extract the main documentation in the given webpage to markdown. "
    "Preserve all links and headings",
    structured_output_model=MarkDownModel,
)

I'll extract the main documentation content from the webpage and convert it to markdown, preserving all links and headings.
Tool #4: get_text
The text extraction is quite messy due to the page structure. Let me try to get the HTML source and then extract the content more cleanly to convert it to markdown.
Tool #5: save_html
Let me try extracting the main article content more specifically:
Tool #6: get_text
Let me try a different approach and look for the actual documentation content:
Tool #7: get_text


KeyboardInterrupt: 

In [20]:
# Run markdownify (`md`) over `data` and keep the result.
# NOTE(review): `data` is not assigned in any cell visible here — presumably it
# held page HTML from a since-removed cell. On a fresh kernel this line raises
# NameError; define `data` explicitly (e.g. load the saved HTML) — TODO confirm source.
markdown_data = md(data)

In [21]:
# Show the converted Markdown as the cell's output for a quick visual check.
markdown_data



In [22]:
# Write the converted Markdown to test.md in the notebook's working directory.
# The encoding is pinned to UTF-8 so non-ASCII characters from the scraped page
# are written the same way on every platform instead of depending on the
# locale's default encoding (which can raise UnicodeEncodeError).
with open('test.md', 'w', encoding='utf-8') as f:
    f.write(markdown_data)

In [12]:
# Have the agent save the current page in both PDF and HTML form.
resp = agent(
    "Save the PDF and the HTML"
)

I'll save both the PDF and HTML of the current documentation page.
Tool #16: save_current_page_as_pdf

Tool #17: save_html


Exception in callback _UnixReadPipeTransport._read_ready()
handle: <Handle _UnixReadPipeTransport._read_ready()>
Traceback (most recent call last):
  File "/Users/sri/.local/share/uv/python/cpython-3.12.10-macos-aarch64-none/lib/python3.12/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
RuntimeError: cannot enter context: <_contextvars.Context object at 0x107c00dc0> is already entered
Exception in callback _UnixReadPipeTransport._read_ready()
handle: <Handle _UnixReadPipeTransport._read_ready()>
Traceback (most recent call last):
  File "/Users/sri/.local/share/uv/python/cpython-3.12.10-macos-aarch64-none/lib/python3.12/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
RuntimeError: cannot enter context: <_contextvars.Context object at 0x107c00dc0> is already entered
Exception in callback _UnixReadPipeTransport._read_ready()
handle: <Handle _UnixReadPipeTransport._read_ready()>
Traceback (most recent call las

Perfect! I've successfully saved both files:

1. **PDF**: `../working_dir/strandsagents.com/2025-12-25/pdf/strandsagents.com-latest-documentation-docs.pdf`
2. **HTML**: `../working_dir/strandsagents.com/2025-12-25/source/strandsagents.com-latest-documentation-docs.html`

Both the PDF and HTML versions of the documentation page have been saved. Is there anything else you'd like me to do on this site?

In [None]:
# NOTE(review): stray, never-executed fragment (the cell has no execution count).
# `re` is not imported in any visible cell, so this would raise NameError under
# Restart & Run All — delete this cell.
re

In [8]:
# Call the browser tool directly (no agent) to open the custom-tools guide at
# its "multiple tools in a class" anchor; the result dict is the cell output.
browser.navigate(
    "https://strandsagents.com/latest/documentation/docs/user-guide/"
    "concepts/tools/custom-tools/#example-with-multiple-tools-in-a-class"
)

{'status': 'success',
 'message': 'Successfully navigated to https://strandsagents.com/latest/documentation/docs/user-guide/concepts/tools/custom-tools/#example-with-multiple-tools-in-a-class',
 'final_url': 'https://strandsagents.com/latest/documentation/docs/user-guide/concepts/tools/custom-tools/#example-with-multiple-tools-in-a-class',
 'status_code': 200}

In [9]:
# Save the page currently open in the browser as a PDF at the given path;
# the returned status dict is displayed as the cell output.
browser.save_current_page_as_pdf(
    "screenshots/example.pdf"
)

{'status': 'success',
 'message': 'Successfully saved current page as PDF',
 'current_url': 'https://strandsagents.com/latest/documentation/docs/user-guide/concepts/tools/custom-tools/#example-with-multiple-tools-in-a-class',
 'output_path': 'screenshots/example.pdf'}