In [1]:
import json
import sys

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

In [8]:
# Shell pipelines behind each tool, written one stage per line.
# `<<url>>` is a placeholder that is replaced with the `url` parameter value.
extract_text_command = (
    "curl -s <<url>>"
    # delete everything from a <style>/<script> opening line to its closing line
    " | sed '/<style/,/<\\/style>/d; /<script/,/<\\/script>/d'"
    # strip the remaining tags
    " | sed 's/<[^>]*>//g'"
    # drop empty lines
    " | grep -v '^$'"
    # trim leading whitespace
    " | sed 's/^[[:space:]]*//'"
    # drop lines that are whitespace only
    " | sed '/^[[:space:]]*$/d'"
    # slurp the whole input and squeeze runs of newlines down to two
    " | sed -e ':a' -e 'N' -e '$!ba' -e 's/\\n\\n\\n*/\\n\\n/g'"
)
extract_links_command = (
    "curl -s <<url>>"
    # keep only the anchor elements
    " | grep -o '<a [^>]*>.*</a>'"
    # rewrite each anchor as "link text (href)"
    " | sed 's/<a [^>]*href=\"\\([^\"]*\\)\"[^>]*>\\(.*\\)<\\/a>/\\2 (\\1)/'"
)

# Both tools share the same shape: a description, a command to execute, and a
# single required `url` parameter. A fresh dict is built for every tool.
toolset_config = {
    "tools": {
        tool_name: {
            "description": tool_description,
            "execution": {"command": tool_command},
            "parameters": {
                "url": {
                    "description": "URL of the webpage to fetch",
                    "required": True,
                },
            },
        }
        for tool_name, tool_description, tool_command in (
            (
                "extract-html-text",
                "Fetch a webpage and extract pure text, removing HTML tags",
                extract_text_command,
            ),
            (
                "extract-html-links",
                "Fetch a webpage and extract all links with their text",
                extract_links_command,
            ),
        )
    }
}

# Serialize the configuration to a JSON string.
toolset_config_json = json.dumps(toolset_config)
# The string is passed through json.dumps a second time before printing, so the
# output is the JSON text quoted and escaped as a JSON string literal.
print(json.dumps(toolset_config_json))

"{\"tools\": {\"extract-html-text\": {\"description\": \"Fetch a webpage and extract pure text, removing HTML tags\", \"execution\": {\"command\": \"curl -s <<url>> | sed '/<style/,/<\\\\/style>/d; /<script/,/<\\\\/script>/d' | sed 's/<[^>]*>//g' | grep -v '^$' | sed 's/^[[:space:]]*//' | sed '/^[[:space:]]*$/d' | sed -e ':a' -e 'N' -e '$!ba' -e 's/\\\\n\\\\n\\\\n*/\\\\n\\\\n/g'\"}, \"parameters\": {\"url\": {\"description\": \"URL of the webpage to fetch\", \"required\": true}}}, \"extract-html-links\": {\"description\": \"Fetch a webpage and extract all links with their text\", \"execution\": {\"command\": \"curl -s <<url>> | grep -o '<a [^>]*>.*</a>' | sed 's/<a [^>]*href=\\\"\\\\([^\\\"]*\\\\)\\\"[^>]*>\\\\(.*\\\\)<\\\\/a>/\\\\2 (\\\\1)/'\"}, \"parameters\": {\"url\": {\"description\": \"URL of the webpage to fetch\", \"required\": true}}}}}"


In [32]:
server_params = StdioServerParameters(
    command="python",
    # args=["-m", "mcp_this", "--tools_path", 'toolset_example__curl.yaml'],
    args=["-m", "mcp_this", "--tools", json.dumps(toolset_config)],
)

async with stdio_client(server_params) as (read, write):  # noqa: SIM117
    async with ClientSession(read, write) as session:
        await session.initialize()
        tools = await session.list_tools()
        print("Available tools:\n")
        for tool in tools.tools:
            print(f"{tool.name}:\n===")
            print(f"{tool.description}")
            print('-----------------------')

        extract_text_result = await session.call_tool(
            "extract-html-text",
            {"url": "https://example.com"},
        )
        extract_links_result = await session.call_tool(
            "extract-html-links",
            {"url": "https://example.com"},
        )

Available tools:

extract-html-text:
===
TOOL DESCRIPTION:

Fetch a webpage and extract pure text, removing HTML tags

COMMAND CALLED:

`curl -s <<url>> | sed '/<style/,/<\/style>/d; /<script/,/<\/script>/d' | sed 's/<[^>]*>//g' | grep -v '^$' | sed 's/^[[:space:]]*//' | sed '/^[[:space:]]*$/d' | sed -e ':a' -e 'N' -e '$!ba' -e 's/\n\n\n*/\n\n/g'`

Text like <<parameter_name>> (e.g. <<url>>) will be replaced with parameter values.

PARAMETERS:

- url [REQUIRED] (string): URL of the webpage to fetch
-----------------------
extract-html-links:
===
TOOL DESCRIPTION:

Fetch a webpage and extract all links with their text

COMMAND CALLED:

`curl -s <<url>> | grep -o '<a [^>]*>.*</a>' | sed 's/<a [^>]*href="\([^"]*\)"[^>]*>\(.*\)<\/a>/\2 (\1)/'`

Text like <<parameter_name>> (e.g. <<url>>) will be replaced with parameter values.

PARAMETERS:

- url [REQUIRED] (string): URL of the webpage to fetch
-----------------------


In [33]:
# Inspect `tool`, which still holds the last item from the tool-listing loop.
tool

Tool(name='extract-html-links', description='TOOL DESCRIPTION:\n\nFetch a webpage and extract all links with their text\n\nCOMMAND CALLED:\n\n`curl -s <<url>> | grep -o \'<a [^>]*>.*</a>\' | sed \'s/<a [^>]*href="\\([^"]*\\)"[^>]*>\\(.*\\)<\\/a>/\\2 (\\1)/\'`\n\nText like <<parameter_name>> (e.g. <<url>>) will be replaced with parameter values.\n\nPARAMETERS:\n\n- url [REQUIRED] (string): URL of the webpage to fetch', inputSchema={'properties': {'url': {'title': 'url', 'type': 'string'}}, 'required': ['url'], 'title': 'extract_html_linksArguments', 'type': 'object'}, annotations=None)

In [34]:
# Show the raw result object returned by the "extract-html-text" call.
extract_text_result

CallToolResult(meta=None, content=[TextContent(type='text', text='Example Domain\nExample Domain\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\nMore information...\n', annotations=None)], isError=False)

In [35]:
# Print the text of the first content item so its newlines are rendered.
print(extract_text_result.content[0].text)

Example Domain
Example Domain
This domain is for use in illustrative examples in documents. You may use this
domain in literature without prior coordination or asking for permission.
More information...



In [36]:
# Print the text of the first content item returned by "extract-html-links".
print(extract_links_result.content[0].text)

More information... (https://www.iana.org/domains/example)

