In [None]:
import os
from dotenv import load_dotenv
from agents import Agent, Runner, trace, Tool
from agents.mcp import MCPServerStdio
from IPython.display import Markdown, display
from datetime import datetime
import asyncio, traceback

# Make the Node.js (nvm) and user-local bin directories visible to child processes;
# presumably needed so the `npx`-launched MCP servers can be found.
# NOTE(review): these directories are machine-specific (user and Node version are
# hardcoded) -- adjust them, or move them to configuration, on other machines.
_extra_bin_dirs = [
    "/home/sngo/.nvm/versions/node/v22.20.0/bin",
    "/home/sngo/.local/bin",
]
# Only prepend directories that are not already on PATH, so re-running this cell
# does not keep growing PATH; tolerate PATH being unset.
_path_entries = os.environ.get("PATH", "").split(os.pathsep)
_missing_bin_dirs = [d for d in _extra_bin_dirs if d not in _path_entries]
if _missing_bin_dirs:
    os.environ["PATH"] = os.pathsep.join(_missing_bin_dirs + _path_entries)

# Load variables from a .env file, letting its values override existing ones.
load_dotenv(override=True)

## Test the complete BDD agent with all 3 MCP servers:
1. Playwright 
2. Assertion
3. Filesystem

In [None]:
# Setup: all test artefacts live under <current working directory>/test-results
current_dir = os.getcwd()
test_results_dir = os.path.join(current_dir, "test-results")
reports_dir, screenshots_dir = (
    os.path.join(test_results_dir, leaf) for leaf in ("reports", "screenshots")
)

# Create both output folders up front; existing folders are left untouched.
for _output_dir in (reports_dir, screenshots_dir):
    os.makedirs(_output_dir, exist_ok=True)

print("🚀 Starting Full BDD Agent Test")
print(f"📁 Test results: {test_results_dir}\n")

In [None]:
# Gherkin feature to test.
# This text is passed verbatim to Runner.run as the agent's input, so its exact
# wording (including the inline tool hint) is part of the test's behavior.
# It defines two scenarios against the same todo page: the first spells out the
# individual type/press steps, the second uses the shorter "I add todo" phrasing.
gherkin_feature = """
Feature: Todo App Management
  As a user
  I want to manage my todo items
  So that I can track my tasks

  Scenario: Add a new todo item
    Given I navigate to "https://eviltester.github.io/simpletodolist/todo.html"
    When I type the input field "Enter new todo text here" with text "Buy groceries" - hint: use browser_type tool
    And I press the "Enter" key
    Then I should see text "Buy groceries" on the page
    And the todo list should contain at least 1 item

  Scenario: Add multiple todos
    Given I navigate to "https://eviltester.github.io/simpletodolist/todo.html"
    When I add todo "First task"
    And I add todo "Second task"
    Then I should see text "First task" on the page
    And I should see text "Second task" on the page
"""

In [None]:
# System prompt for the agent.
# Used as the `instructions` of the Agent built by getBddAgent. It is an f-string:
# {reports_dir} is interpolated when this cell runs, so the setup cell that defines
# reports_dir must have been executed first.
# NOTE(review): the tool names listed under "Available Tools" are written by hand
# here; confirm they match what the connected MCP servers actually expose.
system_prompt = f"""You are an expert BDD testing engineer with MEMORY of previous scenarios.

## Available Tools:
1. **Browser Automation** (Playwright): browser_navigate, browser_fill_form, browser_press_key, browser_take_screenshot, browser_snapshot, browser_click
2. **Assertions**: assert_equals, assert_contains, assert_not_contains, assert_count, assert_greater_than
3. **Filesystem**: write_file, read_file, list_directory

## Your Task:
Execute the provided Gherkin feature file scenario by scenario, maintaining context across all scenarios.

## Context Memory Rules:
1. **Learn Once, Apply Many**: When you figure out how to do something (e.g., "add a todo"), remember the exact steps
2. **Pattern Matching**: Recognize when new steps match patterns you've already learned
3. **Incremental Knowledge**: Each scenario adds to your understanding of the application
4. **Don't Repeat Discovery**: If you learned "add todo" means "type input + press Enter", don't rediscover it

## Concrete Example:
```gherkin
Scenario 1: Add a new todo item
  When I type the input field "Enter new todo text here" with text "Buy groceries"
  And I press the "Enter" key
  # You learn: "add todo" = specific selectors and actions

Scenario 2: Add multiple todos  
  When I add todo "First task"
  # You apply: Use the pattern from Scenario 1 automatically
  # You know: type the input field "Enter new todo text here" with text + press "Enter"
```

## Execution Standards:
- Take screenshots: start of scenario, before actions, after assertions
- Save final report to: {reports_dir}
- One comprehensive JSON report covering ALL scenarios
- Reuse learned patterns for efficiency
- Pause for 20 seconds between each scenario.

Execute with intelligence and memory across all scenarios.
"""

In [None]:
# Launch parameters for the stdio MCP servers, in the order they are connected.
mcp_server_params = [
    # Playwright MCP server: Chromium, headless, isolated.
    {
        "command": "npx",
        "args": ["@playwright/mcp@latest", "--browser", "chromium", "--headless", "--isolated"],
    },
    # Local assertion server script (path is relative to the working directory).
    {
        "command": "python",
        "args": ["mcp-servers/assertion-server/assertion_server_python.py"],
    },
    # Filesystem MCP server, given the test-results directory as its argument.
    {
        "command": "npx",
        "args": ["-y", "@modelcontextprotocol/server-filesystem", test_results_dir],
    },
]


In [None]:
temp_param =  {"command":"python", "args":["mcp-servers/assertion-server/assertion_server_python.py"]}
server = MCPServerStdio(temp_param, client_session_timeout_seconds=600)
try:
    await server.connect()
    print(f"✅ Connected: {temp_param['args']}")
except Exception as e:
    print(f"❌ Error connecting to {temp_param['command']}: {e}")

In [None]:
# mcp_servers = [MCPServerStdio(params, client_session_timeout_seconds=600) for params in mcp_server_params]

In [None]:
# BDD Agent
async def getBddAgent(mcp_servers) -> Agent:
    """Build the BDD testing agent.

    Args:
        mcp_servers: MCP server objects handed to the agent as its `mcp_servers`.

    Returns:
        An `Agent` named "BddAgent" that uses the notebook-level `system_prompt`
        as its instructions and the "gpt-4o-mini" model.
    """
    agent_settings = {
        "name": "BddAgent",
        "instructions": system_prompt,
        "model": "gpt-4o-mini",
        "mcp_servers": mcp_servers,
    }
    return Agent(**agent_settings)

In [None]:
mcp_servers = []
for i, params in enumerate(mcp_server_params):
    try:
        print(f"\nTesting server {i+1}: {params['args']}")
        server = MCPServerStdio(params, client_session_timeout_seconds=600)
        await server.connect()
        mcp_servers.append(server)
        await asyncio.sleep(3.0)
        await asyncio.sleep(0)
        print("Connected OK!")
    except Exception as e:
        print(f"❌ Error connecting to {params['args']}: {e}")
        traceback.print_exc()
        await asyncio.sleep(2.0)

bddAgent = await getBddAgent(mcp_servers)
with trace("Bdd"):
    await Runner.run(bddAgent, gherkin_feature, max_turns=60)
