# Notebook showing how to create a benchmark

We will be using Playwright here.

In [17]:
from playwright.async_api import async_playwright

playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = True)

In [18]:
# Few-shot prompt template handed to the ActionEngine.
# It is rendered with str.format(context_str=..., query_str=...), so literal
# braces inside the example HTML are doubled ({{ and }}).
# The examples use the Playwright *async* API: every call that returns a
# coroutine must be awaited, and "scroll up" uses a negative vertical offset.
DEFAULT_PLAYWRIGHT_PROMPT = '''
Your goal is to write Playwright Python code to answer queries.

Your answer must be a Python markdown only.

Prefer User-Facing Attributes, Use text selectors, like text="Visible Text", to target elements by their visible text. 
You can also use Attributes like aria-label, aria-labelledby, role, etc., to target elements.
When user-facing attributes are not available or sufficient, Prefer class names and IDs that are meaningful and unlikely to change. 
Avoid using automatically generated, framework-specific, or obfuscated classes.
Utilize parent-child relationships to narrow down the element, especially when looking for elements within a specific section of the page

You can assume the following code has been executed:
```python
from playwright.async_api import async_playwright

playwright = await async_playwright().start()
browser = await playwright.chromium.connect_over_cdp("http://localhost:9222")
default_context = browser.contexts[0]

# Retrieve the first page in the context.
page = default_context.pages[0]
```

---

HTML:
<!DOCTYPE html>
<html>
<head>
    <title>Mock Search Page</title>
</head>
<body>
    <h1>Search Page Example</h1>
    <input id="searchBar" type="text" placeholder="Type here to search...">
    <button id="searchButton">Search</button>
    <script>
        document.getElementById('searchButton').onclick = function() {{
            var searchText = document.getElementById('searchBar').value;
            alert("Searching for: " + searchText);
        }};
    </script>
</body>
</html>

Query: Click on the search bar 'Type here to search...', type 'selenium', and press the 'Enter' key

Completion:
```python
# Let's proceed step by step.
# First we need to identify the component first, then we can click on it.

# Based on the HTML, the link can be uniquely identified using the ID "searchBar"
# Click on the search bar
search_bar = page.locator('#searchBar').first
await search_bar.click()

# Type 'selenium' into the search bar
await search_bar.type('selenium')

# Press the 'Enter' key
await page.keyboard.press('Enter')

```

---

HTML:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Mock Page for Selenium</title>
</head>
<body>
    <h1>Welcome to the Mock Page</h1>
    <div id="links">
        <a href="#link1" id="link1">Link 1</a>
        <br>
        <a href="#link2" class="link">Link 2</a>
        <br>
    </div>
</body>
</html>

Query: Click on the title Link 1 and then click on the title Link 2

Completion:
```python
# Let's proceed step by step.
# First we need to identify the first component, then we can click on it. Then we can identify the second component and click on it.

# Based on the HTML, the first link the link can be uniquely identified using the ID "link1"
# Let's use this ID with playwright to identify the link
link1 = page.locator('#link1').first

# Then we click on the link
await link1.click()

# The other link can be uniquely identified using the class "link"
# Let's use this class to identify the link
link2 = page.locator('.link').first

# Click on the element found
await link2.click()
```

---

HTML:
<!DOCTYPE html>
<html>
<head>
    <title>Mock Page</title>
</head>
<body>
    <p id="para1">This is the first paragraph.</p>
    <p id="para2">This is the second paragraph.</p>
    <p id="para3">This is the third paragraph, which we will select and copy.</p>
    <p id="para4">This is the fourth paragraph.</p>
</body>
</html>

Query: Select the text inside the third paragraph

Completion:
```python
# Let's proceed step by step.

# Select the third paragraph element
third_paragraph = page.locator("(//p)[3]").first
# Get the text inside the third paragraph
text = await third_paragraph.inner_text()
```

---

HTML:

Query: Scroll up a bit

Completion:
```python
# Let's proceed step by step.
# We don't need to use the HTML data as this is a stateless operation.
# 200 pixels should be sufficient. Let's execute the JavaScript to scroll up.

await page.evaluate("window.scrollBy(0, -200)")
```

---

---

HTML:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Enhanced Mock Page for Selenium Testing</title>
</head>
<body>
    <h1>Enhanced Test Page for Selenium</h1>
    <div class="container">
        <button id="firstButton" onclick="alert('First button clicked!');">First Button</button>
        <!-- This is the button we're targeting with the class name "action-btn" -->
        <button class="action-btn" onclick="alert('Action button clicked!');">Action Button</button>
        <div class="nested-container">
            <button id="testButton" onclick="alert('Test Button clicked!');">Test Button</button>
        </div>
        <button class="hidden" onclick="alert('Hidden button clicked!');">Hidden Button</button>
    </div>
</body>
</html>


Query: Click on the Button 'Action Button'

Completion:
```python
# Let's proceed step by step.
# First we need to identify the button first, then we can click on it.

# Based on the HTML provided, we need to devise the best strategy to select the button.
# The action button can be identified using the class name "action-btn"
action_button = page.locator('.action-btn').first

# Then we can click on it
await action_button.click()
```

---

HTML:
{context_str}
Query: {query_str}
Completion:
'''

In [19]:
import os
from llama_index.llms.azure_openai import AzureOpenAI
from lavague.action_engine import ActionEngine
from lavague.defaults import DefaultEmbedder

# Azure OpenAI connection settings.
# The key, endpoint and (optionally) deployment name are read from the
# environment; the API version and model name are fixed for this benchmark.
api_key = os.environ.get("AZURE_OPENAI_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
api_version = "2023-05-15"
model = "gpt-35-turbo"
deployment_name = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-35-turbo")

class LLM(AzureOpenAI):
    """AzureOpenAI client pre-configured from the notebook-level Azure settings."""

    def __init__(self):
        # All settings come from the module-level names defined above;
        # the temperature is pinned to 0.0.
        azure_settings = dict(
            model=model,
            deployment_name=deployment_name,
            api_key=api_key,
            azure_endpoint=azure_endpoint,
            api_version=api_version,
            temperature=0.0,
        )
        super().__init__(**azure_settings)
# Instantiate the Azure-backed LLM configured above.
llm = LLM()

# Default embedder shipped with lavague.
embedder = DefaultEmbedder()

# Engine that turns (query, html) into Playwright code; it is given the
# few-shot Playwright prompt defined earlier and runs without streaming.
action_engine = ActionEngine(llm, embedder, streaming=False, prompt_template=DEFAULT_PLAYWRIGHT_PROMPT)

### Classification prompt

## Load dataset

In [20]:
import pandas as pd 
import asyncio
from IPython.display import display, Code, HTML
import re

# Load the benchmark sample and derive an "uncontaminated" copy of the cleaned
# HTML with every backend_node_id attribute stripped out.
df = pd.read_csv("subsampled_mind2web.csv").assign(
    uncontaminated_html=lambda frame: frame['cleaned_html'].map(
        lambda cleaned: re.sub(r' backend_node_id="\d+"', '', cleaned)
    )
)
df.head()

Unnamed: 0.1,Unnamed: 0,split,annotation_uid,confirmed_task,raw_html,cleaned_html,action_uid,operation,code,cur_actions_desc,cur_actions_reprs,pos_candidates,prev_actions_desc,prev_actions_reprs,uncontaminated_html
0,6033,test_task,640e0425-bceb-45ff-ba4d-dbc5b62e31d5,"Find the ""Rock And Roll Over"" reviews","<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""338"">\n <body>\n <d...",96238fb3-bc46-4dff-95c0-4b2d4ecc70ea,"{'op': 'TYPE', 'original_op': 'TYPE', 'value':...",```python\nelement = driver.find_element(By.XP...,"Enter ""Rock And Roll Over"" in the text box to ...",[textbox] Enter artist name or song title -> ...,"[{'attributes': '{""backend_node_id"": ""248"", ""b...",,,<html>\n <body>\n <div>\n <div>\n ...
1,3394,test_domain,34e0bf85-6441-40cb-b7f6-d107e5bcb049,Look up the visitors trend for Apple stock,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""14307"">\n <div backend...",8043e8e8-bb7a-4521-80bf-246bc16e883c,"{'op': 'CLICK', 'original_op': 'CLICK', 'value...",```python\nelement = driver.find_element(By.XP...,"Click on ""AAPL"" to look up the visitors trend ...",[div] AAPL -> CLICK,"[{'attributes': '{""backend_node_id"": ""14603"", ...","['Enter ""apple"" in the search box to look up t...","['[textbox] Search for news, symbols or compa...",<html>\n <div>\n <div>\n ...
2,320,test_domain,77269ea5-70a4-4cfa-a2f9-9937a1c55096,Search for early care and education programs f...,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""44287"">\n <body>\n ...",b416d08b-90c4-43e1-b3d6-9d4a05e56a06,"{'op': 'CLICK', 'original_op': 'CLICK', 'value...",```python\nelement = driver.find_element(By.XP...,Select the After School Care checkbox.,[checkbox] After School Care -> CLICK,"[{'attributes': '{""backend_node_id"": ""44285"", ...","['Click on the ""services for RESIDENTS"" link.'...","['[link] services for RESIDENTS -> CLICK', '[...",<html>\n <body>\n <form>\n <input nam...
3,6669,test_task,7f90a191-9dbe-478a-8ae2-8aa45b790158,Find more films from the director of Smile.,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""23177"">\n <body>\n ...",fd713700-3876-44a8-80ab-4da898beab42,"{'op': 'CLICK', 'original_op': 'CLICK', 'value...",```python\nelement = driver.find_element(By.XP...,"Click on ""Smile.""",[div] Smile -> CLICK,"[{'attributes': '{""backend_node_id"": ""23642"", ...","['Search for ""Smile"" in the TV Shows and Movie...",['[textbox] Search TV Shows and Movies... -> ...,<html>\n <body>\n <div>\n <div>\n ...
4,226,test_domain,332ed50d-4772-4eb3-9de9-27ff39abc161,Create a Fitness board.,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""123"">\n <body backend_...",01342a5e-5cc3-45e7-bd9d-b522611fc7bf,"{'op': 'CLICK', 'original_op': 'CLICK', 'value...",```python\nelement = driver.find_element(By.XP...,Click on James Smith's profile image.,[img] James Smith -> CLICK,"[{'attributes': '{""backend_node_id"": ""117"", ""b...",,,<html>\n <body>\n <div>\n <div role...


## Playwright

In [21]:
import numpy as np

def longest_common_substring(s1, s2):
    """Find the longest contiguous run of characters shared by ``s1`` and ``s2``.

    Uses the classic dynamic-programming recurrence but keeps only the previous
    and current rows, so memory is proportional to ``len(s2)`` rather than
    ``len(s1) * len(s2)``.

    Args:
        s1: First string; the returned substring is sliced from it.
        s2: Second string.

    Returns:
        A ``(length, substring)`` tuple. When several substrings share the
        maximal length, the one ending earliest in ``s1`` is returned.
        ``(0, "")`` is returned when nothing is shared or an input is empty.
    """
    width = len(s2) + 1
    prev_row = [0] * width  # match lengths for the prefix ending at s1[i-2]
    longest, end_pos = 0, 0

    for i, ch1 in enumerate(s1, start=1):
        curr_row = [0] * width
        for j, ch2 in enumerate(s2, start=1):
            if ch1 == ch2:
                # Extend the common run that ended just before both characters.
                run = prev_row[j - 1] + 1
                curr_row[j] = run
                # Strict comparison keeps the first maximal match found.
                if run > longest:
                    longest, end_pos = run, i
        prev_row = curr_row

    return longest, s1[end_pos - longest:end_pos]

def compute_lcs_scores(true_node, output_node):
    """Score two strings by their longest common substring.

    Args:
        true_node: Reference string (the ground-truth element's outer HTML).
        output_node: Candidate string (the predicted element's outer HTML).

    Returns:
        A numpy array ``[true_node_score, output_node_score]`` where each entry
        is the longest-common-substring length divided by the length of the
        respective string. An empty string gets a score of 0.0 instead of
        raising ``ZeroDivisionError``.
    """
    lcs_length, _ = longest_common_substring(true_node, output_node)
    # Guard each ratio separately: an empty side cannot overlap with anything.
    true_node_score = lcs_length / len(true_node) if true_node else 0.0
    output_node_score = lcs_length / len(output_node) if output_node else 0.0
    return np.array([true_node_score, output_node_score])

import ast
from playwright.async_api import Locator

import re

def keep_assignments(code_snippet):
    """Keep only the lines of ``code_snippet`` that look like simple assignments.

    A line is kept when, after optional leading whitespace, it starts with a
    Python identifier followed by a single ``=`` and a non-empty right-hand
    side. Comparison lines such as ``x == 1`` are rejected.

    This is a line-based heuristic, not a parser: comment lines and statements
    such as ``await foo()`` are dropped, but a continuation line that happens
    to start with ``name=value`` would still be kept.

    Args:
        code_snippet: Source code as a single string.

    Returns:
        The kept lines, in their original order and with their original
        indentation, joined by newlines. Empty string if nothing matches.
    """
    # `=(?!=)` accepts the assignment operator but not the start of `==`.
    pattern = r'^\s*[a-zA-Z_][a-zA-Z0-9_]*\s*=(?!=)\s*.+'

    # Filter and keep only lines with variable assignments
    assignments = [line for line in code_snippet.split('\n') if re.match(pattern, line)]

    # Join the filtered lines back into a string
    return "\n".join(assignments)

# This function will be used to visit each node in the AST
class VariableVisitor(ast.NodeVisitor):
    """AST visitor that records the names bound by plain assignments.

    After ``visit(tree)``, ``output`` lists, in visiting order, the identifier
    of every assignment target that is a bare name. Attribute, subscript and
    tuple targets are ignored.
    """

    def __init__(self):
        super().__init__()
        self.output = []

    def visit_Assign(self, node):
        # Collect only targets that are simple variable names.
        self.output.extend(
            target.id for target in node.targets if isinstance(target, ast.Name)
        )

In [22]:
from IPython.display import display, Code, HTML
import asyncio
from tqdm import tqdm

outputs = []

N_rows = -1

for index, row in tqdm(df.iloc[:N_rows].iterrows()):
    
    ground_truth_code = row["code"]
    id_html = row["cleaned_html"]
    html = row["uncontaminated_html"]
    query = row["cur_actions_desc"]
    
    id_page = await browser.new_page()
    id_page.set_default_timeout(5000)
    await id_page.set_content(id_html)
    
    page = await browser.new_page()
    page.set_default_timeout(5000)
    await page.set_content(id_html)

    instruction = query

    generated_code, source_nodes = action_engine.get_action(query, html)

    context_str = "\n".join(source_nodes)

    # Executes async the generated code
    code = keep_assignments(generated_code)
    parsed_code = ast.parse(code)

    # Create a visitor instance and use it to visit the nodes in the parsed AST
    visitor = VariableVisitor()
    visitor.visit(parsed_code)
    variables = visitor.output
    variable = variables[0]
    
    test_func = None
    indented_code = f"async def test_func():\n"

    ground_truth_code = row["code"]

    backend_node_id = ground_truth_code.split("backend_node_id")[1].split("']")[0].replace("='","")

    ground_truth_element_str = f"""
    \t{code}
    \tground_truth_element= id_page.locator('//*[@backend_node_id="{backend_node_id}"]')
    \tground_truth_outer_html = await ground_truth_element.evaluate("el => el.outerHTML")
    \ttarget_outer_html = await {variable}.evaluate("el => el.outerHTML")
    \tscore = max(compute_lcs_scores(ground_truth_outer_html, target_outer_html))
    \treturn score
    """

    indented_code += ground_truth_element_str
    exec(indented_code, globals())
    loop = asyncio.get_event_loop()

    if test_func:
        try:
            score = await loop.create_task(test_func())
            execution_success = True
            execution_error = ""
            identification_result = ""
        except Exception as e:
            score = None
            print("Error occurred:", e)
            execution_success = False
    else:
        raise Exception("test_func not defined, issue generating code")
    output = {
        "query": query,
        "score": score,
        "html": html,
        "source_nodes": ("-"*10).join(source_nodes),
        "prompt": DEFAULT_PLAYWRIGHT_PROMPT.format(context_str=source_nodes, query_str=query),
        "generated_code": code,
        "ground_truth_code": f"""page.locator('//*[@backend_node_id="{backend_node_id}"]')""",
        "execution_success": execution_success,
        "execution_error": execution_error,
        "identification_result": identification_result,
        "model_id": model,
    }
    outputs.append(output)
    await page.close()

0it [00:00, ?it/s]

Retrying llama_index.llms.openai.base.OpenAI._chat in 0.6151966207815092 seconds as it raised APIConnectionError: Connection error..
Retrying llama_index.llms.openai.base.OpenAI._chat in 0.30411521716332635 seconds as it raised APIConnectionError: Connection error..
Retrying llama_index.llms.openai.base.OpenAI._chat in 2.881391784129477 seconds as it raised APIConnectionError: Connection error..
Retrying llama_index.llms.openai.base.OpenAI._chat in 1.9880749782533131 seconds as it raised APIConnectionError: Connection error..


In [None]:
# Gather the per-row benchmark records collected in `outputs` into a DataFrame
# and display it as the cell result.
output_df = pd.DataFrame(outputs)
output_df

Unnamed: 0,query,score,html,source_nodes,prompt,generated_code,ground_truth_code,execution_success,execution_error,identification_result,model_id,retrieve_code,indexing_time,completion_time
0,"Click on the ""Baby Names"" link.",1.0,<html>\n <body>\n <div>\n <div>\n ...,"<li>\n <button type=""...",\nYour goal is to write Playwright Python code...,"baby_names_link = page.locator('text=""Baby Nam...","page.locator('//*[@backend_node_id=""217""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",11.863199,1.188703
1,"Select the ""Female"" option.",1.0,<html>\n <body>\n <div>\n <div>\n ...,<div>\n <div>\n ...,\nYour goal is to write Playwright Python code...,female_radio_button = page.locator('[aria_labe...,"page.locator('//*[@backend_node_id=""7123""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",11.539545,1.156297
2,"Click on the ""Search Baby Names"" button.",1.0,<html>\n <body>\n <div>\n <div>\n ...,"<li>\n <button type=""...",\nYour goal is to write Playwright Python code...,"search_button = page.locator('text=""Search bab...","page.locator('//*[@backend_node_id=""15270""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",11.327075,1.281173
3,Click on the option for the current most popul...,0.357143,<html>\n <body>\n <div>\n <div>\n ...,"<li>\n <button type=""...",\nYour goal is to write Playwright Python code...,"option = page.locator('text=""Top Baby Names 20...","page.locator('//*[@backend_node_id=""21528""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",18.736696,1.788663
4,"Click on the ""Most popular"" menu item.",0.469388,<html>\n <body>\n <div>\n <div>\n ...,"<div>\n <div>\n <ul role=""menu"">\n...",\nYour goal is to write Playwright Python code...,"most_popular_menu_item = page.locator('text=""M...","page.locator('//*[@backend_node_id=""37562""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",18.736193,1.12164
5,"Click on the link for the name ""Emma"".",1.0,<html>\n <body>\n <div>\n <div>\n ...,"<main>\n <nav aria_label=""bre...",\nYour goal is to write Playwright Python code...,"emma_link = page.locator('text=""Emma""').first","page.locator('//*[@backend_node_id=""53712""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",19.142285,1.127619
6,Scroll down to the item for Emma's name popula...,1.0,<html>\n <body>\n <div>\n <div>\n ...,<div>\n <strong>\n ...,\nYour goal is to write Playwright Python code...,"emma_name_popularity = page.locator('text=""Emm...","page.locator('//*[@backend_node_id=""67923""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",13.706698,1.312132
7,Click on the button without label.,1.0,<html>\n <body>\n <div>\n <div>\n ...,<h2>\n <text>Emma name popu...,\nYour goal is to write Playwright Python code...,button_without_label = page.locator('div[role=...,"page.locator('//*[@backend_node_id=""77310""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",13.743337,1.373192
8,Select the option for the year 2015.,1.0,<html>\n <body>\n <div>\n <div>\n ...,"<ul role=""listbox"">\n <li role=""optio...",\nYour goal is to write Playwright Python code...,"year_2015 = page.locator('text=""2015""').first","page.locator('//*[@backend_node_id=""87226""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",15.398518,1.058737
9,"Type ""dermatologist"" into the search box for c...",1.0,<html>\n <body>\n <div>\n <div>\n ...,<main>\n <div>\n <section>...,\nYour goal is to write Playwright Python code...,"search_box = page.locator('input[name=""patient...","page.locator('//*[@backend_node_id=""96""]')",True,,,gpt-35-turbo,"def get_retriever_code(embed, html):\n \n ...",13.161091,1.384105
