# RAG development  using SBERT

### Model setup

#### Installing sbert transformers

In [1]:
%pip install -U sentence-transformers

Note: you may need to restart the kernel to use updated packages.


#### Setup embeddings model

In [1]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

  from tqdm.autonotebook import tqdm, trange
2024-10-11 10:48:36.698750: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-11 10:48:36.810243: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-11 10:48:37.414066: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## INDEXING

### LOAD

In [2]:
#@title Import Mind2Web dataset form huggingface
from datasets import load_dataset

# Text only dataset
ds = load_dataset("osunlp/Mind2Web")
# Multimodal dataset
#ds = load_dataset("osunlp/Multimodal-Mind2Web")

In [3]:
# Extract the train split from the dataset
train_ds = ds['train']

# Get an example task for testing
task = train_ds[0]
# Get the HTML from one of the task's actions for testing
html = task['actions'][0]['cleaned_html']
print(len(html))

42254


In [6]:
print(html)

<html backend_node_id="117">
  <body>
    <div backend_node_id="561">
      <div backend_node_id="562">
        <div backend_node_id="569">
          <div backend_node_id="570">
            <a backend_node_id="572">
                <text backend_node_id="573">Skip to main content</text>
              </a>
            <a backend_node_id="578">
                      <text backend_node_id="579">Use Tock at your business</text>
                    </a>
                  <header backend_node_id="584">
              <div backend_node_id="586">
                  <a backend_node_id="589" aria_label="Tock home page"/>
                  <div backend_node_id="597">
                    <button backend_node_id="598" aria_label="Search"/>
                    <button backend_node_id="602" aria_label="Menu"/>
                  </div>
                </div>
              <div backend_node_id="607">
                <div backend_node_id="608">
                  <div backend_node_id="610">
               

In [7]:
print('webpage: ', task['website'])
print('domain: ', task['domain'])
print('subdomain: ', task['subdomain'])

print('user_prompt: ', task['confirmed_task'])

webpage:  exploretock
domain:  Travel
subdomain:  Restaurant
user_prompt:  Book a winery tour in Napa Valley in a winery which serves Mediterranean cuisine with wine testing for 4 guests on April 15, 10 am in a outdoor setup.


## SPLITTING

In [14]:
from bs4 import BeautifulSoup
import pandas as pd

# Function to create a path with parent 'backend_node_id' attributes
def create_path(node, path=''):
    # Recursively build the path using 'backend_node_id' attributes
    if not node or not node.has_attr('backend_node_id'):
        return path
    current_node_id = node['backend_node_id']
    parent_path = create_path(node.parent, path)
    return f"{parent_path}/{current_node_id}".lstrip('/')

# Function to recursively gather text from both the current element and its children
def gather_text(element):
    # If the element itself has text, capture it
    text = element.get_text(separator=' ', strip=True) if element else ''
    return text

# Function to parse the HTML and extract paths and content for use in LAM models
def parse_html_for_rag(html_content):
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')

    # List to store parsed data
    data = []

    # Iterate through all elements with 'backend_node_id' attributes
    for element in soup.find_all(attrs={"backend_node_id": True}):
        # Create hierarchical path using parent-child relationships
        path = create_path(element)
        
        # Gather text content from the current element and its children
        text = gather_text(element)
        
        # Check for interactive components and attributes that signify interaction
        is_interactive = element.name in ['button', 'a', 'input', 'select', 'textarea'] or element.has_attr('role')
        
        # Extract additional attributes that might indicate interactive behavior
        element_info = {
            "backend_node_id": element.get("backend_node_id"),
            "tag": element.name,
            "text": text,
            "path": path,
            "is_interactive": is_interactive,
            "role": element.get("role"),
            "aria_label": element.get("aria-label"),  # Corrected to standard attribute name
        }
        
        # Include elements that are interactive or have meaningful text content
        if is_interactive or text.strip():  
            data.append(element_info)

    # Convert the data into a DataFrame
    df = pd.DataFrame(data)

    return df

# NOTE: We could clean the text

In [15]:
parsed_task = parse_html_for_rag(html)
print(parsed_task.to_string(index=False))

backend_node_id     tag                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [16]:
def flatten_and_verbalize_html(dataFrameRow):
    text = '''tag: {tag}.
    text: {text}.
    path: {path}'''
    
    return text.format(tag=dataFrameRow['tag'], text=dataFrameRow['text'], path=dataFrameRow['path'])

In [17]:
df = parsed_task.copy()
df['text_combined'] = df.apply(lambda row: f"tag:{row['tag']} path:{row['path']} text:{row['text']} is_interactive:{row['is_interactive']} role:{row['role']} aria_label:{row['aria_label']}", axis=1)


print(df['text_combined'][0])

tag:html path:117 text:Skip to main content Use Tock at your business Book a reservation Reservations Search DELICIOUS STARTS HERE. Reservation type Dine in Pickup Delivery Events Wineries Everything Location Date Time Now 11:30 AM 12:00 PM 12:30 PM 1:00 PM 1:30 PM 2:00 PM 2:30 PM 3:00 PM 3:30 PM 4:00 PM 4:30 PM 5:00 PM 5:30 PM 6:00 PM 6:30 PM 7:00 PM 7:30 PM 8:00 PM 8:30 PM 9:00 PM 9:30 PM 10:00 PM 10:30 PM 11:00 PM 11:30 PM Party size 1 guest 2 guests 3 guests 4 guests 5 guests 6 guests 7 guests 8 guests 9 guests 10 guests Search Explore all that Tock has to offer Dine in Pickup Delivery Events Wineries New & Notable The latest & greatest on Tock Explore all Explore all Agni Columbus, OH - Brewery District Grill Streetside 62 Bistro Washington Court House, OH Restaurant Hell's Backbone Grill & Farm Boulder, UT Four Corners Farm To Table Symposium Cincinnati, OH - East Walnut HIlls Wine Shop The Merchant Tavern Akron, OH - Merriman Valley American Luigi's Ristorante Italiano Mason, OH

### EMBEDDINGS

In [18]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

# Function to create embeddings from HTML content dataframe
def generate_html_embeddings(df, embedding_model):
    # Combine the relevant text fields into a single string for embedding
    df['text_combined'] = df['text_combined'] = df.apply(lambda row: f"tag:{row['tag']} path:{row['path']} text:{row['text']} is_interactive:{row['is_interactive']} role:{row['role']} aria_label:{row['aria_label']}", axis=1)
    
    # Generate embeddings for each combined text field
    embeddings = embedding_model.encode(df['text_combined'].tolist(), convert_to_tensor=False)
    
    return embeddings

In [21]:
embeddings = generate_html_embeddings(parsed_task.copy(), model)
print(embeddings.shape)

(463, 384)


## RETRIEVAL

In [4]:
# Extract the user prompt from the task
# NOTE: 
#   1. We could use some pre-reasoning to include with the user prompt
#   2. We should use some way to enrich the query with task information like the reasoning and the current step
user_prompt = task['confirmed_task']
print(user_prompt)

Book a winery tour in Napa Valley in a winery which serves Mediterranean cuisine with wine testing for 4 guests on April 15, 10 am in a outdoor setup.


In [14]:
# Prompt embeddings
#query = "List of wineries in Napa Valley that offer Mediterranean cuisine."
query = "Search for Napa Valley Winneries"
query_embeddins = model.encode(query, convert_to_tensor=False)

In [15]:
# Get similarities
similarities = model.similarity(query_embeddins, embeddings)
print(similarities)

tensor([[ 0.2914,  0.2883,  0.2959,  0.2964,  0.3061,  0.0215,  0.0013,  0.0300,
          0.0161,  0.1681,  0.0140, -0.0016, -0.0025,  0.1434,  0.1401,  0.0750,
          0.0200,  0.0932,  0.0855,  0.0845,  0.0892,  0.0914,  0.0848,  0.0917,
          0.1011,  0.0950,  0.0971,  0.0028, -0.0066,  0.3215,  0.3186,  0.2126,
          0.0641,  0.0627,  0.0574,  0.0704,  0.0773, -0.0254, -0.0076, -0.0185,
          0.0025,  0.1821,  0.1937,  0.0588,  0.0514,  0.0641,  0.0460,  0.0800,
          0.0285,  0.0111,  0.0407,  0.0391,  0.0013,  0.0063,  0.0328,  0.0339,
          0.0737,  0.0689, -0.0047,  0.0003, -0.0053,  0.0104,  0.0236,  0.0081,
         -0.0068, -0.0091, -0.0070,  0.0021,  0.0119,  0.0113, -0.0034, -0.0141,
         -0.0095, -0.0003, -0.0258,  0.0060, -0.0082, -0.0114,  0.0086, -0.0027,
          0.0085,  0.0008,  0.0131,  0.0027,  0.0060, -0.0006,  0.0137,  0.0060,
          0.0083, -0.0009,  0.0123,  0.0058,  0.0081, -0.0008,  0.0124,  0.0040,
          0.0103,  0.0006,  

In [16]:
# Get the top k similar indices
import tensorflow as tf
top_values, top_indices = tf.math.top_k(similarities, k=10)

print("Similarity scores:", top_values.numpy())
print("Top indices:", top_indices.numpy())

Similarity scores: [[0.32146847 0.31864136 0.3061033  0.29642457 0.29585326 0.29137075
  0.28834957 0.21972197 0.21259952 0.21040052]]
Top indices: [[ 29  30   4   3   2   0   1 180  31 197]]


2024-10-10 09:57:05.606663: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [93]:
# Display the top 5 most relevant elements
indexes = top_indices.numpy()

print ("Top 5 most relevant elements:\n")
for i in indexes:
    print(parsed_task.iloc[i].to_string(index=False))

Top 5 most relevant elements:

backend_node_id  tag                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     

#### Evaluating results

In [98]:
neg_candidates = task['actions'][0]['neg_candidates']
for i in indexes[0]:
    for candidate in neg_candidates:
        if parsed_task.iloc[i]['backend_node_id'] == candidate['backend_node_id']:
            print('Negative Candidate:', candidate['backend_node_id'])

Negative Candidate: 647
Negative Candidate: 648
Negative Candidate: 570
Negative Candidate: 569
Negative Candidate: 562
Negative Candidate: 561
Negative Candidate: 908
Negative Candidate: 649
Negative Candidate: 935


In [51]:
# Task positive candidates
print(task['actions'][0]['pos_candidates'])

[{'attributes': '{"backend_node_id": "110", "bounding_box_rect": "557.671875,634.390625,24,24", "class": "MuiSvgIcon-root css-tdzr9e", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '110', 'is_original_target': True, 'is_top_level_target': True, 'tag': 'svg'}]


In [52]:
def find_node_with_id(id):
    return parsed_task[parsed_task['path'].str.contains(id)]

In [89]:
# Find rows where the 'path' column contains the specified value
results = find_node_with_id('110')
print(results.to_string(index=False))

Empty DataFrame
Columns: [backend_node_id, tag, text, path, is_interactive, role, aria_label]
Index: []


In [54]:
found_nodes = find_node_with_id('727')
print(found_nodes.to_string(index=False))

backend_node_id    tag text                                                path  is_interactive role aria_label
            728 button      561/562/569/570/647/648/649/663/664/722/725/727/728            True None       None


In [43]:
found_nodes = find_node_with_id('597')
print(found_nodes.to_string(index=False))

backend_node_id    tag text                            path  is_interactive role aria_label
            598 button      561/562/569/570/584/586/597/598            True None       None
            602 button      561/562/569/570/584/586/597/602            True None       None


In [33]:
# Show the found nodes as tree
from typing import Dict, List
from dataclasses import dataclass

@dataclass
class Element:
    id: str
    tag: str
    children: List['Element']

def create_tree(paths_data: List[dict]) -> Dict[str, Element]:
    elements = {}
    
    # First, create all elements that are explicitly listed
    for item in paths_data:
        elements[item['backend_node_id']] = Element(
            id=item['backend_node_id'],
            tag=item['tag'],
            children=[]
        )
    
    # Then, establish parent-child relationships
    for item in paths_data:
        path = item['path'].split('/')
        
        # Traverse the path and establish relationships
        for i in range(1, len(path)):
            parent_id = path[i - 1]
            child_id = path[i]
            
            if parent_id in elements and child_id in elements:
                parent_element = elements[parent_id]
                child_element = elements[child_id]
                if child_element not in parent_element.children:
                    parent_element.children.append(child_element)
    
    # Find the root(s) - elements that have no parent
    all_elements = set(elements.keys())
    all_children = set(child.id for element in elements.values() for child in element.children)
    roots = {id: elem for id, elem in elements.items() if id not in all_children}
    
    # Ensure all elements are attached to a root
    if len(roots) > 1:
        main_root = Element(id="root", tag="root", children=list(roots.values()))
        return {"root": main_root}
    
    return roots

def print_tree(element: Element, level: int = 0):
    indent = "  " * level
    print(f"{indent}- {element.id} ({element.tag})")
    for child in element.children:
        print_tree(child, level + 1)

In [48]:
root_node = "597" # Remember that the id has to be set as string
# Print results as a tree
roots = create_tree(find_node_with_id(root_node).to_dict(orient='records'))
for root in roots.values():
    print_tree(root)

- root (root)
  - 598 (button)
  - 602 (button)


## NEWRAG

### LOADING

In [9]:
# Parse the HTML content using BeautifulSoup
from bs4 import BeautifulSoup
import pandas as pd

def get_ancestor_context(element):
    context = []
    current = element
    while current.parent:
        parent = current.parent
        # Get the parent's direct text (excluding the current element's content)
        parent_text = ' '.join([
            sibling.get_text(strip=True) 
            for sibling in parent.children 
            if sibling != current and sibling.get_text(strip=True)
        ])
        if parent_text:
            context.append({
                'level': len(context),
                'tag': parent.name,
                'text': parent_text,
                'backend_node_id': parent.get('backend_node_id', ''),
                'role': parent.get('role', ''),
                'aria_label': parent.get('aria-label', '')
            })
        current = parent
    return list(reversed(context))  # Return from top-most ancestor to closest

def create_path(node, path=''):
    if not node or not node.has_attr('backend_node_id'):
        return path
    current_node_id = node['backend_node_id']
    parent_path = create_path(node.parent, path)
    return f"{parent_path}/{current_node_id}".lstrip('/')

def parse_html_for_rag(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    data = []
    
    interactive_elements = soup.find_all(lambda tag: (
        tag.name in ['button', 'a', 'input', 'select', 'textarea'] or
        tag.has_attr('role') or
        any(attr for attr in tag.attrs if 'click' in attr.lower() or 'onclick' in attr.lower())
    ) and tag.has_attr('backend_node_id'))

    for element in interactive_elements:
        context_text = get_ancestor_context(element)
        
        element_info = {
            "backend_node_id": element.get("backend_node_id"),
            "tag": element.name,
            "text": element.get_text(strip=True),
            "context_text": context_text,
            "path": create_path(element),
            "is_interactive": True,
            "role": element.get("role", ""),
            "aria_label": element.get("aria_label", ""),
            "class": " ".join(element.get("class", [])),
            "id": element.get("id", "")
        }
        data.append(element_info)

    df = pd.DataFrame(data)
    return df

### INDEXING

In [6]:
# Generate the embeddints
def generate_html_embeddings(df, embedding_model):
    df['embedding_text'] = df.apply(lambda row: f"""
    Interactive element: {row['tag']}
    Text: {row['text']}
    Context: {row['context_text']}
    Path: {row['path']}
    Role: {row['role']}
    Aria Label: {row['aria_label']}
    Class: {row['class']}
    ID: {row['id']}
    """.strip(), axis=1)
    
    embeddings = embedding_model.encode(df['embedding_text'].tolist(), convert_to_tensor=False)
    return embeddings

### RETRIEVAL

In [7]:
def compute_similarities(query_embedding, embeddings):
    # Compute cosine similarity
    normalized_query = query_embedding / np.linalg.norm(query_embedding)
    normalized_embeddings = embeddings / np.linalg.norm(embeddings, axis=1)[:, np.newaxis]
    similarities = np.dot(normalized_embeddings, normalized_query.T).flatten()
    return similarities

def retrieve_elements(query, embeddings, parsed_df, model, top_k=5):
    # Encode query
    query_embedding = model.encode([query], convert_to_tensor=False)[0]  # Get the first (and only) embedding
    
    # Compute similarities
    similarities = compute_similarities(query_embedding, embeddings)
    
    # Get top k indices
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    
    return parsed_df.iloc[top_indices], similarities[top_indices]

### Test rag

In [56]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Setup model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Parse HTML
parsed_df = parse_html_for_rag(html)

# Generate embeddings
embeddings = generate_html_embeddings(parsed_df, model)

# Retrieve elements for a query
user_prompt = """Book a winery tour in Napa Valley:

    Location: Napa Valley.
"""
query = user_prompt
relevant_elements, similarities = retrieve_elements(query, embeddings, parsed_df, model)

# Print results
for i, (_, element) in enumerate(relevant_elements.iterrows()):
    print(f"Result {i+1} (similarity: {similarities[i]:.4f}):")
    print(f"backend_node_id: {element['backend_node_id']} - Tag: {element['tag']}")
    print(f"Text: {element['text']}")
    print(f"Aria Label: {element['aria_label']}")
    print(f"Context: {element['context_text']}")
    print("---")
    
print('Precision respecto al positive: ')
print(rag_precision(relevant_elements, task['actions'][0]['neg_candidates'], html, 3))



Result 1 (similarity: 0.1420):
backend_node_id: 1366 - Tag: a
Text: Download it now
Aria Label: Download the Tock app
Context: [{'level': 5, 'tag': 'body', 'text': 'null', 'backend_node_id': '', 'role': '', 'aria_label': ''}, {'level': 4, 'tag': 'div', 'text': "Skip to main content Use Tock at your business Book a reservationReservationsSearch DELICIOUSSTARTSHERE.Reservation typeDine inPickupDeliveryEventsWineriesEverythingLocationDateTimeNow11:30 AM12:00 PM12:30 PM1:00 PM1:30 PM2:00 PM2:30 PM3:00 PM3:30 PM4:00 PM4:30 PM5:00 PM5:30 PM6:00 PM6:30 PM7:00 PM7:30 PM8:00 PM8:30 PM9:00 PM9:30 PM10:00 PM10:30 PM11:00 PM11:30 PMParty size1 guest2 guests3 guests4 guests5 guests6 guests7 guests8 guests9 guests10 guestsSearchExplore all that Tock has to offerDine inPickupDeliveryEventsWineriesNew & NotableThe latest & greatest on TockExplore allExplore allAgniColumbus, OH - Brewery DistrictGrillStreetside 62 BistroWashington Court House, OHRestaurantHell's Backbone Grill & FarmBoulder, UTFour Cor

In [57]:
pos_candidates = task['actions'][0]['pos_candidates']
neg_candidates = task['actions'][0]['neg_candidates']

print(len(pos_candidates)) 
print(len(neg_candidates))

1
537


In [58]:
print(pos_candidates[0])

{'attributes': '{"backend_node_id": "4946", "bounding_box_rect": "757,41,69,65", "class": "b-navigation-link m-has-sublist js-accessibility-nav-link js-list-show", "id": "outlet", "role": "menuitem", "is_clickable": "true", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '4946', 'is_original_target': False, 'is_top_level_target': True, 'tag': 'a'}


In [59]:
for candidate in neg_candidates:
    print(candidate)


{'attributes': '{"backend_node_id": "164", "bounding_box_rect": "0,0,1280,1080", "class": "desktop landscape", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '164', 'tag': 'html'}
{'attributes': '{"backend_node_id": "268", "bounding_box_rect": "0,0,1280,1080", "id": "l-body", "class": "l-body ", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '268', 'tag': 'body'}
{'attributes': '{"backend_node_id": "1350", "bounding_box_rect": "0,0,1280,5940.359375", "id": "bodyPage", "class": "l-body-page js-page", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '1350', 'tag': 'div'}
{'attributes': '{"backend_node_id": "1354", "bounding_box_rect": "0,106,1280,60", "class": "bfx-price-container bfx-disable-element-containter", "data_pw_testid_buckeye_candidate": "1"}', 'backend_node_id': '1354', 'tag': 'div'}
{'attributes': '{"backend_node_id": "1356", "bounding_box_rect": "0,0,1280,106", "class": "l-body-page_header l-header b-header js-header", "data_pw_tes

In [60]:
def candidates_in_negative_candidates(relevant_elements, neg_candidates):
    for i, (_, element) in enumerate(relevant_elements.iterrows()):
        for candidate in neg_candidates:
            if candidate.get('backend_node_id') == element['backend_node_id']:
                print(f"Found negative candidate {candidate['backend_node_id']} in relevant elements")

In [61]:
candidates_in_negative_candidates(relevant_elements, neg_candidates)

Found negative candidate 1366 in relevant elements


In [62]:
from bs4 import BeautifulSoup

# Check if relevant elements are in positive candidates path
def get_element_path(element, html_content, k=3):
    # Find the element with the specified backend_node_id
    backend_node_id = element['backend_node_id']
    target_element = html_content.find(attrs={"backend_node_id": backend_node_id})
    
    path = _get_element_path(target_element, 0, k)
    return path
    
def _get_element_path(element, current_k, max_k): # It would be better if it was a list and not a string
    if element is None:
        return ''
    
    if current_k >= max_k:
        return element.get('backend_node_id', 'NaN')
    
    if not element.parent:
        return element.get('backend_node_id', 'NaN')
    
    path = _get_element_path(element.parent, current_k + 1, max_k)
    return f"{path}/{element.get('backend_node_id', 'NaN')}".lstrip('/')

def is_element_in_target_path(candidate, target_element, html_content, k=3):
    target_element_path = get_element_path(target_element, html_content, k)
    target_element_id = target_element['backend_node_id']
    
    candidate_path = get_element_path(candidate, html_content, k)
    candidate_node_id = candidate['backend_node_id']
    # It should also check the childrens
    return candidate_node_id in target_element_path or target_element_id in candidate_path

In [63]:
def rag_precision(relevant_elements, candidates, html_content, k=3):
    soup = BeautifulSoup(html_content, 'html.parser')
    # Count the number of relevant elements in the candidates
    num_relevant_elements = 0
    for i, (_, element) in enumerate(relevant_elements.iterrows()):
        for candidate in candidates:
            if is_element_in_target_path(candidate, element, soup, k):
                num_relevant_elements += 1
                break  # Move to the next relevant element
    
    # Compute the precision
    precision = num_relevant_elements / len(relevant_elements)
    return precision

def evaluate_rag(user_prompt, html_content, pos_candidates, neg_candidates, model):
    # Parse HTML
    parsed_df = parse_html_for_rag(html_content)
    
    # Generate embeddings
    embeddings = generate_html_embeddings(parsed_df, model)
    
    # Retrieve elements for the user prompt
    relevant_elements, _ = retrieve_elements(user_prompt, embeddings, parsed_df, model)
    
    # Compute precision
    precision = rag_precision(relevant_elements, pos_candidates, html_content)
    
    return precision
    

In [64]:
from bs4 import BeautifulSoup

precisions = []
for task in train_ds:
    print(f"Task: {task['confirmed_task']}")
    for action in task['actions']:
        html_content = action['cleaned_html']
        pos_candidates = action['pos_candidates']
        neg_candidates = action['neg_candidates']
        
        precision = evaluate_rag(task['confirmed_task'], html_content, pos_candidates, neg_candidates, model)
        print(f"Precision: {precision:.4f}")
        precisions.append(precision)
    print("---")

Task: Check for pickup restaurant available in Boston, NY on March 18, 5pm with just one guest


Precision: 0.2000
Precision: 0.0000
Precision: 0.0000
Precision: 0.2000
Precision: 0.0000
Precision: 0.0000
Precision: 0.0000
Precision: 0.0000
Precision: 0.2000
Precision: 0.2000
Precision: 0.0000
---
Task: Book a winery tour in Napa Valley in a winery which serves Mediterranean cuisine with wine testing for 4 guests on April 15, 10 am in a outdoor setup.
Precision: 0.0000


KeyboardInterrupt: 