# RAG functional evaluation

## Load embeddings and models

In [1]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, CrossEncoder, util

# Load dataset: the task prompts table and the nodes table
task_prompts_df = pd.read_csv('dataset/task_prompts.csv')
nodes_df = pd.read_csv('dataset/nodes.csv')

print("Task prompts:", task_prompts_df.shape)
print("Nodes:", nodes_df.shape)

# Load the precomputed embeddings
# NOTE(review): later cells index these arrays with the index values of the DataFrames above,
# so row i of each array is presumably the embedding of row i of the matching table — TODO confirm.
task_prompt_embedings = np.load('dataset/task_prompts_embeddings.npy')
nodes_texts_embedings = np.load('dataset/nodes_texts_embeddings.npy')

print("Task prompts embedings:", task_prompt_embedings.shape)
print("Nodes texts embedings:", nodes_texts_embedings.shape)

# Prepare the embedding model and the cross-encoder used for reranking
# NOTE(review): the "Load model directly" cell further down rebinds `model` to a causal LM,
# so after that cell has run `model` no longer refers to this SentenceTransformer.
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

  from tqdm.autonotebook import tqdm, trange
2024-10-21 09:44:58.618546: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-21 09:44:58.745279: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-21 09:44:59.379281: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Task prompts: (30, 2)
Nodes: (161140, 8)
Task prompts embedings: (30, 768)
Nodes texts embedings: (161140, 768)




## Retrieval

In [2]:
# Get the nodes indexes for a given action_id
def get_nodes_indexes(action_id, nodes_df):
    """Return the index labels of the rows of ``nodes_df`` whose ``action_id`` column equals ``action_id``."""
    belongs_to_action = nodes_df['action_id'] == action_id
    matching_rows = nodes_df.loc[belongs_to_action]
    return matching_rows.index


def get_nodes_embeddigs(nodes_indexes, nodes_texts_embedings):
    """Select from ``nodes_texts_embedings`` the entries addressed by ``nodes_indexes``.

    The selection is a plain ``[]`` lookup, so ``nodes_indexes`` must be something
    the embeddings container accepts as an index.
    """
    selected_embeddings = nodes_texts_embedings[nodes_indexes]
    return selected_embeddings


def get_task_embedings(task_index, task_prompt_embedings):
    """Return the entry of ``task_prompt_embedings`` found at ``task_index``."""
    return task_prompt_embedings[task_index]

def get_top_nodes(similarities, k=10):
    """Return the ``k`` highest similarity scores and their positions.

    The selection runs along the last axis of ``similarities``, so both a flat
    score vector and a ``(1, N)`` score matrix yield the best ``k`` of the ``N``
    scores. ``k`` is capped at the number of scores on that axis (the previous
    version capped it at the size of the first axis, which reduced ``k`` to 1
    for a ``(1, N)`` matrix).

    Args:
        similarities: array-like of scores with at least one dimension.
        k: maximum number of entries to return per row.

    Returns:
        Tuple ``(top_values, indices)`` of flattened NumPy arrays. Values are in
        descending order; among equal scores the lower position comes first.

    Raises:
        ValueError: if ``k`` is negative.
    """
    scores = np.asarray(similarities)
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    # Never ask for more entries than there are scores on the ranked axis
    k = min(int(k), scores.shape[-1])
    # A stable sort on the negated scores gives descending order while keeping
    # the lower position first among ties
    order = np.argsort(-scores, axis=-1, kind='stable')[..., :k]
    top_values = np.take_along_axis(scores, order, axis=-1)
    return top_values.flatten(), order.flatten()

def get_top_nodes_with_rerank(similarities, k=10):
    """Placeholder for a reranking variant of ``get_top_nodes``; not implemented yet.

    Both arguments are currently ignored and the function returns ``None``.
    """
    return None

def get_topk_from_action(action_df, nodes_df, task_prompt, task_index, task_prompt_embedings, nodes_texts_embedings, model, cross_encoder, k=10):
    """Rank the nodes of one action against a task prompt, with and without reranking.

    The nodes sharing the action's ``action_id`` are scored against the task
    prompt embedding with ``util.semantic_search``; the resulting candidates are
    then scored again with the cross-encoder on ``(task_prompt, node text)`` pairs.

    Args:
        action_df: DataFrame for the action; only the ``action_id`` of its first row is read.
        nodes_df: nodes table with at least the ``action_id`` and ``text`` columns.
            Its index labels are also used to index ``nodes_texts_embedings``
            (assumes a default 0..n-1 index aligned with the embeddings — TODO confirm).
        task_prompt: prompt text paired with each node text for the cross-encoder.
        task_index: position of the task in ``task_prompt_embedings``.
        task_prompt_embedings: embeddings of the task prompts.
        nodes_texts_embedings: embeddings of the node texts.
        model: unused; kept so existing callers do not break.
        cross_encoder: ``CrossEncoder`` used to rerank the candidates.
        k: number of nodes to return in each ranking.

    Returns:
        ``(sorted_nodes, sorted_nodes_reranker)``: two lists of ``nodes_df`` rows,
        the first ordered by embedding similarity and the second by cross-encoder
        score, each truncated to ``k``. Both lists are empty when the action has
        no nodes. Returns ``None`` (after printing a message) when
        ``cross_encoder`` is not a ``CrossEncoder``.
    """
    action_id = action_df['action_id'].iloc[0]

    # Validate the reranker that was actually passed in, not the notebook-level global
    if not isinstance(cross_encoder, CrossEncoder):
        print(f"Error: reranker is not a CrossEncoder object. It is a {type(cross_encoder)}.")
        return None

    # The specific task prompt embeddings
    prompt_embedings = get_task_embedings(task_index, task_prompt_embedings)

    # Restrict the search to the nodes of this action
    nodes_indexes = get_nodes_indexes(action_id, nodes_df)
    if len(nodes_indexes) == 0:
        return [], []
    nodes_embedings = get_nodes_embeddigs(nodes_indexes, nodes_texts_embedings)

    # Keep a pool of at least 100 candidates for the reranker, and never fewer than k
    hits = util.semantic_search(prompt_embedings, nodes_embedings, top_k=max(k, 100))[0]

    # corpus_id is a position inside nodes_embedings (the per-action subset),
    # so translate it back to the nodes_df label before any row lookup
    for hit in hits:
        hit['node_index'] = nodes_indexes[hit['corpus_id']]

    # Perform reranking on (prompt, node text) pairs
    try:
        reranker_input = [(task_prompt, nodes_df.loc[hit['node_index'], 'text']) for hit in hits]
        reranker_scores = cross_encoder.predict(reranker_input)
    except AttributeError as e:
        print(f"Error when calling reranker.predict: {e}")
        # Best effort: keep the similarity ranking usable by giving every hit the same score
        reranker_scores = [0] * len(hits)

    for hit, cross_score in zip(hits, reranker_scores):
        hit['cross_score'] = cross_score

    # Top k under each scoring
    by_similarity = sorted(hits, key=lambda hit: hit['score'], reverse=True)[:k]
    by_reranker = sorted(hits, key=lambda hit: hit['cross_score'], reverse=True)[:k]

    sorted_nodes = [nodes_df.loc[hit['node_index']] for hit in by_similarity]
    sorted_nodes_reranker = [nodes_df.loc[hit['node_index']] for hit in by_reranker]

    return sorted_nodes, sorted_nodes_reranker

In [3]:
# Example task: the row labelled 0 of the task prompts table
task = task_prompts_df.loc[0]
# Task information
task_id = task['task_id']
# Index label of the first row with this task_id; passed below to select the prompt embedding
task_index = task_prompts_df[task_prompts_df['task_id'] == task_id].index[0]

user_instruction = task['prompt']
# Example action: the first action_id found among the nodes of this task
actions_ids = nodes_df[nodes_df['task_id'] == task_id]['action_id'].unique()
action = nodes_df[nodes_df['action_id'] == actions_ids[0]]

# Get the top k nodes: one list ordered by embedding similarity, one by cross-encoder score
top_nodes, top_nodes_reranker = get_topk_from_action(action, nodes_df, user_instruction, task_index, task_prompt_embedings, nodes_texts_embedings, model, reranker, k=10)

## Generation

In [23]:
# Generation prompt
# Template with two placeholders, {user_instruction} and {elements}, filled with str.format.
# NOTE(review): rule 1 refers to an "ACTIONS HISTORY", but the template has no section or
# placeholder for it — confirm whether one should be added.
# NOTE(review): "issu" in rule 1 looks like a typo for "issue", and the "Select ..." example is
# not one of the three listed action types; both are runtime prompt text and are left unchanged here.
retrieval_prompt = """You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. Your objective is to generate a sequence of actions to take in order to complete the task.
Here's the information you'll have:

1- The user's instructions: This is the task you're trying to complete.
2- The current web page's elements to interact with: This is a simplified representation of the webpage, providing key information.

Answer Format: In order to complete the task successfully, you need to provide the positive candidates in the following format:
 - Include all the candidates in the bracket.
 - Separate each candidate by a comma.
 - If there are no candidates, provide an empty bracket.
 
To be successful, it is very important to follow the following rules:
1. You should reason the steps needed to achieve the task, extract what steps have been taken from the ACTIONS HISTORY, and issu what the following actions should be.
2. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.
3. You should not issue the same action twice.

TYPES OF ACTIONS:
1. Click: Click on a button or a link.
2. Type: Type text into a text box.
3. Hover: Hover over an element.

EXAMPLE OF ACTIONS:
- Click node with id "619".
- Select Pickup from node "2134"

USER INSTRUCTIONS: {user_instruction}
WEBPAGE ELEMENTS: {elements}

OUTPUT EXAMPLE:
output:[Click node with id "619", Click node with id "620", Click node with id "621"]
"""



In [18]:
# Format the nodes as plain text, one "Node <node_id>: <text>" line per node
def topk_to_text(topk_nodes):
    """Render nodes as text, one ``Node <node_id>: <text>`` line per node.

    Each node must support lookup of the ``'node_id'`` and ``'text'`` keys.
    Every line, including the last, ends with a newline; an empty input gives
    an empty string.
    """
    rendered_lines = [f"Node {node['node_id']}: {node['text']}\n" for node in topk_nodes]
    return "".join(rendered_lines)

In [26]:
# Format prompt: render the similarity-ranked nodes as text and fill both template placeholders
# (top_nodes_reranker, the cross-encoder ranking, is not used here)
elements = topk_to_text(top_nodes)
content = retrieval_prompt.format(user_instruction=user_instruction, elements=elements)
print(content)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. Your objective is to generate a sequence of actions to take in order to complete the task.
Here's the information you'll have:

1- The user's instructions: This is the task you're trying to complete.
2- The current web page's elements to interact with: This is a simplified representation of the webpage, providing key information.

Answer Format: In order to complete the task successfully, you need to provide the positive candidates in the following format:
 - Include all the candidates in the bracket.
 - Separate each candidate by a comma.
 - If there are no candidates, provide an empty bracket.
 
To be successful, it is very important to follow the following rules:
1. You should reason the steps needed to achieve the task, extract what steps have been taken from the ACTIONS HISTORY, and issu what the following actions should be.
2. Issue stop action when you think you have 

In [9]:
# Load model directly
# NOTE(review): the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'optimum'", so that package has to be
# installed in the kernel environment before this cell can run.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Also passed to the tokenizer as model_max_length
max_tokens = 2048
model_name = "astronomer/Llama-3-8B-Instruct-GPTQ-8-Bit"

# NOTE(review): this rebinds `model`, which the first cell bound to the SentenceTransformer;
# cells that run after this one see the causal LM under that name.
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main",
                                             output_loading_info=False)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, model_max_length=max_tokens)
# Fall back to the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    
import torch

ModuleNotFoundError: No module named 'optimum'