In [1]:
import json
import pandas as pd
import os
import sys
import wandb
from urllib.parse import urlparse, parse_qs
from collections import defaultdict, Counter
from dotenv import load_dotenv
import re
load_dotenv()

# Run configuration, read from the process environment (populated by load_dotenv()).
# Each value is None when the corresponding variable is not set.
WBPROJECT = os.environ.get("WBPROJECT")
# NOTE: the variable is literally spelled "WBENTITIY"; later cells use the same name.
WBENTITIY = os.environ.get("WBENTITIY")
INDIVIDUAL_RUNS_PATH = os.environ.get("INDIVIDUAL_RUNS_PATH")
QUERIES_PATH = os.environ.get("QUERIES_PATH")

In [2]:
def parse_log_file(file_path):
    """Lazily yield one decoded JSON value per non-blank line of a JSON Lines file.

    Args:
        file_path (str): Path to a log file holding one JSON document per line.

    Yields:
        The decoded JSON value of each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as log_file:
        for line in log_file:
            record = line.strip()
            # Blank lines are not records; skip them rather than letting
            # json.loads("") raise and abort the whole run.
            if not record:
                continue
            yield json.loads(record)


In [3]:
# helper functions:
from urllib.parse import urlparse
from collections import defaultdict

# Extract base domain
def extract_base_domain(url):
    """Return the host name of ``url`` without port or credentials.

    Despite the function name, subdomains such as ``www.`` are kept: callers
    compare the result against full host names like ``"www.bing.com"``.
    The host is lower-cased, since host names are case-insensitive.

    Args:
        url (str): The URL to inspect.

    Returns:
        str: The host name, or ``""`` when ``url`` has no network location.
    """
    # ``hostname`` strips ``user:password@`` and ``:port`` and unwraps IPv6
    # brackets, which a plain ``netloc.split(":")[0]`` gets wrong.
    host = urlparse(url).hostname
    return host or ""

# Count unique websites from visited URLs
def count_unique_websites(visited_urls):
    """Sum the recorded visit counts per host.

    Args:
        visited_urls (dict): Maps each URL to a dict; its ``"visits"`` entry
            (0 when absent) is the number of visits recorded for that URL.

    Returns:
        collections.defaultdict: Host (as returned by ``extract_base_domain``)
        mapped to the summed visits. URLs without a host are left out.
    """
    visits_per_domain = defaultdict(int)
    for address, entry in visited_urls.items():
        visit_count = entry.get("visits", 0)
        host = extract_base_domain(address)
        if not host:
            continue
        visits_per_domain[host] += visit_count
    return visits_per_domain

In [4]:
def extract_metrics(log_file_path):
    """Aggregate the metrics of one agent run from its JSON Lines log.

    The log is streamed once through ``parse_log_file``. Counters are
    accumulated per log entry and a few derived values are computed at the end.
    The fields under "postprocessing" are only initialised here with defaults.

    Args:
        log_file_path (str): Path to the run's JSON Lines log file.

    Returns:
        dict: The metrics dictionary. ``timestamp`` is the first 14-digit group
        found in ``folder_name``, or ``None`` when the log contains no
        Initialization entry or the folder name has no such group.

    Raises:
        Any exception raised by ``parse_log_file`` while reading the log.
    """
    metrics = {
        "query": None,  # Query text taken from the UserProxy entry
        "final_answer": None,  # Final answer taken from the Orchestrator entry
        "difficulty": None,  # Difficulty level (Initialization arguments)
        "id_query": None,  # Query ID (Initialization arguments)
        "query_num_listings": None,  # Number of listings requested in the query
        "query_num_websites": None,  # Number of websites requested in the query
        "num_webpage_visits": None,  # Number of distinct URLs seen (derived below)
        "model_calls": 0,  # Number of LLMCallEvent entries
        "input_tokens": 0,  # Sum of prompt_tokens
        "output_tokens": 0,  # Sum of completion_tokens
        "total_tokens": 0,  # input_tokens + output_tokens (derived below)
        "screenshots": 0,  # WebSurferEvent messages mentioning "screenshot"
        "errors": 0,  # Messages mentioning "error" (see NOTE in the loop)
        "failed_action_attempts": None,  # TODO: not extracted yet
        "visited_urls": {},  # url -> {"visits", "clicking", "typing"}
        "domain_counts": {},  # host -> summed visits (derived below)
        # postprocessing
        "success": 0,
        "complete_fulfilment_ratio": None,
        "requirement_score": None,
        "number_listings": None,
        "website_coverage": None,
        "num_type_search_engine": 0,  # Mirrors num_queries_search_engine
        "num_type_immo_website": 0,  # Mirrors num_queries_immo_website
        "num_queries_search_engine": 0,  # web_search actions on a search engine
        "num_queries_immo_website": 0,  # web_search actions on any other host
        "primary_failure": None,
        "secondary_failure": None,
        "folder_name": None,  # Taken from the Initialization arguments
        "timestamp": None,  # 14-digit group of folder_name (derived below)
        "model_name": "magentic",
        "text_model": 1,
        "vision_model": 1,
        "multi_agent": 1
    }

    search_engine_domains = {"www.bing.com", "www.google.com"}

    def _message_text(entry):
        """Return the entry's lower-cased message, or "" if it is not a string."""
        message = entry.get("message", "")
        return message.lower() if isinstance(message, str) else ""

    for log in parse_log_file(log_file_path):
        # Extract query from UserProxy
        if log.get("source") == "UserProxy" and "message" in log:
            metrics["query"] = log["message"]

        # Extract final answer from Orchestrator
        if log.get("source") == "Orchestrator (final answer)" and "message" in log:
            metrics["final_answer"] = log["message"]

        # Extract initialization arguments
        if log.get("type") == "Initialization" and "arguments" in log:
            args = log["arguments"]
            metrics["difficulty"] = args.get("difficulty", None)
            metrics["id_query"] = args.get("id_query", None)
            metrics["query_num_listings"] = args.get("query_num_listings", None)
            metrics["query_num_websites"] = args.get("query_num_websites", None)
            metrics["folder_name"] = args.get("folder_name", None)

        # NOTE(review): the chain below is if/elif, so "error" is only searched in
        # entries that are neither WebSurferEvents with a URL nor LLMCallEvents.
        # Confirm that this is intended.
        if log.get("type") == "WebSurferEvent" and "url" in log:
            url = log["url"]
            action = log.get("action", None)

            # Host of this URL, used to tell search engines from other sites
            domain = extract_base_domain(url)

            # Create the per-URL counters on first sight, then count the visit
            url_stats = metrics["visited_urls"].setdefault(
                url, {"visits": 0, "clicking": 0, "typing": 0}
            )
            url_stats["visits"] += 1

            # Increment specific action counters
            if action == "click":
                url_stats["clicking"] += 1
            elif action == "web_search":
                url_stats["typing"] += 1

                # Count search engine queries
                if domain in search_engine_domains:
                    metrics["num_queries_search_engine"] += 1
                else:  # Assume non-search-engine searches are real estate queries
                    metrics["num_queries_immo_website"] += 1

            if "screenshot" in _message_text(log):
                metrics["screenshots"] += 1

        elif log.get("type") == "LLMCallEvent":
            metrics["model_calls"] += 1
            # ``or 0`` guards against token fields that are present but null.
            metrics["input_tokens"] += log.get("prompt_tokens", 0) or 0
            metrics["output_tokens"] += log.get("completion_tokens", 0) or 0

        elif "error" in _message_text(log):
            metrics["errors"] += 1

    # Calculate derived metrics
    metrics["total_tokens"] = metrics["input_tokens"] + metrics["output_tokens"]
    metrics["num_type_search_engine"] = metrics["num_queries_search_engine"]
    metrics["num_type_immo_website"] = metrics["num_queries_immo_website"]
    # Counts distinct URLs, not the sum of their visits.
    metrics["num_webpage_visits"] = len(metrics["visited_urls"])
    metrics["domain_counts"] = count_unique_websites(metrics["visited_urls"])

    # Read the folder name back from ``metrics`` so that a log without an
    # Initialization entry (or without a 14-digit group in the folder name)
    # yields timestamp=None instead of raising.
    folder_name = metrics["folder_name"]
    timestamp_match = (
        re.search(r'(\d{14})', folder_name) if isinstance(folder_name, str) else None
    )
    metrics["timestamp"] = timestamp_match.group(1) if timestamp_match else None

    return metrics


In [5]:
# Log of the run to analyse; the relative path is resolved against the current working directory.
# Alternative log from the archive folder (commented out):
# file_path = "../logs/archive/vision_magnetic_TEST_02_20241215154814/log.jsonl"
file_path = "../logs/vision_magnetic_advanced_02_20241217204927/log.jsonl"

In [6]:
# Parse the selected log into the metrics dict that the cells below display, update and export.
metrics = extract_metrics(file_path)

In [None]:
metrics

#### Manual metric updates

In [8]:
# Put QUERIES_PATH (from the environment) on sys.path so the import below can
# resolve; presumably it is the directory containing the `queries` package.
# Guarded so that an unset QUERIES_PATH does not put None on sys.path and
# re-running this cell does not append the same entry again.
if QUERIES_PATH and QUERIES_PATH not in sys.path:
    sys.path.append(QUERIES_PATH)
from queries.failure_reasons import FAILURE_REASONS, modify_metrics

In [12]:
def update_and_save_metrics(metrics, updates):
    """
    Merge ``updates`` into ``metrics`` and save the result as 'metrics.json'
    in the folder named by the 'folder_name' key.

    ``metrics`` is modified in place, but only after the target folder has been
    validated, so a failing call leaves the dictionary untouched.

    Args:
        metrics (dict): The extracted metrics to update.
        updates (dict): A dictionary containing the new values for postprocessing metrics.

    Returns:
        str: The path to the updated metrics file.

    Raises:
        ValueError: If neither ``updates`` nor ``metrics`` provides a non-empty
            'folder_name'.
    """
    # Resolve the folder as it will be after the merge; a value in ``updates``
    # wins, even an empty one. Validate before touching ``metrics``.
    if "folder_name" in updates:
        output_folder = updates["folder_name"]
    else:
        output_folder = metrics.get("folder_name")
    if not output_folder:
        raise ValueError("The metrics dictionary must contain a valid 'folder_name' key.")

    # Update the metrics with the new values
    metrics.update(updates)

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Define the output path for the updated metrics.json
    output_path = os.path.join(output_folder, "metrics.json")

    # Write the updated metrics to the file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=4)

    print(f"Updated metrics saved to: {output_path}")
    return output_path

In [None]:
sorted(FAILURE_REASONS)

In [None]:
# Manually assigned post-processing values for the current run.
updates = dict(
    success=0,
    primary_failure="token_limitation",
    secondary_failure=["captcha"],
)

# Show the metrics, merge the values in (this also writes metrics.json), then show them again.
print("Metrics before update:", metrics)
update_and_save_metrics(metrics, updates)
print("Updated Metrics:", metrics)


### Logging results

In [15]:
def log_dataframe_to_wandb(df, project, entity, artifact_name="experiment_metrics", artifact_type="dataset"):
    """
    Log a Pandas DataFrame to W&B as a table, ensuring dictionaries remain in single cells.

    Note:
        ``df`` is modified in place: every column that contains at least one
        dict is replaced by its ``json.dumps`` serialisation. Later cells that
        reuse ``df`` therefore see JSON strings in those columns.

    Args:
        df (pd.DataFrame): The Pandas DataFrame to log.
        project (str): W&B project name.
        entity (str): W&B entity name.
        artifact_name (str): Currently unused; kept for interface compatibility.
        artifact_type (str): Currently unused; kept for interface compatibility.

    Returns:
        None
    """
    # Serialize dictionary columns to JSON strings (in place, see Note above)
    for col in df.columns:
        if any(isinstance(value, dict) for value in df[col]):
            df[col] = df[col].apply(json.dumps)

    # Initialize W&B
    wandb.init(project=project, entity=entity)

    try:
        # Define W&B table columns explicitly
        wandb_table = wandb.Table(columns=df.columns.tolist())

        # Add rows to the W&B table
        for _, row in df.iterrows():
            wandb_table.add_data(*row)

        # Log the table
        wandb.log({"Experiment Metrics": wandb_table})
    except Exception:
        # Close the run even on failure, and mark it as failed, so that no
        # run is left open in the kernel.
        wandb.finish(exit_code=1)
        raise

    # Finish the W&B run
    wandb.finish()



In [16]:
import pandas as pd
import json

def display_metrics_as_table(metrics):
    """
    Turn the metrics of one experiment into a single-row table.

    Args:
        metrics (dict): Metrics of a single experiment; each key becomes a column.

    Returns:
        pd.DataFrame: A DataFrame with exactly one row holding the metric values.
    """
    # A one-element list of dicts gives one row with a column per key.
    single_run_rows = [metrics]
    return pd.DataFrame(single_run_rows)

In [None]:
# Single-row DataFrame for this run; reused below for the W&B upload and the CSV append.
df = display_metrics_as_table(metrics)
df.head()

In [None]:
# Upload the single-run table to W&B. Note that this call rewrites the dict-valued
# columns of `df` to JSON strings in place, so the CSV written further down
# stores JSON text in those columns.
log_dataframe_to_wandb(
    df,
    project=WBPROJECT,
    entity=WBENTITIY
)

In [None]:
# Load the accumulated results from the semicolon-separated CSV at INDIVIDUAL_RUNS_PATH
# and show its last two rows.
df_combined = pd.read_csv(INDIVIDUAL_RUNS_PATH, sep=";") #TODO
df_combined.tail(2)

In [20]:
# Cast both frames to object dtype before concatenating
# (presumably to avoid dtype coercion between the CSV columns and the new row; TODO confirm).
# NOTE(review): this cell is not idempotent. Re-running it appends the same row to
# `df_combined` again and overwrites the CSV with the duplicated row.
df_combined = df_combined.astype(object)
df = df.astype(object)
df_combined = pd.concat([df_combined, df], ignore_index=True)
# save to csv
df_combined.to_csv(INDIVIDUAL_RUNS_PATH, sep=";", index=False)

In [None]:
df_combined.tail(4)

### Rename columns

In [None]:
# # # load csv
# df= pd.read_csv(INDIVIDUAL_RUNS_PATH, sep=";") #TODO
# df.head(1)

In [67]:
# # rename columns: 
# """ 
# ID_query -> id_query
# num_listings -> query_num_listings
# num_pages -> query_num_websites
# llm_calls -> model_calls
# no_action -> failed_action_attempts
# """
# df.rename(columns={
#     "ID_query": "id_query",
#     "num_listings": "query_num_listings",
#     "num_pages": "query_num_websites",
#     "llm_calls": "model_calls",
#     "no_action": "failed_action_attempts"
# }, inplace=True)

In [None]:
# df.head(1)

In [69]:
# # using the 'folder_name' column, extract the timestamp and save it as the 'timestamp' column
# df['timestamp'] = df['folder_name'].str.extract(r'_(\d{14})')

In [70]:
# # save to csv
# df.to_csv(INDIVIDUAL_RUNS_PATH, sep=";", index=False)