# Aggregate results

This notebook aggregates experiment results from multiple batch folders into a single deduplicated dataset. It:

1. **Loads all experiment batches** from the results directory
2. **Imports all JSON result files** across all batches into a unified DataFrame
3. **Identifies and resolves duplicates** based on agent_id and fault_name within one selected scenario (by default keeping the most recently modified result file)
4. **Creates a clean aggregate folder** containing only unique experiments (one per agent-fault combination for the selected scenario)

This is useful when you have run the same experiments across multiple batches and want to consolidate them for analysis.

In [None]:
from dotenv import load_dotenv
import os, json
import pandas as pd

# Repository root: two directory levels above the current working directory
root_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Read environment variables from the .env file located in the repository root
dotenv_path = os.path.join(root_dir, '.env')
load_dotenv(dotenv_path)

## Load results batches

In [None]:
# Root directory that holds one sub-folder per experiment batch, read from the
# RESULTS_PATH environment variable.
RESULTS_DIR = os.environ.get("RESULTS_PATH")
if not RESULTS_DIR:
    # Fail here with an actionable message: os.listdir(None) silently lists the
    # current directory and os.path.join(None, ...) raises an opaque TypeError.
    raise RuntimeError("RESULTS_PATH is not set; define it in the environment or in the .env file")
print(f"The results are stored in the directory {RESULTS_DIR}")

In [None]:
def _count_json_files(folder):
    """Return how many regular files directly inside *folder* end in '.json'."""
    return len([
        name for name in os.listdir(folder)
        if name.endswith('.json') and os.path.isfile(os.path.join(folder, name))
    ])


# Every sub-directory of RESULTS_DIR is treated as one experiment batch
experiment_batches = []
for entry in os.listdir(RESULTS_DIR):
    if os.path.isdir(os.path.join(RESULTS_DIR, entry)):
        experiment_batches.append(entry)

# Numbered overview: batch name followed by its number of JSON result files
for position, batch_name in enumerate(experiment_batches, start=1):
    n_results = _count_json_files(os.path.join(RESULTS_DIR, batch_name))
    plural = '' if n_results == 1 else 's'
    print(f"{position}) {batch_name} ({n_results} result{plural})")

## Load faults and agent configurations

In [None]:
# Directory containing the fault configuration files, read from the
# FAULTS_CONF_DIR environment variable.
FAULTS_CONF_DIR = os.environ.get("FAULTS_CONF_DIR")
if not FAULTS_CONF_DIR:
    # os.listdir(None) would silently list the current working directory instead
    raise RuntimeError("FAULTS_CONF_DIR is not set; define it in the environment or in the .env file")

# Sorted so the listing is stable across runs (os.listdir order is arbitrary)
faults_conf_files = sorted(f for f in os.listdir(FAULTS_CONF_DIR) if f.endswith('.json'))

for f in faults_conf_files:
    print(f"- {f}")

In [None]:
# Maps each agent configuration "name" to its "id", as read from the JSON files
# in AGENTS_CONF_DIR.
agent_name_dict = {}

AGENTS_CONF_DIR = os.environ.get("AGENTS_CONF_DIR")
if not AGENTS_CONF_DIR:
    # os.listdir(None) would silently list the current working directory instead
    raise RuntimeError("AGENTS_CONF_DIR is not set; define it in the environment or in the .env file")

# Sorted so the mapping's insertion order does not depend on directory order
agents_conf_files = sorted(f for f in os.listdir(AGENTS_CONF_DIR) if f.endswith('.json'))

for f in agents_conf_files:
    with open(os.path.join(AGENTS_CONF_DIR, f), 'r', encoding='utf-8') as file:
        data = json.load(file)

    name = data.get("name")
    if not isinstance(name, str) or not name:
        # A None key cannot be substring-matched and an empty name would match
        # every configuration name, so such entries are left out of the mapping.
        print(f"Warning: skipping {f}: missing or invalid 'name'")
        continue

    agent_name_dict[name] = data.get("id")

In [None]:
# Show the configuration-name -> agent-id mapping as the cell output
agent_name_dict

## Import all results

In [None]:
def _section(mapping, key):
    """Return mapping[key] when it is a dict, else {} (covers missing and null sections)."""
    value = mapping.get(key)
    return value if isinstance(value, dict) else {}


# Records are collected in a list and converted to a DataFrame once at the end;
# growing a DataFrame with pd.concat per file re-copies all previous rows each time.
records = []

for batch in experiment_batches:
    batch_path = os.path.join(RESULTS_DIR, batch)
    experiments = [f for f in os.listdir(batch_path) if f.endswith('.json')]
    for experiment in experiments:
        # Only the file read/parse can raise here, so the try body stays minimal
        try:
            with open(os.path.join(batch_path, experiment), 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Error processing {experiment}: {str(e)}")
            continue

        if not isinstance(data, dict):
            print(f"Warning: Error processing {experiment}: top-level JSON value is not an object")
            continue

        # The scenario is needed to filter and deduplicate later on, so a result
        # without a usable app_name is skipped instead of crashing the whole import.
        app_name = data.get("app_name")
        if not isinstance(app_name, str):
            print(f"Warning: Error processing {experiment}: missing or invalid 'app_name'")
            continue

        testbed = _section(data, "testbed")
        stats = _section(data, "stats")
        agent_stats = _section(stats, "agent_stats")
        final_report = _section(data, "final_report")
        evaluation = _section(data, "evaluation")

        # Localization is flattened to a comma-separated string; non-list values become None
        localization = final_report.get("localization", [])
        if isinstance(localization, list):
            localization_str = ", ".join(map(str, localization))
        else:
            localization_str = None

        record = {
            "batch": batch,
            "experiment_file": experiment,
            "agent_id": data.get("agent_id", None),
            "agent_conf_name": data.get("agent_configuration_name", None),
            "scenario": app_name.lower(),
            "fault_name": testbed.get("fault_name", None),
            "target_namespace": data.get("target_namespace", None),
            "trace_service_starting_point": data.get("trace_service_starting_point", None),
            "rca_tasks_per_iteration": testbed.get("rca_tasks_per_iteration", 0),
            "max_tool_calls": testbed.get("max_tool_calls", 0),
            "execution_time_seconds": stats.get("execution_time_seconds", 0),
            "total_tokens": stats.get("total_tokens", 0),
            "tokens_triage": _section(agent_stats, "triage_agent").get("total_tokens", 0),
            "tokens_planner": _section(agent_stats, "planner_agent").get("total_tokens", 0),
            "tokens_rca_worker": _section(agent_stats, "rca_agent").get("total_tokens", 0),
            "runs_count_rca": _section(agent_stats, "rca_agent").get("runs_count", 0),
            "tokens_supervisor": _section(agent_stats, "supervisor_agent").get("total_tokens", 0),
            "runs_count_supervisor": _section(agent_stats, "supervisor_agent").get("runs_count", 0),
            "detection": final_report.get("detection", None),
            "localization": localization_str,
            "root_cause": final_report.get("root_cause", None),
            "eval_detection": evaluation.get("detection", None),
            "eval_localization": evaluation.get("localization", None),
            "eval_rca_score": evaluation.get("rca_score", None),
            "eval_rca_motivation": evaluation.get("rca_motivation", None),
        }

        # Fallback when the file carries no agent_id: use the id of the first known
        # configuration whose name is contained in the file's configuration name.
        conf_name = record["agent_conf_name"]
        if not record["agent_id"] and isinstance(conf_name, str):
            record["agent_id"] = next(
                (
                    agent_id
                    for known_name, agent_id in agent_name_dict.items()
                    if isinstance(known_name, str) and known_name in conf_name
                ),
                None,
            )

        records.append(record)

experiments_df = pd.DataFrame(records)

experiments_df.shape

In [None]:
# Show the combined results table (one row per imported result file) as the cell output
experiments_df

## Deduplicate dataset

In [None]:
def _join_unique_batches(batch_names):
    """Return the distinct batch names, sorted and joined with ', '."""
    return ', '.join(sorted(set(batch_names)))


# One row per (agent_id, fault_name): how many result files exist and which batches they come from
per_agent_fault = experiments_df.groupby(['agent_id', 'fault_name'])
grouped_df = per_agent_fault.agg(
    count=('experiment_file', 'count'),
    batches=('batch', _join_unique_batches),
)
grouped_df = grouped_df.reset_index()

grouped_df

In [None]:
# Distinct scenarios with their number of results (value_counts ignores missing values)
scenario_counts = experiments_df['scenario'].value_counts()
scenarios = sorted(scenario_counts.index)
if not scenarios:
    # Without this guard the prompt below could never be satisfied ("1-0") and would loop forever
    raise RuntimeError("No scenarios found in the loaded results; nothing to select")

print("\nAvailable apps/scenarios:")
for i, scenario in enumerate(scenarios, 1):
    print(f"{i}) {scenario} ({scenario_counts.loc[scenario]} results)")

# Keep asking until the user enters a number inside the listed range
while True:
    try:
        choice = int(input(f"\nSelect an app (1-{len(scenarios)}): "))
        if 1 <= choice <= len(scenarios):
            selected_scenario = scenarios[choice - 1]
            break
        else:
            print(f"Invalid choice. Please enter a number between 1 and {len(scenarios)}")
    except ValueError:
        print("Invalid input. Please enter a valid number.")

print(f"\nSelected: {selected_scenario}")

# Filter dataframe to selected scenario (copy so later edits do not touch experiments_df)
filtered_df = experiments_df[experiments_df['scenario'] == selected_scenario].copy()
print(f"Found {len(filtered_df)} results for {selected_scenario}")

In [None]:
from IPython.display import clear_output

def resolve_duplicates(df, automatic=False):
    """Keep exactly one row per (agent_id, fault_name) pair.

    Rows whose (agent_id, fault_name) pair occurs once are kept unchanged.
    For every pair that occurs more than once, a single row is chosen:

    * ``automatic=True``: the row whose result file, located at
      RESULTS_DIR/<batch>/<experiment_file>, has the latest modification time;
      ties go to the first such row.
    * ``automatic=False``: the candidate batches are printed and the user is
      prompted for the index of the row to keep.

    Args:
        df: DataFrame with at least the columns ``agent_id``, ``fault_name``,
            ``batch`` and ``experiment_file``.
        automatic: Choose by file modification time instead of prompting.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the same columns as ``df``:
        the non-duplicated rows first, then one chosen row per duplicated pair.

    Raises:
        OSError: In automatic mode, if a result file's modification time cannot be read.
    """
    key_columns = ['agent_id', 'fault_name']
    duplicate_mask = df.duplicated(subset=key_columns, keep=False)
    non_duplicates = df[~duplicate_mask]
    chosen_rows = []

    # dropna=False: duplicated() treats missing keys as equal, so rows with a missing
    # agent_id/fault_name can be flagged as duplicates; the default groupby would drop
    # those groups and the rows would silently vanish from the result.
    for _, group in df[duplicate_mask].groupby(key_columns, dropna=False):
        if automatic:
            # Modification times are kept in a plain list (not a new column) so the
            # selected row has exactly the columns of the input frame.
            mtimes = [
                os.path.getmtime(os.path.join(RESULTS_DIR, batch, experiment_file))
                for batch, experiment_file in zip(group['batch'], group['experiment_file'])
            ]
            # Positional selection; max() returns the first position among equal times
            newest_pos = max(range(len(mtimes)), key=mtimes.__getitem__)
            chosen_rows.append(group.iloc[newest_pos])
        else:
            print(f"\nDuplicate found for: Agent {group.iloc[0]['agent_id']} - {group.iloc[0]['fault_name']}")
            print(f"\n{'Index':<7} {'Batch':<40}")
            print("-" * 50)
            for idx, row in enumerate(group.itertuples()):
                print(f"{idx:<7} {row.batch:<40}")

            # Keep asking until a valid position within the group is entered
            while True:
                try:
                    idx = int(input(f"\nSelect the index of the row to keep (0-{len(group)-1}): "))
                    if 0 <= idx < len(group):
                        break
                    else:
                        print(f"Invalid index. Please enter a number between 0 and {len(group)-1}")
                except ValueError:
                    print("Invalid input. Please enter a valid number.")

            chosen_rows.append(group.iloc[idx])
            clear_output()

    if not chosen_rows:
        # Nothing to merge; avoid concatenating with an empty frame
        return non_duplicates.reset_index(drop=True)

    return pd.concat([non_duplicates, pd.DataFrame(chosen_rows)], ignore_index=True)

# Deduplicate the selected scenario's results without prompting: automatic=True keeps,
# for each duplicated agent/fault pair, the result file with the latest modification time
unique_experiments_df = resolve_duplicates(filtered_df, automatic=True)

In [None]:
# (rows, columns) of the deduplicated table, shown as the cell output
unique_experiments_df.shape

## Store results in a folder

In [None]:
import shutil

# Ask until a usable folder name is given: an empty name (or one resolving to
# RESULTS_DIR itself) would copy the files straight into the results root.
while True:
    dir_aggregate_name = input("Enter a name for the aggregate results folder: ").strip()
    unique_results_dir = os.path.join(RESULTS_DIR, dir_aggregate_name)
    if dir_aggregate_name and os.path.abspath(unique_results_dir) != os.path.abspath(RESULTS_DIR):
        break
    print("Invalid folder name. Please enter a non-empty name.")

# Create a new folder in RESULTS_DIR
os.makedirs(unique_results_dir, exist_ok=True)

# Copy every file listed in unique_experiments_df from its batch folder.
# Files from different batches may share a name; instead of letting the later copy
# overwrite the earlier one, a repeated name is prefixed with its batch name.
used_names = set()
for _, row in unique_experiments_df.iterrows():
    src = os.path.join(RESULTS_DIR, row['batch'], row['experiment_file'])
    dst_name = row['experiment_file']
    if dst_name in used_names:
        dst_name = f"{row['batch']}__{row['experiment_file']}"
        print(f"Warning: {row['experiment_file']} already copied from another batch; storing as {dst_name}")
    used_names.add(dst_name)
    dst = os.path.join(unique_results_dir, dst_name)
    if os.path.abspath(src) == os.path.abspath(dst):
        # The chosen folder is the file's own batch folder; copy2 would raise SameFileError
        continue
    shutil.copy2(src, dst)