In [None]:
import ast
import json
from collections import Counter
from pathlib import Path
import random
import sys

import pandas as pd

# Locate the directory that is put on sys.path so the `agent` package can be imported.
# When run as a script, __file__ exists and the parent of this file's directory is used.
# In a notebook kernel __file__ is undefined, so the grandparent of the current working
# directory is used instead.
# NOTE(review): the two branches climb a different number of levels relative to this
# file's location; they only agree for a particular working directory — confirm the
# intended layout.
try:
    CURRENT_DIR_PATH = Path(__file__).resolve().parent.parent
except NameError:
    CURRENT_DIR_PATH = Path.cwd().parent.parent

# Guard the append so re-running this cell does not pile up duplicate sys.path entries.
if str(CURRENT_DIR_PATH) not in sys.path:
    sys.path.append(str(CURRENT_DIR_PATH))

# These imports must stay below the sys.path update above.
from agent.action.base_action import _OP_REGISTRY as GLOBAL_ACTION_REGISTRY
from agent.action.base_action import EXECUTABLE_ACTIONS

# Seed the global RNG once so the sampling further down is reproducible on a fresh kernel.
random.seed(42)


In [None]:
# Action types of the registry entries whose name is listed in EXECUTABLE_ACTIONS.
base_action_types = [
    action_class.type
    for action_name, action_class in GLOBAL_ACTION_REGISTRY.items()
    if action_name in EXECUTABLE_ACTIONS
]

result_dir = CURRENT_DIR_PATH / "WindowsAgentArena" / "src" / "win-arena-container" / "client" / "all_results" / "gpt5low_run7"

# Log entry types that each add half a step to step_counter.
PLANNER_STEP_TYPES = (
    "RAGPlanner.parse_query_generation",
    "RAGPlanner.parse_action_selection",
    "RAGPlanner.parse_action_config",
)
# Marker that precedes the serialized action selection inside a log message.
SELECTION_MARKER = "parsed_action_selection="

# Frequency of every selected action across all logs.
action_counter = Counter()
# category -> number of selections of an action that is not a base action type.
category_dict = {}
# category -> set of distinct non-base actions selected in that category.
category_unique_dict = {}
step_counter = 0

# Find all log.jsonl files under result_dir; sorted so the printed output is deterministic.
log_files = sorted(result_dir.glob("**/log.jsonl"))

for log_file in log_files:
    # NOTE(review): every log file contributes a fixed 2 steps; the rationale is not
    # visible in this notebook — confirm.
    step_counter += 2
    # The category is the name of the directory two levels above the log file.
    category_name = log_file.parent.parent.name
    with open(log_file, "r", encoding="utf-8") as f:
        for line in f:
            raw_line = line.strip()
            if not raw_line:
                # Blank lines carry no entry; skip them instead of reporting an error.
                continue
            # Reset per line so the error report never shows text from an earlier entry.
            snippet = None
            try:
                log_entry = json.loads(raw_line)
                # Entries without a "type" key raise KeyError and are skipped below.
                if log_entry["type"] in PLANNER_STEP_TYPES:
                    step_counter += 0.5
                if "message" in log_entry and SELECTION_MARKER in log_entry["message"]:
                    # Keep only the first key/value pair of the serialized dict and drop
                    # anything from the first "(" onwards, then re-close the literal.
                    snippet = log_entry["message"].split(SELECTION_MARKER)[1].split(",")[0].split("(")[0]
                    snippet += "}" if snippet.endswith("'") else "'}"
                    # SECURITY: this text comes from log files, so it is parsed with
                    # ast.literal_eval (literals only) rather than eval, which would
                    # execute arbitrary expressions embedded in a log message.
                    action = ast.literal_eval(snippet)["selected_action"]
                    action_counter[action] += 1
                    if action not in base_action_types:
                        print(action)
                        category_dict[category_name] = category_dict.get(category_name, 0) + 1
                        category_unique_dict.setdefault(category_name, set()).add(action)
            except (json.JSONDecodeError, KeyError, SyntaxError, ValueError, TypeError):
                # Report what actually failed: the reconstructed snippet when one was
                # built, otherwise the raw log line.
                offending = snippet if snippet is not None else raw_line
                print(f"Skipping unparsable entry in {log_file}: {offending!r}")

# Display the action frequencies
print(f"Found {len(log_files)} log.jsonl files")
print(f"Total actions extracted: {sum(action_counter.values())}")
print("\nAction frequencies:")
for action, count in action_counter.most_common():
    print(f"{action}: {count}")

# category_dict counts non-base action selections, not log files, so label it as such.
print("\nNon-base action selections by category:")
for category, count in category_dict.items():
    print(f"{category}: {count}")

In [None]:
# Number of distinct non-base actions seen per category, shown as a table ordered by category.
category_unique_dict_count = {
    category: len(unique_actions)
    for category, unique_actions in category_unique_dict.items()
}
(
    pd.DataFrame(category_unique_dict_count.items(), columns=["Category", "Unique Actions"])
    .sort_values(by="Category")
)

In [None]:
# Show the raw {category: count} mapping of non-base action selections collected from the logs.
category_dict

In [None]:
# Same per-category counts as a one-column table, with rows ordered by category name.
(
    pd.DataFrame
    .from_dict(category_dict, orient='index', columns=['count'])
    .sort_index()
)

In [None]:



# Collect, in registry order, the action types that appeared in the logs
# and are not already part of the base action set.
selected_action_types = []
for action_name, action_class in GLOBAL_ACTION_REGISTRY.items():
    action_type = action_class.type
    if action_type not in action_counter:
        continue
    if action_type in base_action_types:
        continue
    selected_action_types.append(action_type)

len(selected_action_types)

In [None]:
# Write action-type samples: the first N shuffled non-base action types plus all base types.
SAMPLE_SEED = 42
# (number of non-base action types to keep, output file name)
SAMPLE_SPECS = ((36, "33percent.json"), (72, "66percent.json"))

# Shuffle a copy with a dedicated RNG so the cell is idempotent: re-running it writes the
# same files instead of reshuffling an already-shuffled list with advanced global RNG state.
# This matches a fresh-kernel run of random.seed(42) + random.shuffle(...) provided nothing
# else drew from the global RNG in between.
shuffled_action_types = list(selected_action_types)
random.Random(SAMPLE_SEED).shuffle(shuffled_action_types)

outpath = CURRENT_DIR_PATH / "agent" / "sample_actions"
outpath.mkdir(exist_ok=True, parents=True)
for sample_size, file_name in SAMPLE_SPECS:
    if len(shuffled_action_types) < sample_size:
        # Slicing would silently return fewer items; make the shortfall visible.
        print(
            f"Warning: requested {sample_size} non-base action types "
            f"but only {len(shuffled_action_types)} are available"
        )
    out_types = shuffled_action_types[:sample_size] + base_action_types

    print(f"Total action types: {len(out_types)}")
    with open(outpath / file_name, "w", encoding="utf-8") as f:
        json.dump(out_types, f, indent=4)