In [1]:
import os
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from azure.identity import DefaultAzureCredential
from azure.storage.blob import ContainerClient
# Notebook kernels do not define __file__, so fall back to the current working
# directory whenever this code is not running as a regular script/module.
if "__file__" in globals():
    CURRENT_DIR_PATH = Path(globals()["__file__"]).resolve().parent
else:
    CURRENT_DIR_PATH = Path.cwd()

#### Data Analysis

In [2]:
def combine_json_results(dir_path):
    """
    Collect every ``result.json`` found below ``dir_path`` into one DataFrame.

    Each JSON object becomes one row. The following columns are added/derived:

    - ``file_path``: path of the ``result.json`` the row was read from
    - ``task_name``: name of the folder that contains the file
    - ``category``: name of the folder one level above ``task_name``
    - ``IFN_score``: 1 when ``total_steps`` is 0 and ``task_name`` starts with
      "INF", otherwise 0
    - ``result``: the stored result (missing values count as 0) plus ``IFN_score``

    Files that cannot be read, are not valid JSON, or do not contain a JSON
    object are reported on stdout and skipped.

    Parameters
    ----------
    dir_path : str or path-like
        Root directory that is searched recursively.

    Returns
    -------
    pandas.DataFrame
        One row per readable ``result.json``. When nothing is found, an empty
        frame that still carries the derived columns is returned, so callers
        can aggregate without special-casing.
    """
    records = []

    # Walk through directory tree
    for root, _dirs, files in os.walk(dir_path):
        if 'result.json' not in files:
            continue
        file_path = os.path.join(root, 'result.json')
        try:
            # JSON is UTF-8 by specification; do not rely on the platform default.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {file_path}: {e}")
            continue
        if not isinstance(data, dict):
            print(f"Error reading {file_path}: expected a JSON object, got {type(data).__name__}")
            continue
        # Add file path for reference
        data['file_path'] = file_path
        records.append(data)

    if not records:
        # Nothing found: return an empty frame with the columns callers rely on.
        return pd.DataFrame(columns=["result", "file_path", "category", "task_name", "IFN_score"])

    # Create DataFrame from results
    df = pd.DataFrame(records)
    if 'result' not in df.columns:
        # No file carried a "result" key; treat every task as scoring 0.
        df['result'] = 0
    df['result'] = df['result'].fillna(0)

    # Extract task folder and category folder from file_path
    task_dirs = df['file_path'].map(os.path.dirname)
    df['category'] = task_dirs.map(lambda d: os.path.basename(os.path.dirname(d)))
    df['task_name'] = task_dirs.map(os.path.basename)

    # Tasks whose folder name starts with "INF" earn a point when no step was taken.
    # NOTE(review): "INF" presumably marks infeasible tasks, and the column name
    # "IFN_score" looks like a transposition of "INF"; it is kept unchanged because
    # downstream code may already reference it — TODO confirm.
    if 'total_steps' in df.columns:
        took_no_steps = df['total_steps'] == 0
    else:
        took_no_steps = pd.Series(False, index=df.index)
    df["IFN_score"] = (took_no_steps & df["task_name"].str.startswith("INF")).astype("int64")
    df["result"] = df["result"] + df["IFN_score"]

    return df
def show_results(result_dir_path):
    """
    Print a short summary of the results stored under ``result_dir_path``.

    Shows the overall mean of the ``result`` column, the mean per category and
    the number of rows, then returns the combined DataFrame produced by
    ``combine_json_results``.
    """
    results_df = combine_json_results(result_dir_path)

    overall_rate = results_df['result'].mean()
    print(f"the average rate is {overall_rate}")

    print("success rate by category")
    per_category_rate = results_df[["category", "result"]].groupby("category").mean()
    display(per_category_rate)

    task_count = len(results_df)
    print(f"total number of tasks completed {task_count}")
    return results_df



In [None]:

# Result directories, expressed relative to CURRENT_DIR_PATH.
run_dir = CURRENT_DIR_PATH.joinpath("../../WindowsAgentArena/src/win-arena-container/client/results/0")
run_dir5 = CURRENT_DIR_PATH.joinpath("../../WindowsAgentArena/src/win-arena-container/client/all_results/gpt5low_run7")

In [None]:
# Summarise the run stored under run_dir and keep the combined frame for inspection.
df = show_results(result_dir_path=run_dir)

the average rate is 0.47097567181221467
success rate by category


Unnamed: 0_level_0,result
category,Unnamed: 1_level_1
chrome,0.588235
clock,0.75
file_explorer,0.421053
microsoft_paint,0.333333
msedge,0.615385
msoffice_excel,0.291667
msoffice_word,0.388889
notepad,0.5
settings,0.8
vlc,0.288537


total number of tasks completed 153


#### Download the image from Azure Blob Storage

In [None]:


def _add_suffix_to_rel_path(rel_path: str, suffix: str) -> str:
    """Helper to add suffix before extension (if any)."""
    base, ext = os.path.splitext(rel_path)
    return f"{base}{suffix}{ext}"


def download_blobs_from_container(
    account_url: str,
    container_name: str,
    prefix: str,
    download_dir: str,
    conflict_policy: str = "skip_file",   # "rename", "skip_file", or "skip_dir"
    conflict_suffix: str = "__file"
) -> None:
    """
    Download all blobs from an Azure container that match the given prefix,
    handling file/directory name conflicts.

    A conflict is a relative path that exists both as a blob (a file) and as
    a directory, i.e. as the parent of other blobs or as a placeholder blob
    whose name ends with "/". A local file system cannot hold both under the
    same name, so ``conflict_policy`` decides which one wins.

    Parameters
    ----------
    account_url : str
        Azure storage account URL.
    container_name : str
        Name of the container to download from.
    prefix : str
        Blob name prefix to filter.
    download_dir : str
        Local directory to download into.
    conflict_policy : {"rename", "skip_file", "skip_dir"}, default="skip_file"
        - "rename": rename the conflicting file locally using conflict_suffix
        - "skip_file": skip the conflicting file, keep directory contents
        - "skip_dir": keep the conflicting file, skip directory contents
    conflict_suffix : str, default="__file"
        Suffix to append when renaming conflicting files.

    Raises
    ------
    ValueError
        If ``conflict_policy`` is not one of the supported values.
    """
    valid_policies = ("rename", "skip_file", "skip_dir")
    if conflict_policy not in valid_policies:
        raise ValueError(
            f"Unknown conflict_policy {conflict_policy!r}; expected one of {valid_policies}."
        )

    credential = DefaultAzureCredential()
    container_client = ContainerClient(account_url, container_name, credential=credential)

    # Pair every blob name with its path relative to the prefix (computed once).
    blob_entries = []
    for b in container_client.list_blobs(name_starts_with=prefix):
        if not b.name.startswith(prefix):
            continue
        blob_entries.append((b.name, b.name[len(prefix):].lstrip("/")))

    # Distinguish files vs. directory placeholders
    file_paths = set()
    dir_placeholders = set()
    for _, rel in blob_entries:
        if rel == "" or rel.endswith("/"):
            dp = rel.rstrip("/")
            if dp:
                dir_placeholders.add(dp)
            continue
        file_paths.add(rel)

    # Collect implied directories from file paths
    implied_dirs = set()
    for p in file_paths:
        parts = p.split("/")
        for i in range(1, len(parts)):
            implied_dirs.add("/".join(parts[:i]))

    all_dirs = implied_dirs | dir_placeholders

    # Conflicts = paths that are both file and directory
    conflicts = file_paths & all_dirs

    # If skipping directories, everything at or below a conflicting path is
    # skipped as a directory. The tuple is empty for the other policies, and
    # str.startswith(()) is always False.
    if conflict_policy == "skip_dir":
        skipped_dir_prefixes = tuple(c + "/" for c in conflicts)
    else:
        skipped_dir_prefixes = ()

    # Prepare download plan
    plan = []
    for blob_name, rel in blob_entries:
        if rel == "" or rel.endswith("/"):
            continue

        # Skip if under a directory marked for skipping
        if rel.startswith(skipped_dir_prefixes):
            continue

        local_rel = rel
        if rel in conflicts:
            if conflict_policy == "skip_file":
                continue
            elif conflict_policy == "rename":
                local_rel = _add_suffix_to_rel_path(rel, conflict_suffix)
            # skip_dir: keep the file as-is

        local_rel_fs = local_rel.replace("/", os.sep)
        local_path = os.path.join(download_dir, local_rel_fs)
        plan.append((blob_name, local_path))

    # Download files (progress is reported by the tqdm bar)
    for blob_name, local_path in tqdm(plan, total=len(plan), desc="Downloading blobs"):
        dir_name = os.path.dirname(local_path)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)
        downloader = container_client.download_blob(blob_name)
        with open(local_path, "wb") as f:
            downloader.readinto(f)

    # Create empty directories for placeholders and implied dirs
    for d in all_dirs:
        # Under "skip_dir" the conflicting path now exists as a regular file, so
        # creating it (or anything below it) as a directory would raise.
        if (d + "/").startswith(skipped_dir_prefixes):
            continue
        d_local = os.path.join(download_dir, d.replace("/", os.sep))
        os.makedirs(d_local, exist_ok=True)

    print(
        f"All blobs downloaded. Conflicts: {len(conflicts)} "
        f"handled with policy '{conflict_policy}'."
    )
