In [6]:
from pathlib import Path
from data_analyzer import (
    load_spider2v_tasks,
    compute_stats,
    plot_density_histogram,
    plot_instruction_lengths_kde,
    plot_static_category_pie,
    save_extended_summary,
)
import pandas as pd

# Set up directories
examples_dir = Path("../evaluation_examples/examples")
output_dir = Path("./data")
output_dir.mkdir(parents=True, exist_ok=True)

# Load and analyze tasks
tasks = load_spider2v_tasks(examples_dir)
stats = compute_stats(tasks)

# Plot results
plot_density_histogram(
    stats["action_steps"], "Distribution of Action Steps", "Steps", "action_steps_hist.png", output_dir
)
plot_density_histogram(
    stats["related_apps"], "Related Applications per Task", "Apps", "related_apps_hist.png", output_dir
)
plot_instruction_lengths_kde(stats, output_dir)
plot_static_category_pie(examples_dir, output_dir)

# Save summary
summary_df = save_extended_summary(tasks, stats, output_dir)
display(summary_df)
display(summary_df.to_dict())

# FIX: Reformat DataFrame for LaTeX export
summary_df_latex = (
    pd.DataFrame.from_dict(summary_df.to_dict()["Value"], orient="index", columns=["Count / Metric"])
    .rename_axis("Value")
    .reset_index()
)

# Convert to LaTeX table
latex_table = summary_df_latex.to_latex(
    index=False, caption="Summary of Task Types and Complexity in Spider2-V", label="tab:spider2v-summary", escape=False
)

print(latex_table)

Unnamed: 0,Value
Total Tasks,494 (100%)
Pure CLI,28 (5.7%)
Pure GUI,184 (37.2%)
CLI + GUI,282 (57.1%)
w. Authentic User Account,170 (34.4%)
w/o. Authentic User Account,324 (65.6%)
Easy (≤ 5),98 (19.8%)
Medium (6 ~ 15),310 (62.8%)
Hard (> 15),86 (17.4%)
Avg. Action Steps (P25/P50/P75),10.64 / 7 / 9 / 13


{'Value': {'Total Tasks': '494 (100%)',
  'Pure CLI': '28 (5.7%)',
  'Pure GUI': '184 (37.2%)',
  'CLI + GUI': '282 (57.1%)',
  'w. Authentic User Account': '170 (34.4%)',
  'w/o. Authentic User Account': '324 (65.6%)',
  'Easy (≤ 5)': '98 (19.8%)',
  'Medium (6 ~ 15)': '310 (62.8%)',
  'Hard (> 15)': '86 (17.4%)',
  'Avg. Action Steps (P25/P50/P75)': '10.64 / 7 / 9 / 13',
  'Avg. Length of Abstract Instructions': 37.1,
  'Avg. Length of Verbose Instructions': 246.5,
  'Avg. Number of Used Apps Per Task': 2.5}}

\begin{table}
\caption{Summary of Task Types and Complexity in Spider2-V}
\label{tab:spider2v-summary}
\begin{tabular}{ll}
\toprule
Value & Count / Metric \\
\midrule
Total Tasks & 494 (100%) \\
Pure CLI & 28 (5.7%) \\
Pure GUI & 184 (37.2%) \\
CLI + GUI & 282 (57.1%) \\
w. Authentic User Account & 170 (34.4%) \\
w/o. Authentic User Account & 324 (65.6%) \\
Easy (≤ 5) & 98 (19.8%) \\
Medium (6 ~ 15) & 310 (62.8%) \\
Hard (> 15) & 86 (17.4%) \\
Avg. Action Steps (P25/P50/P75) & 10.64 / 7 / 9 / 13 \\
Avg. Length of Abstract Instructions & 37.100000 \\
Avg. Length of Verbose Instructions & 246.500000 \\
Avg. Number of Used Apps Per Task & 2.500000 \\
\bottomrule
\end{tabular}
\end{table}



: 