In [3]:
import json
from utils import load_all_jsons, clean_participant

participants = load_all_jsons("./data")
cleaned = [clean_participant(p) for p in participants]

with open("participants_clean.json", "w", encoding="utf-8") as f:
    json.dump(cleaned, f, ensure_ascii=False, indent=2)

Found 3 JSON files in ./data
Total participants combined: 4
✅ Cleaned participants written to participants_clean.json


In [3]:
from utils import compute_f1
import json

f1_tasks = [
    "adj",
    "adj_mean",
    "adj_var",
    "attr_comb",
    "attr_extremes",
    "adj_mean_extra",
    "adj_var_extra",
    "attr_comb_extra",
    "attr_extremes_extra",
]

with open("participants_clean.json", "r") as f:
    participants = json.load(f)

for participant in participants:
    participant["f1_scores"] = {}
    for task in f1_tasks:
        if task in participant["answers"]:
            answer_data = participant["answers"][task]

            if "answerNodes" in answer_data["answer"]:
                pred = answer_data["answer"]["answerNodes"]
            elif "cluster" in answer_data["answer"]:
                pred = [answer_data["answer"]["cluster"]]
            else:
                pred = []

            correct = answer_data.get("correctAnswer", [])
            if correct:
                correct_node = correct[0]
                if isinstance(correct_node["answer"], list):
                    true = correct_node["answer"]
                else:
                    true = [correct_node["answer"]]
            else:
                true = []
            f1 = compute_f1(pred, true)
            participant["f1_scores"][task] = f1
        else:
            participant["f1_scores"][task] = None


with open("participants_clean_f1_computed.json", "w") as f:
    json.dump(participants, f, indent=2)

In [1]:
from utils import compute_accuracy
import json

accuracy_tasks = [
    "path_mean",
    "path_var",
    "path_mean_extra",
    "path_var_extra",
    "classification",
    "cluster_mean",
    "cluster_var",
    "classification_extra",
    "cluster_mean_extra",
    "cluster_var_extra",
]

with open("participants_clean_f1_computed.json", "r") as f:
    participants = json.load(f)

for participant in participants:
    participant["accuracy_scores"] = {}
    for task in accuracy_tasks:
        if task in participant["answers"]:
            answer_data = participant["answers"][task]
            acc = compute_accuracy(task, answer_data)
            participant["accuracy_scores"][task] = acc
        else:
            participant["accuracy_scores"][task] = None

with open("participants_clean_f1_accuracy_computed.json", "w") as f:
    json.dump(participants, f, indent=2)

In [10]:
import json
import pandas as pd
import numpy as np

with open("participants_clean_f1_accuracy_computed.json", "r") as f:
    participants = json.load(f)

records = []
for participant in participants:
    first_answer = next(iter(participant["answers"].values()))
    encoding = first_answer["parameters"]["encoding"]

    # Completion time
    for task, answer in participant["answers"].items():
        start = answer.get("startTime")
        end = answer.get("endTime")
        if start is not None and end is not None:
            duration = (end - start) / 1000  # segundos
            records.append(
                {
                    "participant": participant["participantId"],
                    "encoding": encoding,
                    "metric": "time",
                    "task": task,
                    "score": duration,
                }
            )

    # F1
    for task, score in participant.get("f1_scores", {}).items():
        if score is not None:
            records.append(
                {
                    "participant": participant["participantId"],
                    "encoding": encoding,
                    "metric": "f1",
                    "task": task,
                    "score": score,
                }
            )

    # Accuracy
    for task, score in participant.get("accuracy_scores", {}).items():
        if score is not None:
            records.append(
                {
                    "participant": participant["participantId"],
                    "encoding": encoding,
                    "metric": "accuracy",
                    "task": task,
                    "score": score,
                }
            )

df = pd.DataFrame(records)

groups = df.groupby(["encoding", "metric", "task"])["score"].apply(list).reset_index()

summary = {}
for _, row in groups.iterrows():
    encoding, metric, task, scores = row
    if encoding not in summary:
        summary[encoding] = {"f1": {}, "accuracy": {}, "time": {}}
    summary[encoding][metric][task] = {
        "scores": scores,
        "mean": float(np.mean(scores)),
        "std": float(np.std(scores, ddof=1)) if len(scores) > 1 else 0.0,
        "n": len(scores),
    }

with open("groups_by_encoding_with_time.json", "w") as f:
    json.dump(summary, f, indent=2)

In [18]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway, kruskal, shapiro, levene
import pingouin as pg
import scikit_posthocs as sp

# --- Cargar datos ---
with open("groups_by_encoding_with_time.json", "r") as f:
    encoding_groups = json.load(f)

records = []
for encoding, metrics in encoding_groups.items():
    for metric, tasks in metrics.items():
        for task, stats in tasks.items():
            for score in stats["scores"]:
                records.append(
                    {
                        "encoding": encoding,
                        "metric": metric,
                        "task": task,
                        "score": score,
                    }
                )

df = pd.DataFrame(records)

results_summary = {}

for metric in df["metric"].unique():
    for task in df["task"].unique():
        sub_df = df[(df["metric"] == metric) & (df["task"] == task)]
        if sub_df.empty:
            continue

        task_key = f"{metric}_{task}"
        groups = sub_df.groupby("encoding")["score"].apply(list).to_dict()

        if len(groups) < 2:
            continue

        # --- Check Assumptions ---
        """ normality = all(shapiro(scores)[1] > 0.05 for scores in groups.values())
        homogeneity = levene(*groups.values())[1] > 0.05 """

        # --- Select Test ---
        if True:
            test_name = "ANOVA"
            F, p = f_oneway(*groups.values())
            # Post-hoc Tukey
            posthoc = pg.pairwise_tukey(dv="score", between="encoding", data=sub_df)
        elif normality and not homogeneity:
            test_name = "Welch ANOVA"
            welch_res = pg.welch_anova(dv="score", between="encoding", data=sub_df)
            F = welch_res["F"].values[0]
            p = welch_res["p-unc"].values[0]
            # Post-hoc Games-Howell
            posthoc = pg.pairwise_gameshowell(
                dv="score", between="encoding", data=sub_df
            )
        else:
            test_name = "Kruskal-Wallis"
            H, p = kruskal(*groups.values())
            F = H
            # Post-hoc Dunn
            posthoc = sp.posthoc_dunn(
                sub_df, val_col="score", group_col="encoding", p_adjust="bonferroni"
            )

        # --- Guardar resultados ---
        results_summary[task_key] = {
            "test": test_name,
            "statistic": F,
            "p_value": p,
            "posthoc": posthoc.to_dict() if hasattr(posthoc, "to_dict") else posthoc,
            "normality": True,
            "homogeneity": True,
        }

        # --- Visualización ---
        plt.figure(figsize=(8, 5))
        sns.boxplot(data=sub_df, x="encoding", y="score", palette="Set2")
        sns.stripplot(
            data=sub_df, x="encoding", y="score", color="black", size=4, alpha=0.6
        )
        plt.title(f"{task_key} ({test_name}, p={p:.4f})")
        plt.ylabel("Score")
        plt.xticks(rotation=30)
        plt.tight_layout()
        plt.savefig(f"./plots/test_plot_{task_key}.png", dpi=150)
        plt.close()

# --- Guardar resultados ---
with open("results_summary.json", "w") as f:
    json.dump(results_summary, f, indent=2)

with open("posthoc_results.txt", "w") as f:
    for task_key, res in results_summary.items():
        f.write(f"=== {task_key} ({res['test']}) ===\n")
        f.write(f"Statistic: {res['statistic']}, p-value: {res['p_value']:.4f}\n")
        f.write("Post-hoc results:\n")
        f.write(json.dumps(res["posthoc"], indent=2))
        f.write("\n\n")

  fval = msbetween / mserror
  np2 = ssbetween / (ssbetween + sserror)  # = ssbetween / sstotal
  tval = mn / se
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  poolsd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=sub_df, x="encoding", y="score", palette="Set2")
  fval = msbetween / mserror
  np2 = ssbetween / (ssbetween + sserror)  # = ssbetween / sstotal
  tval = mn / se
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  poolsd = np.sqrt(((nx - 1) * x.var(ddof=1) + (ny - 1) * y.var(ddof=1)) / dof)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data

TypeError: Object of type ndarray is not JSON serializable