In [None]:
import os
import json
import csv
from datetime import datetime


# Column order of the output CSV; _build_row must emit values in this order.
_CSV_COLUMNS = (
    "timestamp",
    "model_name",
    "layer",
    "width",
    "type",
    "dataset_name",
    "dataset_split",
    "linear_macro_f1_score",
    "linear_accuracy",
    "linear_mean_cv_accuracy",
    "linear_std_cv_accuracy",
    "decision_tree_macro_f1_score",
    "decision_tree_accuracy",
    "decision_tree_mean_cv_accuracy",
    "decision_tree_std_cv_accuracy",
)


def _lookup(mapping, *keys):
    """Follow ``keys`` through nested dicts, returning "N/A" if a key is absent."""
    *parents, leaf = keys
    for key in parents:
        mapping = mapping.get(key, {})
    return mapping.get(leaf, "N/A")


def _probe_metrics(model_results):
    """Return [macro F1, accuracy, mean CV accuracy, std CV accuracy] for one probe."""
    performance = model_results.get("performance", {})
    return [
        _lookup(model_results, "aggregated_metrics", "macro avg", "f1_score"),
        performance.get("accuracy", "N/A"),
        _lookup(performance, "cross_validation", "mean_accuracy"),
        _lookup(performance, "cross_validation", "std_accuracy"),
    ]


def _build_row(data):
    """Turn one parsed dashboard JSON document into a CSV row (see _CSV_COLUMNS)."""
    metadata = data.get("metadata", {})
    models = data.get("models", {})

    # NOTE: a missing "hidden" flag yields the truthy placeholder "N/A" and is
    # therefore labelled "Hidden States"; kept as-is for backward compatibility.
    hidden = _lookup(metadata, "dataset", "hidden")
    representation = "Hidden States" if hidden else "SAE Features"

    return [
        _lookup(metadata, "timestamp"),
        _lookup(metadata, "model", "name"),
        _lookup(metadata, "model", "layer"),
        _lookup(metadata, "args", "width"),
        representation,
        _lookup(metadata, "dataset", "name"),
        _lookup(metadata, "args", "dataset_split"),
        *_probe_metrics(models.get("linearProbe", {})),
        *_probe_metrics(models.get("decisionTree", {})),
    ]


def extract_f1_scores(dashboard_dir, output_csv):
    """
    Traverse the dashboard directory, extract macro avg F1 scores from JSON files,
    and save to a CSV file along with relevant metadata.

    Every ``*.json`` file found under ``dashboard_dir`` contributes one row.
    Fields missing from a file are written as "N/A". A file that cannot be
    read, parsed, or converted is reported on stdout and skipped, so one bad
    file does not abort the whole extraction. Directories and files are
    visited in sorted order so the row order is reproducible.

    Parameters:
        dashboard_dir (str): Base directory containing the structured dashboard outputs.
        output_csv (str): Path to save the resulting CSV file. May be a bare
            filename, in which case it is written to the current directory.
    """
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_csv, mode="w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(_CSV_COLUMNS)

        for root, dirs, files in os.walk(dashboard_dir):
            dirs.sort()  # in-place sort steers os.walk into a deterministic order
            for file in sorted(files):
                if not file.endswith(".json"):
                    continue
                json_path = os.path.join(root, file)

                try:
                    with open(json_path, "r", encoding="utf-8") as f:
                        data = json.load(f)
                    csv_writer.writerow(_build_row(data))
                except Exception as e:
                    # Deliberately broad: this is a best-effort batch job and
                    # the failure is reported rather than silently dropped.
                    print(f"Error processing {json_path}: {e}")
                else:
                    print(f"Processed: {json_path}")

    print(f"Extraction complete. Results saved to {output_csv}")


# Example usage: run the extraction with default locations.
if __name__ == "__main__":
    # Replace with your dashboard directory
    dashboard_dir = "./output/probe_performance/"
    # Replace with your desired CSV output file
    output_csv = "./macro_f1_scores.csv"
    extract_f1_scores(dashboard_dir, output_csv)

In [None]:
import pandas as pd

# Load the CSV written by extract_f1_scores. Left as a bare expression so that,
# when run as a notebook cell, the resulting DataFrame is shown as the output.
pd.read_csv(output_csv)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the per-file metrics written by the extraction step
raw_df = pd.read_csv(output_csv)

# Columns identifying one experimental setting; "type" separates the two
# representations (SAE Features vs. Hidden States) within a setting.
pivot_index = ["model_name", "layer", "width", "dataset_name", "dataset_split"]
group_keys = ["model_name", "layer", "width", "type", "dataset_name", "dataset_split"]

# Metric columns, defined once and shared by the aggregation and the pivot
metric_columns = [
    "linear_macro_f1_score",
    "linear_accuracy",
    "linear_mean_cv_accuracy",
    "linear_std_cv_accuracy",
    "decision_tree_macro_f1_score",
    "decision_tree_accuracy",
    "decision_tree_mean_cv_accuracy",
    "decision_tree_std_cv_accuracy",
]

# Average every metric over rows that share the same setting and type
mean_per_metric = dict.fromkeys(metric_columns, "mean")
grouped_df = raw_df.groupby(group_keys).agg(mean_per_metric).reset_index()

# One row per setting, one column per (metric, type) pair
merged_df = grouped_df.pivot(
    index=pivot_index,
    columns="type",
    values=metric_columns,
)

# Collapse the two-level column labels into "<metric>_<type>" strings
merged_df.columns = ["_".join(level_pair).strip() for level_pair in merged_df.columns]
merged_df = merged_df.reset_index()

# Show the first rows of the merged DataFrame
print(merged_df.head())

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns

# Apply the plotting style before any figure is created
plt.style.use("seaborn-v0_8-paper")

# Colour-blind-friendly palette; one fixed colour per representation type
colors = sns.color_palette("colorblind")
colors_hidden = colors[0]
colors_sae = colors[2]


def _setting_label(row):
    """Build the multi-line x-axis label for one row of merged_df."""
    return f"{row['model_name']}\n{row['width']}\n{row['layer']}\n{row['dataset_name']}"


# One x-axis label per experimental setting
settings = merged_df.apply(_setting_label, axis=1)

# Two side-by-side panels sharing the y-axis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8), sharey=True)

# Bar positions and bar width
x = np.arange(len(settings))
width = 0.35


def _draw_panel(ax, hidden_scores, sae_scores, title):
    """Draw the paired Hidden States / SAE Features bars on ``ax`` and style it.

    Returns the two bar containers (hidden first, SAE second) so the caller
    can annotate them later.
    """
    hidden_bars = ax.bar(
        x - width / 2,
        hidden_scores,
        width,
        label="Hidden States",
        color=colors_hidden,
        alpha=0.9,
    )
    sae_bars = ax.bar(
        x + width / 2,
        sae_scores,
        width,
        label="SAE Features",
        color=colors_sae,
        alpha=0.9,
    )

    ax.set_ylabel("Macro F1 Score", fontsize=14)
    ax.set_title(title, fontsize=16, weight="bold")
    ax.set_xticks(x)
    ax.set_xticklabels(settings, rotation=45, ha="right", fontsize=10)
    ax.legend(fontsize=12, loc="upper left")
    ax.grid(axis="y", linestyle="--", alpha=0.7)
    return hidden_bars, sae_bars


# Left panel: linear probe (missing scores are drawn as zero-height bars)
probe_hidden = merged_df["linear_macro_f1_score_Hidden States"].fillna(0).values
probe_sae = merged_df["linear_macro_f1_score_SAE Features"].fillna(0).values
rects1, rects2 = _draw_panel(ax1, probe_hidden, probe_sae, "Linear Probe Performance")

# Right panel: decision tree
tree_hidden = merged_df["decision_tree_macro_f1_score_Hidden States"].fillna(0).values
tree_sae = merged_df["decision_tree_macro_f1_score_SAE Features"].fillna(0).values
rects3, rects4 = _draw_panel(ax2, tree_hidden, tree_sae, "Decision Tree Performance")


# Add value labels to bars
def autolabel(ax, rects, color):
    """Write each bar's height above it, formatted to two decimal places.

    Bars whose height is not greater than zero are left unlabelled.

    Parameters:
        ax: Object providing ``annotate``; the labels are added to it.
        rects: Iterable of bars exposing ``get_height``, ``get_x`` and ``get_width``.
        color: Text colour passed through to ``annotate``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        if not bar_height > 0:
            continue

        # Anchor the label at the horizontal centre of the bar's top edge
        bar_center = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            f"{bar_height:.2f}",
            xy=(bar_center, bar_height),
            xytext=(0, 5),  # nudge the text 5 points upwards
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=10,
            color=color,
            weight="bold",
        )


# Label every bar group in the colour of its bars
for axis, bars, bar_color in (
    (ax1, rects1, colors_hidden),
    (ax1, rects2, colors_sae),
    (ax2, rects3, colors_hidden),
    (ax2, rects4, colors_sae),
):
    autolabel(axis, bars, bar_color)

# Overall title, then tighten the layout inside the given rectangle
fig.suptitle(
    "Macro F1 Score Comparison: Linear Probe vs. Decision Tree",
    fontsize=18,
    weight="bold",
)
layout_rect = [0, 0.03, 1, 0.95]
fig.tight_layout(rect=layout_rect)

# Write the figure to disk, then display it
plt.savefig("f1_comparison.png", dpi=300, bbox_inches="tight")
plt.show()