In [None]:
import json
import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt

## Results Visualization

This notebook provides tools to visualize the results of your experiments. It parses the `results_*.json` files into CSV tables and plots DataFit and other metric comparisons across models.

In [None]:
# Directory that holds the results_*.json files; the CSV summaries are written here too.
FOLDER = "./results_trivia_qa"

In [None]:
def extract_model_name(filename):
    """Extract the model name from a ``results_*.json`` file path.

    Only a leading ``results_`` and a trailing ``.json`` are stripped from the
    base name, so a model whose own name happens to contain either substring
    (e.g. ``results_my_results_model.json``) is returned intact.

    Args:
        filename: Path (or bare file name) of a results file.

    Returns:
        The base name without the ``results_`` prefix and the ``.json``
        suffix. A prefix or suffix that is absent is simply not stripped.
    """
    prefix, suffix = 'results_', '.json'
    model_name = os.path.basename(filename)
    # Slice rather than str.replace(): replace() would also delete matches
    # that occur in the middle of the model name.
    if model_name.startswith(prefix):
        model_name = model_name[len(prefix):]
    if model_name.endswith(suffix):
        model_name = model_name[:-len(suffix)]
    return model_name

In [None]:
def _section_row(section, model_name):
    """Return a copy of ``section`` with ``model_name`` as its first key."""
    row = {'model_name': model_name}
    # Any 'model_name' already present in the section is replaced by ours.
    row.update((key, value) for key, value in section.items() if key != 'model_name')
    return row


def load_results_files(directory_path):
    """Load all ``results_*.json`` files from the specified directory.

    Each file is expected to hold a JSON object that may contain a
    ``'paraphrase'`` and/or a ``'qa'`` object. Every section found becomes one
    row (a dict) whose first key is ``'model_name'``, derived from the file
    name via ``extract_model_name``.

    Files that cannot be read or parsed, and sections that are not JSON
    objects, are reported with ``print`` and skipped so that one bad file does
    not abort the whole load.

    Args:
        directory_path: Directory to search (non-recursively).

    Returns:
        A ``(paraphrase_data, qa_data)`` tuple of lists of row dicts, in
        sorted file-path order.
    """
    pattern = os.path.join(directory_path, "results_*.json")
    # glob returns files in arbitrary, filesystem-dependent order; sort so
    # that repeated runs yield the rows in the same order.
    json_files = sorted(glob.glob(pattern))

    paraphrase_data = []
    qa_data = []

    for json_file in json_files:
        model_name = extract_model_name(json_file)

        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable files; ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error processing {json_file}: {e}")
            continue

        if not isinstance(data, dict):
            print(f"Error processing {json_file}: top-level JSON value is not an object")
            continue

        for section_key, rows in (('paraphrase', paraphrase_data), ('qa', qa_data)):
            if section_key not in data:
                continue
            section = data[section_key]
            if isinstance(section, dict):
                rows.append(_section_row(section, model_name))
            else:
                print(f"Error processing {json_file}: '{section_key}' section is not an object")

    return paraphrase_data, qa_data


In [None]:
def _records_to_csv(records, csv_path, heading, noun):
    """Turn row dicts into a DataFrame sorted by model name and save it as CSV.

    ``heading`` and ``noun`` are the capitalised / inline spellings of the
    section name used in the progress messages. When ``records`` is empty,
    nothing is written and an empty DataFrame is returned.
    """
    if not records:
        print(f"No {noun} data found.")
        return pd.DataFrame(columns=['model_name'])

    # Sort by model_name for consistent ordering
    df = pd.DataFrame(records).sort_values('model_name').reset_index(drop=True)
    df.to_csv(csv_path, index=False)
    print(f"{heading} data saved to: {csv_path}")
    print(f"{heading} DataFrame shape: {df.shape}")
    print(f"Models in {noun} data: {list(df['model_name'])}")
    return df


def process(folder=None):
    """Convert the ``results_*.json`` files of a folder into two CSV tables.

    Loads every results file via ``load_results_files``, writes
    ``paraphrase_results.csv`` and ``qa_results.csv`` into the same folder
    (only for the sections that have data) and prints a short summary.

    Args:
        folder: Directory to process. Defaults to the module-level ``FOLDER``.

    Returns:
        A ``(paraphrase_df, qa_df)`` tuple of DataFrames sorted by
        ``model_name``. A section without data yields an empty DataFrame, so
        the result can always be unpacked into two names.
    """
    results_dir = FOLDER if folder is None else folder

    print(f"Processing JSON files in: {results_dir}")

    # Load data from all results_*.json files
    paraphrase_data, qa_data = load_results_files(results_dir)

    if not paraphrase_data and not qa_data:
        print("No results_*.json files found or no data extracted.")
        # Still return a pair: callers unpack `paraphrase_df, qa_df = process()`.
        return pd.DataFrame(columns=['model_name']), pd.DataFrame(columns=['model_name'])

    paraphrase_df = _records_to_csv(
        paraphrase_data,
        os.path.join(results_dir, "paraphrase_results.csv"),
        "Paraphrase",
        "paraphrase",
    )
    qa_df = _records_to_csv(
        qa_data,
        os.path.join(results_dir, "qa_results.csv"),
        "QA",
        "QA",
    )

    # Print summary
    unique_models = {item['model_name'] for item in paraphrase_data + qa_data}
    print("\nSummary:")
    print(f"- Processed {len(unique_models)} unique models")
    print(f"- Paraphrase records: {len(paraphrase_data)}")
    print(f"- QA records: {len(qa_data)}")

    return paraphrase_df, qa_df


In [None]:
# Build both result tables; process() also writes them to CSV inside FOLDER.
paraphrase_df, qa_df = process()

In [None]:
# Preview the first rows of the paraphrase results.
paraphrase_df.head()

In [None]:
# List the columns available in the paraphrase results.
paraphrase_df.columns

In [None]:
# Cast every Nmax_eta_* column to int, first in the paraphrase table and then
# in the QA table.
for results_table in (paraphrase_df, qa_df):
    for col in results_table.columns:
        if not col.startswith("Nmax_eta_"):
            continue
        results_table[col] = results_table[col].astype(int)

In [None]:
# Paraphrase summary: key metrics per model, best DataFit first,
# with the float metrics rendered to three decimals.
p = (
    paraphrase_df
    .loc[:, ["model_name", "dim", "DataFit", "BTI", "delta_op", "eta_JL",
             "CCS", "cos_mean", "Nmax_eta_0.2", "Nmax_eta_0.15"]]
    .sort_values(by="DataFit", ascending=False)
)
p.style.format(
    dict.fromkeys(("DataFit", "BTI", "delta_op", "eta_JL", "CCS", "cos_mean"), "{:.3f}")
)

In [None]:
# QA summary: key metrics per model, best DataFit first,
# with the float metrics rendered to three decimals.
q = (
    qa_df
    .loc[:, ["model_name", "dim", "DataFit", "BTI", "delta_op", "eta_JL",
             "CCS", "cos_mean", "Nmax_eta_0.2", "Nmax_eta_0.15"]]
    .sort_values(by="DataFit", ascending=False)
)
q.style.format(
    dict.fromkeys(("DataFit", "BTI", "delta_op", "eta_JL", "CCS", "cos_mean"), "{:.3f}")
)

In [None]:
# Side-by-side bars comparing each model's DataFit on the paraphrase and QA
# tasks, ordered by paraphrase DataFit (highest first). The merge uses
# pd.merge's default inner join, so only models present in both tables appear.
fig, ax = plt.subplots(figsize=(8, 6))

merged_df = (
    pd.merge(paraphrase_df, qa_df, on="model_name", suffixes=("_para", "_qa"))
    .sort_values("DataFit_para", ascending=False)
)
bar_width = 0.35
indices = np.arange(len(merged_df))

ax.bar(indices, merged_df["DataFit_para"],
       width=bar_width, label="Paraphrase Fit", color='b', alpha=0.7)
ax.bar(indices + bar_width, merged_df["DataFit_qa"],
       width=bar_width, label="QA Fit", color='r', alpha=0.7)

ax.set_xlabel("Models")
ax.set_ylabel("Data Fit")
ax.set_title("Paraphrase Fit vs QA Fit by Model")
# Put each tick midway between the two bars of a model.
ax.set_xticks(indices + bar_width / 2)
ax.set_xticklabels(merged_df["model_name"], rotation=90)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Grouped bar chart of several metrics per model (BTI, cos_mean, eta_JL and CCS by default)
def plot_grouped_bar_chart(df, metrics=("BTI", "cos_mean", "eta_JL", "CCS"),
                           title="Comparison of Metrics by Model"):
    """Draw one group of bars per model, with one bar per metric.

    Args:
        df: DataFrame with a ``model_name`` column and one column per entry
            of ``metrics``. Rows are plotted in the order given.
        metrics: Names of the columns to plot. Defaults to the four metrics
            this notebook compares.
        title: Figure title.

    Raises:
        ValueError: If ``metrics`` is empty.
        KeyError: If ``df`` lacks ``model_name`` or one of the metric columns.
    """
    metrics = list(metrics)
    if not metrics:
        raise ValueError("metrics must contain at least one column name")

    # Each model gets a slot 0.8 wide, shared equally by its bars
    # (0.2 per bar for the default four metrics).
    bar_width = 0.8 / len(metrics)
    indices = np.arange(len(df))

    fig, ax = plt.subplots(figsize=(12, 8))
    for i, metric in enumerate(metrics):
        ax.bar(indices + i * bar_width, df[metric], width=bar_width, label=metric, alpha=0.7)

    ax.set_xlabel("Models")
    ax.set_ylabel("Metric Value")
    ax.set_title(title)
    # Centre each tick under its group of bars.
    ax.set_xticks(indices + bar_width * (len(metrics) - 1) / 2)
    ax.set_xticklabels(df["model_name"], rotation=90)
    # Keep the legend outside the axes so it never covers the bars.
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

In [None]:
# Grouped metric bars for the paraphrase results.
plot_grouped_bar_chart(paraphrase_df)

In [None]:
# Grouped metric bars for the QA results.
plot_grouped_bar_chart(qa_df)