# In [None]:
import os
import glob
import pandas as pd
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

results_dir = "/data1/shengen/STATS403/project/results"
# Defined up front so the aggregated output can be excluded from the inputs.
summary_path = os.path.join(results_dir, "summary_of_results.csv")

datasets = ["books", "disney", "gen_500", "gen_1000", "gen_time"]


def split_model_and_dataset(basename, datasets):
    """Split a result file's base name into ``(model, dataset)``.

    A trailing ``_<dataset>`` suffix is preferred, so a model name that merely
    contains a dataset name (e.g. ``bookshelf_disney``) is not mislabelled.
    If no suffix matches, the first dataset name found anywhere in
    ``basename`` is used; if none is found the dataset is ``"unknown"`` and
    the whole ``basename`` is returned as the model.
    """
    for ds in datasets:
        suffix = f"_{ds}"
        if basename.endswith(suffix) and len(basename) > len(suffix):
            return basename[: -len(suffix)], ds

    matched = next((ds for ds in datasets if ds in basename), "unknown")
    if matched == "unknown":
        return basename, matched
    return basename.replace(f"_{matched}", ""), matched


def summarize_csv(path, datasets):
    """Build one summary row (``mean ± std`` per numeric column) for a CSV.

    Returns a dict with ``"model"``, ``"dataset"`` and one formatted string
    per numeric column, or ``None`` when the file has no rows or no numeric
    columns (a message is printed in that case).
    """
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like an empty frame.
        df = pd.DataFrame()

    if df.empty:
        print(f"Skipped empty file: {path}")
        return None

    # numeric_only avoids TypeError/KeyError on text columns (ids, notes, ...).
    means = df.mean(numeric_only=True)
    stds = df.std(numeric_only=True)
    if means.empty:
        print(f"Skipped file with no numeric columns: {path}")
        return None

    basename = os.path.splitext(os.path.basename(path))[0]
    model, dataset = split_model_and_dataset(basename, datasets)

    summary = {"model": model, "dataset": dataset}
    for col in means.index:
        summary[col] = f"{means[col]:.4f} ± {stds[col]:.4f}"
    return summary


# Sort for a reproducible order (glob order is filesystem-dependent) and drop
# the summary file itself, which lives in the same directory and would
# otherwise be read back as a result file on every re-run.
_summary_abspath = os.path.abspath(summary_path)
csv_files = sorted(
    f
    for f in glob.glob(os.path.join(results_dir, "*.csv"))
    if os.path.abspath(f) != _summary_abspath
)

summary_list = []
for file in csv_files:
    summary = summarize_csv(file, datasets)
    if summary is not None:
        summary_list.append(summary)

summary_df = pd.DataFrame(summary_list)

if summary_df.empty:
    # Nothing to aggregate: avoid the KeyError on the missing "model" column
    # and do not write an empty summary file.
    print(f"No usable result files found in: {results_dir}")
else:
    # Keep models in first-seen order instead of alphabetical order.
    model_order = summary_df['model'].drop_duplicates().tolist()
    summary_df['model'] = pd.Categorical(summary_df['model'], categories=model_order, ordered=True)
    # mergesort is stable, so rows of the same model keep their file order.
    summary_df = summary_df.sort_values(by="model", kind="mergesort")

    summary_df.to_csv(summary_path, index=False)
    print(f"Summary saved to: {summary_path}")