In [5]:
# Seed label of the experiment; it is concatenated into the summary CSV file names in the cells below.
seed = "0"
# Upper bound used below as the default `max_index` and as `MAX_STEP`.
MAX_INDEX = 10000

In [6]:
# %%
import pandas as pd
import os

# %% [markdown]
# # 実験結果の要約（投稿割合＋投稿数）
# 各 `post_result_*.csv` を読み込み、割合と投稿数を平滑化して軽量CSVにまとめます。

# %%
def smooth_list(values, window_size=100):
    """Average ``values`` over consecutive, non-overlapping windows.

    The sequence is cut into chunks of ``window_size`` items, starting at
    index 0; the last chunk may be shorter. Each chunk contributes its
    arithmetic mean to the result.

    Args:
        values: Sequence of numbers supporting ``len`` and slicing.
        window_size: Number of items per chunk.

    Returns:
        List with one mean per chunk, in order.
    """
    chunks = (
        values[start:start + window_size]
        for start in range(0, len(values), window_size)
    )
    return [sum(chunk) / len(chunk) for chunk in chunks if chunk]

# %%
def process_single_experiment(data_dir, bins, output_csv, max_index=MAX_INDEX, window_size=100):
    """Summarize the ``post_result_<i>.csv`` files of one experiment directory.

    For every index ``i`` in ``0..max_index`` whose file exists, the first row
    of the file is read. The share of each bin in the row's bin total and the
    post count are collected, smoothed by averaging over consecutive windows
    of ``window_size`` collected entries, and written to ``output_csv``.

    Args:
        data_dir: Directory containing ``post_result_<i>.csv`` files.
        bins: Names of the bin columns to read from each file.
        output_csv: Path of the summary CSV to write.
        max_index: Largest file index to look for (inclusive).
        window_size: Number of collected entries averaged into one output row.

    Returns:
        None. Prints a message and writes nothing when no usable file is found.
    """
    ratios = {bin_name: [] for bin_name in bins}
    counts = []
    steps = []

    for i in range(max_index + 1):
        filepath = os.path.join(data_dir, f"post_result_{i}.csv")
        if not os.path.isfile(filepath):
            continue

        # Parse everything this step needs BEFORE touching the accumulators,
        # so a failure half-way through cannot leave `ratios` longer than
        # `counts` / `steps` (which would misalign or break the summary).
        try:
            row = pd.read_csv(filepath).iloc[0]
            bin_counts = [int(row[bin_name]) for bin_name in bins]
            total = sum(bin_counts)
            if total == 0:
                # No posts in any bin: ratios are undefined, skip the step.
                continue
            # Use sumOfPosts when the column exists, otherwise the bin total.
            post_count = int(row.get("sumOfPosts", total))
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"[{i}] error: {e}")
            continue

        for bin_name, bin_count in zip(bins, bin_counts):
            ratios[bin_name].append(bin_count / total)
        counts.append(post_count)
        steps.append(i)

    if not steps:
        print("No data found!")
        return

    # Smoothing: each output row is labelled with the first step of its window.
    smoothed_steps = steps[::window_size]
    smoothed_ratios = {bin_name: smooth_list(values, window_size) for bin_name, values in ratios.items()}
    smoothed_counts = smooth_list(counts, window_size)

    # Assemble the summary table.
    df_out = pd.DataFrame({"step": smoothed_steps, "sumOfPosts": smoothed_counts})
    for bin_name in bins:
        df_out[bin_name] = smoothed_ratios[bin_name]

    # os.makedirs("") raises, so only create a directory when the output path
    # actually has a directory component.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df_out.to_csv(output_csv, index=False)
    print(f"summary CSV saved: {output_csv}")

# %% [markdown]
# ## 実行例

# %%
data_dir = "./results/posts"  # directory of a single experiment
# The output file name embeds the seed label defined in the first cell.
output_csv = "./results/summary/exp_summary_seed_" + seed + ".csv"
# Bin columns read from each post_result_*.csv file.
bins = ["bin_0", "bin_1", "bin_2", "bin_3", "bin_4"]
# Number of collected steps averaged into one summary row.
window_size = 100

# max_index is left at its default (MAX_INDEX).
process_single_experiment(data_dir, bins, output_csv, window_size=window_size)


summary CSV saved: ./results/summary/exp_summary_seed_0.csv


In [7]:
# %% [markdown]
# # Smooth the variances of a single experiment and save them to CSV
# - Input: ./results/metrics/result_*.csv (one experiment)
# - Output: ./results/summary/var_summary_seed_X.csv
# - Steps: concatenate on step → sort chronologically → average every 100 steps → save as CSV

# %%
import pandas as pd
import numpy as np
import os
import glob

# ==== Parameters ====
DATA_DIR   = "./results/metrics"                  # metrics directory of this experiment
OUTPUT_CSV = "./results/summary/var_summary_seed_" + seed + ".csv"
STEP_BIN   = 100                                   # number of steps aggregated into one bin
MAX_STEP   = MAX_INDEX                                # e.g. 5000 keeps only steps <= 5000; use None for no limit

# %% 読み込み
def load_single_experiment_metrics(data_dir, max_step=None):
    """Load and concatenate every ``result_*.csv`` found in ``data_dir``.

    Files without a ``step`` column are ignored. Files that raise while being
    read are reported with a ``[skip]`` message and ignored.

    Args:
        data_dir: Directory searched for ``result_*.csv`` files.
        max_step: When not None, only rows with ``step <= max_step`` are kept.

    Returns:
        One DataFrame with all kept rows, sorted by ``step`` and re-indexed
        from 0.

    Raises:
        FileNotFoundError: No ``result_*.csv`` file exists in ``data_dir``.
        ValueError: No file contributed a frame.
    """
    pattern = os.path.join(data_dir, "result_*.csv")
    csv_paths = sorted(glob.glob(pattern))
    if len(csv_paths) == 0:
        raise FileNotFoundError(f"No result_*.csv in {data_dir}")

    frames = []
    for csv_path in csv_paths:
        try:
            frame = pd.read_csv(csv_path)
            if "step" in frame.columns:
                if max_step is not None:
                    frame = frame[frame["step"] <= max_step]
                frames.append(frame)
        except Exception as err:
            print(f"[skip] {csv_path}: {err}")

    if len(frames) == 0:
        raise ValueError("No readable CSVs were loaded.")

    combined = pd.concat(frames, ignore_index=True)
    return combined.sort_values("step").reset_index(drop=True)

# Load every metrics CSV of this experiment (rows with step > MAX_STEP are dropped) and report what was read.
raw_df = load_single_experiment_metrics(DATA_DIR, MAX_STEP)
print(f"loaded rows: {len(raw_df)} | columns: {list(raw_df.columns)}")

# %% variance 列だけ取り出して 100 step ごとに平均
def summarize_variances(df, step_bin, var_cols=None):
    """Average selected metric columns over fixed-width step bins.

    Every row is assigned to the bin ``(step // step_bin) * step_bin`` and the
    selected columns are averaged per bin. The input frame is left untouched
    (no helper column is added to it).

    Args:
        df: DataFrame with a ``step`` column and every column in ``var_cols``.
        step_bin: Bin width in steps; must be positive.
        var_cols: Columns to average. Defaults to
            ``["opinionVar", "postOpinionVar"]``.

    Returns:
        DataFrame whose ``step`` column holds the bin label, followed by the
        per-bin mean of each selected column, sorted by ``step``.

    Raises:
        ValueError: ``step_bin`` is not positive, or a selected column is
            missing from ``df``.
    """
    # Columns are named explicitly rather than discovered.
    if var_cols is None:
        var_cols = ["opinionVar", "postOpinionVar"]
    else:
        var_cols = list(var_cols)

    # A zero or negative width would silently yield NaN / meaningless bins.
    if step_bin <= 0:
        raise ValueError(f"step_bin must be positive, got {step_bin}")

    # Existence check.
    for c in var_cols:
        if c not in df.columns:
            raise ValueError(f"Column {c} not found in input CSV")

    # Group by a derived Series instead of writing a "step_bin" column into
    # the caller's DataFrame.
    bin_label = ((df["step"] // step_bin) * step_bin).rename("step_bin")

    grouped = df.groupby(bin_label)[var_cols].mean().reset_index()
    return grouped.rename(columns={"step_bin": "step"})


# Average the variance columns of raw_df per STEP_BIN-step bin.
summary_df = summarize_variances(raw_df, STEP_BIN)

# %% Save
# Create the output directory if needed, write the summary, and preview its first rows.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
summary_df.to_csv(OUTPUT_CSV, index=False)
print(f"saved: {OUTPUT_CSV}")
print(summary_df.head(5))


loaded rows: 10001 | columns: ['step', 'opinionVar', 'postOpinionVar', 'follow', 'unfollow', 'rewire', 'opinionAvg', 'feedPostOpinionMean_0', 'feedPostOpinionMean_1', 'feedPostOpinionMean_2', 'feedPostOpinionMean_3', 'feedPostOpinionMean_4', 'feedPostOpinionVar_0', 'feedPostOpinionVar_1', 'feedPostOpinionVar_2', 'feedPostOpinionVar_3', 'feedPostOpinionVar_4', 'cRateMean_0', 'cRateMean_1', 'cRateMean_2', 'cRateMean_3', 'cRateMean_4', 'cRateVar_0', 'cRateVar_1', 'cRateVar_2', 'cRateVar_3', 'cRateVar_4', 'highComfortRateNum_0', 'highComfortRateNum_1', 'highComfortRateNum_2', 'highComfortRateNum_3', 'highComfortRateNum_4']
saved: ./results/summary/var_summary_seed_0.csv
   step  opinionVar  postOpinionVar
0     0    0.259819        0.223584
1   100    0.213470        0.192459
2   200    0.207535        0.178463
3   300    0.205455        0.176574
4   400    0.208381        0.178077


In [8]:
# %% [markdown]
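# # Smooth the highComfortRateNum_* columns of a single experiment and save them to CSV
# - Input: ./results/metrics/result_*.csv (one experiment)
# - Output: ./results/summary/highCRNum_summary_seed_X.csv
# - Steps: concatenate on step → sort chronologically → average every 100 steps → save as CSV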

# %%
import pandas as pd
import numpy as np
import os
import glob

# ==== Parameters ====
DATA_DIR   = "./results/metrics"                  # metrics directory of this experiment
OUTPUT_CSV = "./results/summary/highCRNum_summary_seed_" + seed + ".csv"
STEP_BIN   = 100                                   # number of steps aggregated into one bin
MAX_STEP   = MAX_INDEX                               # e.g. 5000 keeps only steps <= 5000; use None for no limit

# %% 読み込み
def load_single_experiment_metrics(data_dir, max_step=None):
    """Load and concatenate every ``result_*.csv`` found in ``data_dir``.

    Files without a ``step`` column are ignored. Files that raise while being
    read are reported with a ``[skip]`` message and ignored.

    Args:
        data_dir: Directory searched for ``result_*.csv`` files.
        max_step: When not None, only rows with ``step <= max_step`` are kept.

    Returns:
        One DataFrame with all kept rows, sorted by ``step`` and re-indexed
        from 0.

    Raises:
        FileNotFoundError: No ``result_*.csv`` file exists in ``data_dir``.
        ValueError: No file contributed a frame.
    """
    pattern = os.path.join(data_dir, "result_*.csv")
    csv_paths = sorted(glob.glob(pattern))
    if len(csv_paths) == 0:
        raise FileNotFoundError(f"No result_*.csv in {data_dir}")

    frames = []
    for csv_path in csv_paths:
        try:
            frame = pd.read_csv(csv_path)
            if "step" in frame.columns:
                if max_step is not None:
                    frame = frame[frame["step"] <= max_step]
                frames.append(frame)
        except Exception as err:
            print(f"[skip] {csv_path}: {err}")

    if len(frames) == 0:
        raise ValueError("No readable CSVs were loaded.")

    combined = pd.concat(frames, ignore_index=True)
    return combined.sort_values("step").reset_index(drop=True)

# Load every metrics CSV of this experiment (rows with step > MAX_STEP are dropped) and report what was read.
raw_df = load_single_experiment_metrics(DATA_DIR, MAX_STEP)
print(f"loaded rows: {len(raw_df)} | columns: {list(raw_df.columns)}")

# %% Extract the highComfortRateNum_* columns and average them per STEP_BIN-step bin
def summarize_variances(df, step_bin, var_cols=None):
    """Average selected metric columns over fixed-width step bins.

    Despite the name, this variant averages the ``highComfortRateNum_*``
    columns by default. Every row is assigned to the bin
    ``(step // step_bin) * step_bin`` and the selected columns are averaged
    per bin. The input frame is left untouched (no helper column is added).

    Args:
        df: DataFrame with a ``step`` column and every column in ``var_cols``.
        step_bin: Bin width in steps; must be positive.
        var_cols: Columns to average. Defaults to ``highComfortRateNum_0``
            through ``highComfortRateNum_4``.

    Returns:
        DataFrame whose ``step`` column holds the bin label, followed by the
        per-bin mean of each selected column, sorted by ``step``.

    Raises:
        ValueError: ``step_bin`` is not positive, or a selected column is
            missing from ``df``.
    """
    # Columns are named explicitly rather than discovered.
    if var_cols is None:
        var_cols = ["highComfortRateNum_0", "highComfortRateNum_1", "highComfortRateNum_2", "highComfortRateNum_3", "highComfortRateNum_4"]
    else:
        var_cols = list(var_cols)

    # A zero or negative width would silently yield NaN / meaningless bins.
    if step_bin <= 0:
        raise ValueError(f"step_bin must be positive, got {step_bin}")

    # Existence check.
    for c in var_cols:
        if c not in df.columns:
            raise ValueError(f"Column {c} not found in input CSV")

    # Group by a derived Series instead of writing a "step_bin" column into
    # the caller's DataFrame.
    bin_label = ((df["step"] // step_bin) * step_bin).rename("step_bin")

    grouped = df.groupby(bin_label)[var_cols].mean().reset_index()
    return grouped.rename(columns={"step_bin": "step"})


# Average the highComfortRateNum_* columns of raw_df per STEP_BIN-step bin.
summary_df = summarize_variances(raw_df, STEP_BIN)

# %% Save
# Create the output directory if needed, write the summary, and preview its first rows.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
summary_df.to_csv(OUTPUT_CSV, index=False)
print(f"saved: {OUTPUT_CSV}")
print(summary_df.head(5))


loaded rows: 10001 | columns: ['step', 'opinionVar', 'postOpinionVar', 'follow', 'unfollow', 'rewire', 'opinionAvg', 'feedPostOpinionMean_0', 'feedPostOpinionMean_1', 'feedPostOpinionMean_2', 'feedPostOpinionMean_3', 'feedPostOpinionMean_4', 'feedPostOpinionVar_0', 'feedPostOpinionVar_1', 'feedPostOpinionVar_2', 'feedPostOpinionVar_3', 'feedPostOpinionVar_4', 'cRateMean_0', 'cRateMean_1', 'cRateMean_2', 'cRateMean_3', 'cRateMean_4', 'cRateVar_0', 'cRateVar_1', 'cRateVar_2', 'cRateVar_3', 'cRateVar_4', 'highComfortRateNum_0', 'highComfortRateNum_1', 'highComfortRateNum_2', 'highComfortRateNum_3', 'highComfortRateNum_4']
saved: ./results/summary/highCRNum_summary_seed_0.csv
   step  highComfortRateNum_0  highComfortRateNum_1  highComfortRateNum_2  \
0     0                  7.89                 14.18                 31.67   
1   100                 20.61                 24.86                 67.64   
2   200                 20.22                 28.04                 68.63   
3   300    