In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
mpl.style.use('ggplot')
from matplotlib.backends.backend_pdf import PdfPages

In [9]:
# === Robust table builder for MLP/CNN (RNN excluded) ===
# From the per-seed CSVs under res/from_splits/seed_XXXX/, pick up only the final value
# (assumed to be the T=1000 value) and build Scenario x Model tables of FDR and Power (= 1 - Type II).
# Tolerates filename variation: tries both mlp_fdr_gaussian.csv and mlp_gaussian_fdr.csv.

from pathlib import Path
import pandas as pd
import numpy as np

# pandas display options applied before the summary tables are rendered.
pd.set_option("display.width", None)
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", None)

# Directory that is searched for seed_* sub-directories.
IN_DIR = Path("res/from_splits")

# Scenario names and model prefixes used to build the per-seed CSV filenames.
SCENARIOS = [
    "gaussian",
    "t3",
    "spiked",
    "var",
]
MODELS = [
    "mlp",
    "cnn",
]  # RNN is excluded from the aggregation

# Filename patterns to try, in this order:
#  - {model}_{metric}_{scenario}.csv  (e.g. mlp_fdr_gaussian.csv)
#  - {model}_{scenario}_{metric}.csv  (e.g. mlp_gaussian_fdr.csv)  <- the layout noted as the local one
def candidate_filenames(model: str, metric: str, scenario: str):
    """Return both accepted spellings of a result filename, metric-first spelling first."""
    metric_first = f"{model}_{metric}_{scenario}.csv"
    scenario_first = f"{model}_{scenario}_{metric}.csv"
    return [metric_first, scenario_first]

def read_last_value_from_csv(path: Path):
    """Return the last numeric cell of a headerless CSV as a float, or None.

    The file is read with ``header=None``, its cells are flattened row by row,
    coerced to numbers (unparseable cells become NaN) and the last non-NaN
    value is returned. The original author assumes this is the T=1000 value.

    Returns:
        float: the last numeric cell.
        None: when the file does not exist, is empty, or holds no numeric cell.
            Any other failure while reading also yields None, but a
            RuntimeWarning is emitted so that an unreadable file is not
            silently mistaken for a missing one.
    """
    import warnings

    try:
        if not path.exists():
            return None
        df = pd.read_csv(path, header=None)
        if df.empty:
            return None
        # Row-major flatten without DataFrame.stack(): the legacy stack
        # implementation raises a FutureWarning on recent pandas releases.
        flat = pd.Series(df.to_numpy(dtype=object).ravel(), dtype=object)
        numeric = pd.to_numeric(flat, errors="coerce").dropna()
        return float(numeric.iloc[-1]) if not numeric.empty else None
    except pd.errors.EmptyDataError:
        # A zero-byte file is an expected "no result" case; stay quiet.
        return None
    except Exception as exc:
        # Keep the best-effort contract (None), but make the failure visible.
        warnings.warn(
            f"could not read a value from {path}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

def collect_per_seed_metric(model: str, metric: str, scenario: str):
    """Gather one final metric value per seed directory under IN_DIR.

    For each seed_* directory (visited in sorted order) the candidate filenames
    are probed in order and the first one that yields a value is used. Seeds
    for which no candidate yields a value are skipped.

    Returns a float ndarray of the collected values, or None if no seed
    contributed a value.
    """
    seed_dirs = sorted([entry for entry in IN_DIR.glob("seed_*") if entry.is_dir()])
    names = candidate_filenames(model, metric, scenario)

    collected = []
    for seed_dir in seed_dirs:
        # Lazily read candidates and stop at the first file that gives a value.
        readings = (read_last_value_from_csv(seed_dir / name) for name in names)
        hit = next((value for value in readings if value is not None), None)
        if hit is not None:
            collected.append(hit)

    if not collected:
        return None
    return np.array(collected, dtype=float)

def mean_std_str(arr, fmt="{:.3f} ({:.3f})", blank="—"):
    """Render ``arr`` as "mean (std)" text using ``fmt``.

    NaN entries are ignored via np.nanmean / np.nanstd (NumPy's default ddof).
    ``blank`` is returned when ``arr`` is None, has length 0, or its mean is NaN.
    """
    has_values = arr is not None and len(arr) > 0
    if not has_values:
        return blank

    center = np.nanmean(arr)
    spread = np.nanstd(arr)
    if np.isnan(center):
        return blank
    return fmt.format(center, spread)

# Diagnostic: for the first three seed directories (in sorted order), report which
# candidate "fdr" filename exists on disk, or "(missing)" when neither does.
# Only file existence is checked here; the file contents are not read.
print("== quick diagnostic (first 3 seeds) ==")
for sc in SCENARIOS:
    for model in MODELS:
        paths = []
        for sd in sorted([p for p in IN_DIR.glob("seed_*") if p.is_dir()])[:3]:
            cand = [sd / fn for fn in candidate_filenames(model, "fdr", sc)]
            ok = [p.name for p in cand if p.exists()]
            # The first existing candidate is reported, following candidate order.
            paths.append(ok[0] if ok else "(missing)")
        print(f"scenario={sc:7s}, model={model:3s}, examples={paths}")

# Aggregation: FDR and Power (= 1 - typeII) go into two separate tables,
# one row per scenario and one column per model.
fdr_rows, pwr_rows = [], []
for sc in SCENARIOS:
    fdr_row = {"Scenario": sc}
    pwr_row = {"Scenario": sc}
    for model in MODELS:
        fdr_vals = collect_per_seed_metric(model, "fdr", sc)
        t2_vals  = collect_per_seed_metric(model, "typeII", sc)
        # Power is derived element-wise from the collected Type II values.
        pwr_vals = (1.0 - t2_vals) if t2_vals is not None else None
        fdr_row[model.upper()] = mean_std_str(fdr_vals)
        pwr_row[model.upper()] = mean_std_str(pwr_vals)
    fdr_rows.append(fdr_row)
    pwr_rows.append(pwr_row)

# Index by scenario and fix the column order to follow MODELS.
FDR_table   = pd.DataFrame(fdr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]
Power_table = pd.DataFrame(pwr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]

# NOTE(review): this import sits mid-cell; it is only needed for the display() calls below.
from IPython.display import display
print("\nFDR (mean (std))")
display(FDR_table)
print("\nPower (mean (std))")
display(Power_table)

# Save if needed
# FDR_table.to_csv("summary_FDR_MLP_CNN.csv")
# Power_table.to_csv("summary_Power_MLP_CNN.csv")


== quick diagnostic (first 3 seeds) ==
scenario=gaussian, model=mlp, examples=['mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv']
scenario=gaussian, model=cnn, examples=['cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv']
scenario=t3     , model=mlp, examples=['mlp_t3_fdr.csv', 'mlp_t3_fdr.csv', 'mlp_t3_fdr.csv']
scenario=t3     , model=cnn, examples=['cnn_t3_fdr.csv', 'cnn_t3_fdr.csv', 'cnn_t3_fdr.csv']
scenario=spiked , model=mlp, examples=['mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv']
scenario=spiked , model=cnn, examples=['cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv']
scenario=var    , model=mlp, examples=['(missing)', '(missing)', '(missing)']
scenario=var    , model=cnn, examples=['(missing)', '(missing)', '(missing)']



FDR (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.075 (0.019),0.094 (0.028)
t3,0.043 (0.042),0.047 (0.044)
spiked,0.076 (0.034),0.099 (0.028)
var,—,—



Power (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.830 (0.049),0.998 (0.002)
t3,0.363 (0.305),0.512 (0.382)
spiked,0.808 (0.070),0.998 (0.003)
var,—,—


In [10]:
# === Robust table builder for MLP/CNN (RNN excluded) ===
# From the per-seed CSVs under IN_DIR/seed_XXXX/, pick up only the final value
# (assumed to be the T=1000 value) and build Scenario x Model tables of FDR and Power (= 1 - Type II).
# Tolerates filename variation: tries both mlp_fdr_gaussian.csv and mlp_gaussian_fdr.csv.

from pathlib import Path
import pandas as pd
import numpy as np

# pandas display options applied before the summary tables are rendered.
pd.set_option("display.width", None)
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", None)

# Directory that is searched for seed_* sub-directories.
IN_DIR = Path("res/from_splits_m=500")

# Scenario names and model prefixes used to build the per-seed CSV filenames.
SCENARIOS = [
    "gaussian",
    "t3",
    "spiked",
]
MODELS = [
    "mlp",
    "cnn",
]  # RNN is excluded from the aggregation

# Filename patterns to try, in this order:
#  - {model}_{metric}_{scenario}.csv  (e.g. mlp_fdr_gaussian.csv)
#  - {model}_{scenario}_{metric}.csv  (e.g. mlp_gaussian_fdr.csv)  <- the layout noted as the local one
def candidate_filenames(model: str, metric: str, scenario: str):
    """Return both accepted spellings of a result filename, metric-first spelling first."""
    metric_first = f"{model}_{metric}_{scenario}.csv"
    scenario_first = f"{model}_{scenario}_{metric}.csv"
    return [metric_first, scenario_first]

def read_last_value_from_csv(path: Path):
    """Return the last numeric cell of a headerless CSV as a float, or None.

    The file is read with ``header=None``, its cells are flattened row by row,
    coerced to numbers (unparseable cells become NaN) and the last non-NaN
    value is returned. The original author assumes this is the T=1000 value.

    Returns:
        float: the last numeric cell.
        None: when the file does not exist, is empty, or holds no numeric cell.
            Any other failure while reading also yields None, but a
            RuntimeWarning is emitted so that an unreadable file is not
            silently mistaken for a missing one.
    """
    import warnings

    try:
        if not path.exists():
            return None
        df = pd.read_csv(path, header=None)
        if df.empty:
            return None
        # Row-major flatten without DataFrame.stack(): the legacy stack
        # implementation raises a FutureWarning on recent pandas releases.
        flat = pd.Series(df.to_numpy(dtype=object).ravel(), dtype=object)
        numeric = pd.to_numeric(flat, errors="coerce").dropna()
        return float(numeric.iloc[-1]) if not numeric.empty else None
    except pd.errors.EmptyDataError:
        # A zero-byte file is an expected "no result" case; stay quiet.
        return None
    except Exception as exc:
        # Keep the best-effort contract (None), but make the failure visible.
        warnings.warn(
            f"could not read a value from {path}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

def collect_per_seed_metric(model: str, metric: str, scenario: str):
    """Gather one final metric value per seed directory under IN_DIR.

    For each seed_* directory (visited in sorted order) the candidate filenames
    are probed in order and the first one that yields a value is used. Seeds
    for which no candidate yields a value are skipped.

    Returns a float ndarray of the collected values, or None if no seed
    contributed a value.
    """
    seed_dirs = sorted([entry for entry in IN_DIR.glob("seed_*") if entry.is_dir()])
    names = candidate_filenames(model, metric, scenario)

    collected = []
    for seed_dir in seed_dirs:
        # Lazily read candidates and stop at the first file that gives a value.
        readings = (read_last_value_from_csv(seed_dir / name) for name in names)
        hit = next((value for value in readings if value is not None), None)
        if hit is not None:
            collected.append(hit)

    if not collected:
        return None
    return np.array(collected, dtype=float)

def mean_std_str(arr, fmt="{:.3f} ({:.3f})", blank="—"):
    """Render ``arr`` as "mean (std)" text using ``fmt``.

    NaN entries are ignored via np.nanmean / np.nanstd (NumPy's default ddof).
    ``blank`` is returned when ``arr`` is None, has length 0, or its mean is NaN.
    """
    has_values = arr is not None and len(arr) > 0
    if not has_values:
        return blank

    center = np.nanmean(arr)
    spread = np.nanstd(arr)
    if np.isnan(center):
        return blank
    return fmt.format(center, spread)

# Diagnostic: for the first three seed directories (in sorted order), report which
# candidate "fdr" filename exists on disk, or "(missing)" when neither does.
# Only file existence is checked here; the file contents are not read.
print("== quick diagnostic (first 3 seeds) ==")
for sc in SCENARIOS:
    for model in MODELS:
        paths = []
        for sd in sorted([p for p in IN_DIR.glob("seed_*") if p.is_dir()])[:3]:
            cand = [sd / fn for fn in candidate_filenames(model, "fdr", sc)]
            ok = [p.name for p in cand if p.exists()]
            # The first existing candidate is reported, following candidate order.
            paths.append(ok[0] if ok else "(missing)")
        print(f"scenario={sc:7s}, model={model:3s}, examples={paths}")

# Aggregation: FDR and Power (= 1 - typeII) go into two separate tables,
# one row per scenario and one column per model.
fdr_rows, pwr_rows = [], []
for sc in SCENARIOS:
    fdr_row = {"Scenario": sc}
    pwr_row = {"Scenario": sc}
    for model in MODELS:
        fdr_vals = collect_per_seed_metric(model, "fdr", sc)
        t2_vals  = collect_per_seed_metric(model, "typeII", sc)
        # Power is derived element-wise from the collected Type II values.
        pwr_vals = (1.0 - t2_vals) if t2_vals is not None else None
        fdr_row[model.upper()] = mean_std_str(fdr_vals)
        pwr_row[model.upper()] = mean_std_str(pwr_vals)
    fdr_rows.append(fdr_row)
    pwr_rows.append(pwr_row)

# Index by scenario and fix the column order to follow MODELS.
FDR_table   = pd.DataFrame(fdr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]
Power_table = pd.DataFrame(pwr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]

# NOTE(review): this import sits mid-cell; it is only needed for the display() calls below.
from IPython.display import display
print("\nFDR (mean (std))")
display(FDR_table)
print("\nPower (mean (std))")
display(Power_table)

# Save if needed
# FDR_table.to_csv("summary_FDR_MLP_CNN.csv")
# Power_table.to_csv("summary_Power_MLP_CNN.csv")

== quick diagnostic (first 3 seeds) ==
scenario=gaussian, model=mlp, examples=['mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv']
scenario=gaussian, model=cnn, examples=['cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv']
scenario=t3     , model=mlp, examples=['mlp_t3_fdr.csv', 'mlp_t3_fdr.csv', 'mlp_t3_fdr.csv']
scenario=t3     , model=cnn, examples=['cnn_t3_fdr.csv', 'cnn_t3_fdr.csv', 'cnn_t3_fdr.csv']
scenario=spiked , model=mlp, examples=['mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv']
scenario=spiked , model=cnn, examples=['cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv']

FDR (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.087 (0.092),0.087 (0.079)
t3,0.074 (0.133),0.041 (0.098)
spiked,0.116 (0.223),0.138 (0.214)



Power (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.031 (0.029),0.057 (0.033)
t3,0.012 (0.015),0.009 (0.010)
spiked,0.028 (0.033),0.059 (0.046)


In [11]:
# === Robust table builder for MLP/CNN (RNN excluded) ===
# From the per-seed CSVs under IN_DIR/seed_XXXX/, pick up only the final value
# (assumed to be the T=1000 value) and build Scenario x Model tables of FDR and Power (= 1 - Type II).
# Tolerates filename variation: tries both mlp_fdr_gaussian.csv and mlp_gaussian_fdr.csv.

from pathlib import Path
import pandas as pd
import numpy as np

# pandas display options applied before the summary tables are rendered.
pd.set_option("display.width", None)
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", None)

# Directory that is searched for seed_* sub-directories.
IN_DIR = Path("res/from_splits_m=1000")

# Scenario names and model prefixes used to build the per-seed CSV filenames.
SCENARIOS = [
    "gaussian",
    "t3",
    "spiked",
]
MODELS = [
    "mlp",
    "cnn",
]  # RNN is excluded from the aggregation

# Filename patterns to try, in this order:
#  - {model}_{metric}_{scenario}.csv  (e.g. mlp_fdr_gaussian.csv)
#  - {model}_{scenario}_{metric}.csv  (e.g. mlp_gaussian_fdr.csv)  <- the layout noted as the local one
def candidate_filenames(model: str, metric: str, scenario: str):
    """Return both accepted spellings of a result filename, metric-first spelling first."""
    metric_first = f"{model}_{metric}_{scenario}.csv"
    scenario_first = f"{model}_{scenario}_{metric}.csv"
    return [metric_first, scenario_first]

def read_last_value_from_csv(path: Path):
    """Return the last numeric cell of a headerless CSV as a float, or None.

    The file is read with ``header=None``, its cells are flattened row by row,
    coerced to numbers (unparseable cells become NaN) and the last non-NaN
    value is returned. The original author assumes this is the T=1000 value.

    Returns:
        float: the last numeric cell.
        None: when the file does not exist, is empty, or holds no numeric cell.
            Any other failure while reading also yields None, but a
            RuntimeWarning is emitted so that an unreadable file is not
            silently mistaken for a missing one.
    """
    import warnings

    try:
        if not path.exists():
            return None
        df = pd.read_csv(path, header=None)
        if df.empty:
            return None
        # Row-major flatten without DataFrame.stack(): the legacy stack
        # implementation raises a FutureWarning on recent pandas releases.
        flat = pd.Series(df.to_numpy(dtype=object).ravel(), dtype=object)
        numeric = pd.to_numeric(flat, errors="coerce").dropna()
        return float(numeric.iloc[-1]) if not numeric.empty else None
    except pd.errors.EmptyDataError:
        # A zero-byte file is an expected "no result" case; stay quiet.
        return None
    except Exception as exc:
        # Keep the best-effort contract (None), but make the failure visible.
        warnings.warn(
            f"could not read a value from {path}: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

def collect_per_seed_metric(model: str, metric: str, scenario: str):
    """Gather one final metric value per seed directory under IN_DIR.

    For each seed_* directory (visited in sorted order) the candidate filenames
    are probed in order and the first one that yields a value is used. Seeds
    for which no candidate yields a value are skipped.

    Returns a float ndarray of the collected values, or None if no seed
    contributed a value.
    """
    seed_dirs = sorted([entry for entry in IN_DIR.glob("seed_*") if entry.is_dir()])
    names = candidate_filenames(model, metric, scenario)

    collected = []
    for seed_dir in seed_dirs:
        # Lazily read candidates and stop at the first file that gives a value.
        readings = (read_last_value_from_csv(seed_dir / name) for name in names)
        hit = next((value for value in readings if value is not None), None)
        if hit is not None:
            collected.append(hit)

    if not collected:
        return None
    return np.array(collected, dtype=float)

def mean_std_str(arr, fmt="{:.3f} ({:.3f})", blank="—"):
    """Render ``arr`` as "mean (std)" text using ``fmt``.

    NaN entries are ignored via np.nanmean / np.nanstd (NumPy's default ddof).
    ``blank`` is returned when ``arr`` is None, has length 0, or its mean is NaN.
    """
    has_values = arr is not None and len(arr) > 0
    if not has_values:
        return blank

    center = np.nanmean(arr)
    spread = np.nanstd(arr)
    if np.isnan(center):
        return blank
    return fmt.format(center, spread)

# Diagnostic: for the first three seed directories (in sorted order), report which
# candidate "fdr" filename exists on disk, or "(missing)" when neither does.
# Only file existence is checked here; the file contents are not read.
print("== quick diagnostic (first 3 seeds) ==")
for sc in SCENARIOS:
    for model in MODELS:
        paths = []
        for sd in sorted([p for p in IN_DIR.glob("seed_*") if p.is_dir()])[:3]:
            cand = [sd / fn for fn in candidate_filenames(model, "fdr", sc)]
            ok = [p.name for p in cand if p.exists()]
            # The first existing candidate is reported, following candidate order.
            paths.append(ok[0] if ok else "(missing)")
        print(f"scenario={sc:7s}, model={model:3s}, examples={paths}")

# Aggregation: FDR and Power (= 1 - typeII) go into two separate tables,
# one row per scenario and one column per model.
fdr_rows, pwr_rows = [], []
for sc in SCENARIOS:
    fdr_row = {"Scenario": sc}
    pwr_row = {"Scenario": sc}
    for model in MODELS:
        fdr_vals = collect_per_seed_metric(model, "fdr", sc)
        t2_vals  = collect_per_seed_metric(model, "typeII", sc)
        # Power is derived element-wise from the collected Type II values.
        pwr_vals = (1.0 - t2_vals) if t2_vals is not None else None
        fdr_row[model.upper()] = mean_std_str(fdr_vals)
        pwr_row[model.upper()] = mean_std_str(pwr_vals)
    fdr_rows.append(fdr_row)
    pwr_rows.append(pwr_row)

# Index by scenario and fix the column order to follow MODELS.
FDR_table   = pd.DataFrame(fdr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]
Power_table = pd.DataFrame(pwr_rows).set_index("Scenario")[ [m.upper() for m in MODELS] ]

# NOTE(review): this import sits mid-cell; it is only needed for the display() calls below.
from IPython.display import display
print("\nFDR (mean (std))")
display(FDR_table)
print("\nPower (mean (std))")
display(Power_table)

# Save if needed
# FDR_table.to_csv("summary_FDR_MLP_CNN.csv")
# Power_table.to_csv("summary_Power_MLP_CNN.csv")

== quick diagnostic (first 3 seeds) ==
scenario=gaussian, model=mlp, examples=['mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv', 'mlp_gaussian_fdr.csv']
scenario=gaussian, model=cnn, examples=['cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv', 'cnn_gaussian_fdr.csv']
scenario=t3     , model=mlp, examples=['mlp_t3_fdr.csv', 'mlp_t3_fdr.csv', 'mlp_t3_fdr.csv']
scenario=t3     , model=cnn, examples=['cnn_t3_fdr.csv', 'cnn_t3_fdr.csv', 'cnn_t3_fdr.csv']
scenario=spiked , model=mlp, examples=['mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv', 'mlp_spiked_fdr.csv']
scenario=spiked , model=cnn, examples=['cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv', 'cnn_spiked_fdr.csv']

FDR (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.060 (0.030),0.075 (0.031)
t3,0.053 (0.057),0.040 (0.039)
spiked,0.047 (0.036),0.070 (0.023)



Power (mean (std))


Unnamed: 0_level_0,MLP,CNN
Scenario,Unnamed: 1_level_1,Unnamed: 2_level_1
gaussian,0.186 (0.069),0.546 (0.093)
t3,0.080 (0.068),0.122 (0.111)
spiked,0.187 (0.066),0.571 (0.094)


In [3]:
import pandas as pd
# Load the DeepLINK summary CSV from the path below; as the cell's last expression it is shown as the output.
pd.read_csv("../DeepLINK/deeplink_runs_m=2000/deeplink_summary.csv")

Unnamed: 0,scenario,FDR_mean,FDR_std,Power_mean,Power_std,R_mean,R_std
0,gaussian,0.089605,0.021676,0.9998,0.000894,274.7,6.58627
1,spiked,0.087772,0.021688,1.0,0.0,274.2,6.453069
2,t3,0.081223,0.030194,0.8706,0.218809,238.25,60.688918


In [2]:
import pandas as pd
# Load the summary CSV from res_sngm_m=2000; as the cell's last expression it is shown as the output.
pd.read_csv("res_sngm_m=2000/summary.csv")

Unnamed: 0,scenario,n_seeds,FDR_mean,FDR_std,Power_mean,Power_std,R_mean,R_std
0,gaussian,20,0.086004,0.015435,0.8514,0.013403,232.9,2.826659
1,spiked,20,0.080625,0.016423,0.847,0.017012,230.35,4.304358
2,t3,20,0.06359,0.031835,0.5994,0.151907,160.9,41.794617


In [4]:
import pandas as pd
# Load the summary CSV from res_sngm_m=1000; as the cell's last expression it is shown as the output.
pd.read_csv("res_sngm_m=1000/summary.csv")

Unnamed: 0,scenario,n_seeds,FDR_mean,FDR_std,Power_mean,Power_std,R_mean,R_std
0,gaussian,20,0.131484,0.031154,0.658,0.041617,189.75,14.982907
1,spiked,20,0.151463,0.03165,0.6748,0.046288,199.35,18.631358
2,t3,20,0.144602,0.061272,0.4018,0.127972,117.05,37.95718


In [5]:
import pandas as pd
# Load the summary CSV from res_sngm_m=500; as the cell's last expression it is shown as the output.
pd.read_csv("res_sngm_m=500/summary.csv")

Unnamed: 0,scenario,n_seeds,FDR_mean,FDR_std,Power_mean,Power_std,R_mean,R_std
0,gaussian,20,0.242608,0.049786,0.3806,0.093657,127.45,35.148933
1,spiked,20,0.264917,0.03989,0.4358,0.056221,149.0,22.825424
2,t3,20,0.325895,0.045576,0.4282,0.076988,158.9,28.715675
