In [1]:
from pathlib import Path
import re
import pandas as pd

# Path setup. Path().resolve() yields the current working directory (not the
# notebook file itself), so this assumes the kernel was started from the
# notebook's folder, i.e. <project>/experiments.
THIS_NOTEBOOK = Path.cwd().resolve()
PROJECT_ROOT = THIS_NOTEBOOK.parent  # one level up: the project root (dsvm-dtw/)
RESULTS_DIR = PROJECT_ROOT.joinpath("results", "tables")

# Echo the resolved locations so a wrong working directory is spotted early.
print("PROJECT_ROOT:", PROJECT_ROOT)
print("RESULTS_DIR :", RESULTS_DIR)

PROJECT_ROOT: C:\Users\siam2\Desktop\dsvm-dtw
RESULTS_DIR : C:\Users\siam2\Desktop\dsvm-dtw\results\tables


In [2]:
# Shared run parameters (kept as plain variables so the same cell can be
# reused later for other settings).
dataset, stream = "ECG5000", "test_outcontrol"
mode, data_version = "dtw", "original"
seed, m = 2025, 100

# Files to merge: one CSV per explicitly listed rep range.
file_names = [
    f"{dataset}_{stream}_p_swk_matrix_{mode}_{data_version}({rep_range})_bootstrap_seed{seed}_m={m}.csv"
    for rep_range in ("0~499", "500~1999")
]

# Full paths inside the results directory; shown as the cell output.
file_paths = [RESULTS_DIR.joinpath(file_name) for file_name in file_names]
file_paths


[WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_outcontrol_p_swk_matrix_dtw_original(0~499)_bootstrap_seed2025_m=100.csv'),
 WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_outcontrol_p_swk_matrix_dtw_original(500~1999)_bootstrap_seed2025_m=100.csv')]

In [3]:
def _read_rep_table(csv_path):
    """Read one partial result CSV, logging its file name and shape.

    The first CSV column is used as the row index (restores the 't' index).
    """
    print(f"읽는 중: {csv_path.name}")
    table = pd.read_csv(csv_path, index_col=0)
    print("  shape:", table.shape)
    return table


# Load every partial table in the order the files were listed.
dfs = [_read_rep_table(csv_path) for csv_path in file_paths]

# Merge horizontally (column-wise): rows are aligned on the index.
df_merged = pd.concat(dfs, axis="columns")

print("병합 결과 shape:", df_merged.shape)
df_merged.head()


읽는 중: ECG5000_test_outcontrol_p_swk_matrix_dtw_original(0~499)_bootstrap_seed2025_m=100.csv
  shape: (100, 500)
읽는 중: ECG5000_test_outcontrol_p_swk_matrix_dtw_original(500~1999)_bootstrap_seed2025_m=100.csv
  shape: (100, 1500)
병합 결과 shape: (100, 2000)


Unnamed: 0_level_0,rep_0,rep_1,rep_2,rep_3,rep_4,rep_5,rep_6,rep_7,rep_8,rep_9,...,rep_1990,rep_1991,rep_1992,rep_1993,rep_1994,rep_1995,rep_1996,rep_1997,rep_1998,rep_1999
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.322255,0.33986,0.3316,0.320517,0.325025,0.330175,0.326417,0.328414,0.346132,0.311175,...,0.326186,0.331629,0.348634,0.327837,0.341893,0.303325,0.31024,0.342213,0.318712,0.337981
1,0.307569,0.336232,0.376016,0.3412,0.343527,0.337977,0.337915,0.319781,0.319418,0.328727,...,0.360634,0.347343,0.338916,0.344122,0.358577,0.345248,0.345086,0.343557,0.348165,0.342687
2,0.327368,0.371829,0.339363,0.36213,0.365707,0.37526,0.383356,0.331345,0.352723,0.379029,...,0.341534,0.387094,0.355994,0.358749,0.380129,0.371939,0.362343,0.349521,0.391304,0.359901
3,0.372213,0.38871,0.38879,0.376744,0.381497,0.37435,0.343659,0.39549,0.393246,0.403136,...,0.326407,0.374072,0.369581,0.371938,0.401974,0.362367,0.346419,0.366481,0.410909,0.3847
4,0.406512,0.423906,0.445861,0.369098,0.374698,0.39363,0.378518,0.414995,0.391736,0.439518,...,0.363733,0.414997,0.381583,0.373247,0.412303,0.381956,0.381024,0.420852,0.414725,0.394558


In [4]:
# 1) Index check: every table must have the same row index (t) as the first.
reference_index = dfs[0].index
other_indexes = [frame.index for frame in dfs[1:]]
all_index_equal = all(map(reference_index.equals, other_indexes))
if not all_index_equal:
    raise ValueError("병합 대상 파일들의 index(t)가 서로 다릅니다.")

# 2) Duplicate-column check: the merged table must not repeat a column label.
duplicated_mask = df_merged.columns.duplicated()
if duplicated_mask.any():
    dup_cols = df_merged.columns[duplicated_mask]
    raise ValueError(f"중복된 컬럼이 있습니다: {dup_cols.tolist()}")

print("index 및 컬럼 검사 통과 ✅")


index 및 컬럼 검사 통과 ✅


In [5]:
# Extract each file's rep range (start, end) from its name with a regex.
# The pattern expects "...(<start>~<end>)_bootstrap..." in the file name.
pattern = re.compile(r".*\((\d+)~(\d+)\)_bootstrap")

ranges = []
for path in file_paths:
    mobj = pattern.match(path.name)
    if mobj is None:
        raise ValueError(f"파일 이름에서 범위를 파싱할 수 없습니다: {path.name}")
    start, end = int(mobj.group(1)), int(mobj.group(2))
    if start > end:
        # A reversed range can only come from a mistyped file name.
        raise ValueError(f"잘못된 rep 범위입니다 ({start}~{end}): {path.name}")
    ranges.append((start, end))

# The merged file name advertises min(start)~max(end), which is only truthful
# when the individual ranges tile that interval exactly. Reject gaps and
# overlaps so the saved name cannot misdescribe the merged content.
sorted_ranges = sorted(ranges)
for (prev_start, prev_end), (next_start, next_end) in zip(sorted_ranges, sorted_ranges[1:]):
    if next_start != prev_end + 1:
        raise ValueError(
            "rep 범위가 연속되지 않습니다(누락 또는 중복): "
            f"({prev_start}~{prev_end}) 다음에 ({next_start}~{next_end})"
        )

global_start = min(s for s, e in ranges)
global_end = max(e for s, e in ranges)

rep_str_merged = f"{global_start}~{global_end}"

# Same naming scheme as the input files, with the merged rep range.
save_name_merged = (
    f"{dataset}_{stream}_p_swk_matrix_{mode}_{data_version}"
    f"({rep_str_merged})_bootstrap_seed{seed}_m={m}.csv"
)

# Destination path; shown as the cell output.
save_path_merged = RESULTS_DIR / save_name_merged
save_path_merged


WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_outcontrol_p_swk_matrix_dtw_original(0~1999)_bootstrap_seed2025_m=100.csv')

In [6]:
# Write the merged table (index included) to the merged-range file name,
# then report the destination.
df_merged.to_csv(path_or_buf=save_path_merged)
print(f"✅ 병합 완료: {save_path_merged}")

✅ 병합 완료: C:\Users\siam2\Desktop\dsvm-dtw\results\tables\ECG5000_test_outcontrol_p_swk_matrix_dtw_original(0~1999)_bootstrap_seed2025_m=100.csv
