In [40]:
from pathlib import Path
import re
import pandas as pd

# Path setup. Assumes the kernel's working directory is the notebook's own
# folder (e.g. <project>/experiments), so the project root is one level up.
# NOTE: THIS_NOTEBOOK holds the resolved working *directory*, not the .ipynb file.
THIS_NOTEBOOK = Path(".").resolve()
PROJECT_ROOT = THIS_NOTEBOOK.parent   # dsvm-dtw/
RESULTS_DIR = PROJECT_ROOT.joinpath("results", "tables")

# Echo the resolved locations so a wrong working directory is noticed early.
print(f"PROJECT_ROOT: {PROJECT_ROOT}")
print(f"RESULTS_DIR : {RESULTS_DIR}")

PROJECT_ROOT: C:\Users\siam2\Desktop\dsvm-dtw
RESULTS_DIR : C:\Users\siam2\Desktop\dsvm-dtw\results\tables


In [41]:
# Shared run parameters, kept in one place so the same cells can be reused
# for other configurations later.
dataset, stream = "ECG5000", "test_incontrol"
mode, data_version = "dtw", "original"
seed, m = 2025, 100

# Source tables to merge, listed explicitly: one file per replicate range
# label as it appears inside the parentheses of the file name.
file_names = [
    f"{dataset}_{stream}_p_swk_matrix_{mode}_{data_version}({rep_range})_bootstrap_seed{seed}_m={m}.csv"
    for rep_range in ("0~1399", "1400~1599")
]

# Absolute locations of the source tables under RESULTS_DIR.
file_paths = [RESULTS_DIR.joinpath(file_name) for file_name in file_names]
file_paths


[WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_incontrol_p_swk_matrix_dtw_original(0~1399)_bootstrap_seed2025_m=100.csv'),
 WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_incontrol_p_swk_matrix_dtw_original(1400~1599)_bootstrap_seed2025_m=100.csv')]

In [42]:
def _read_table(csv_path):
    """Load one source CSV, logging its name and shape.

    The first CSV column is used as the row index (restores the 't' index).
    """
    print(f"읽는 중: {csv_path.name}")
    table = pd.read_csv(csv_path, index_col=0)
    print("  shape:", table.shape)
    return table


# One DataFrame per source file, in the same order as file_paths.
dfs = [_read_table(csv_path) for csv_path in file_paths]

# Merge side by side: columns of every file are appended along axis=1.
df_merged = pd.concat(dfs, axis=1)

print("병합 결과 shape:", df_merged.shape)
df_merged.head()


읽는 중: ECG5000_test_incontrol_p_swk_matrix_dtw_original(0~1399)_bootstrap_seed2025_m=100.csv
  shape: (100, 1400)
읽는 중: ECG5000_test_incontrol_p_swk_matrix_dtw_original(1400~1599)_bootstrap_seed2025_m=100.csv
  shape: (100, 200)
병합 결과 shape: (100, 1600)


Unnamed: 0_level_0,rep_0,rep_1,rep_2,rep_3,rep_4,rep_5,rep_6,rep_7,rep_8,rep_9,...,rep_1590,rep_1591,rep_1592,rep_1593,rep_1594,rep_1595,rep_1596,rep_1597,rep_1598,rep_1599
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.335428,0.336391,0.317644,0.344476,0.350154,0.329662,0.371649,0.364257,0.321974,0.332561,...,0.330352,0.33521,0.322795,0.341783,0.331284,0.339571,0.321378,0.313664,0.321413,0.356983
1,0.353636,0.319329,0.340488,0.339925,0.322373,0.362845,0.406785,0.31954,0.318139,0.320281,...,0.320309,0.341136,0.335775,0.345725,0.311595,0.358152,0.373146,0.338016,0.336747,0.331077
2,0.320013,0.32129,0.312486,0.330468,0.333472,0.322151,0.329023,0.295538,0.331163,0.346134,...,0.347242,0.331914,0.322877,0.321049,0.329462,0.337543,0.322725,0.315653,0.315703,0.331281
3,0.346836,0.334228,0.31513,0.324208,0.310805,0.324133,0.325731,0.324162,0.334737,0.326392,...,0.33606,0.347402,0.340304,0.323832,0.35133,0.321128,0.369559,0.3362,0.312434,0.330798
4,0.329336,0.373085,0.312056,0.322756,0.323458,0.327323,0.356152,0.322536,0.344829,0.353662,...,0.331142,0.32928,0.316223,0.337115,0.315455,0.326364,0.348447,0.346866,0.30978,0.332657


In [43]:
# 1) Every source table must carry exactly the same row index (t) as the
#    first one; a column-wise merge is only meaningful when rows line up.
all_index_equal = not any(
    not dfs[0].index.equals(other.index) for other in dfs[1:]
)
if all_index_equal is False:
    raise ValueError("병합 대상 파일들의 index(t)가 서로 다릅니다.")

# 2) No column label may occur more than once in the merged table;
#    report the repeated labels if any are found.
if df_merged.columns.has_duplicates:
    dup_cols = df_merged.columns[df_merged.columns.duplicated()]
    raise ValueError(f"중복된 컬럼이 있습니다: {dup_cols.tolist()}")

print("index 및 컬럼 검사 통과 ✅")


index 및 컬럼 검사 통과 ✅


In [44]:
# Recover each source file's replicate range (start, end) from its file name.
# The leading ".*" is greedy, so if a name contained several "(a~b)_bootstrap"
# groups the last one would be captured.
pattern = re.compile(r".*\((\d+)~(\d+)\)_bootstrap")

ranges = []
for path in file_paths:
    mobj = pattern.match(path.name)
    if mobj is None:
        raise ValueError(f"파일 이름에서 범위를 파싱할 수 없습니다: {path.name}")
    start, end = int(mobj.group(1)), int(mobj.group(2))
    if start > end:
        # An inverted range would corrupt the min/max computation below.
        raise ValueError(f"잘못된 범위입니다 (start > end): {path.name}")
    ranges.append((start, end))

if not ranges:
    # Fail with a clear message instead of min()'s generic empty-sequence error.
    raise ValueError("병합할 파일이 없습니다.")

# The merged file name advertises min(start)~max(end). That label is only
# truthful when the (inclusive) ranges tile the span with no gap and no
# overlap, so each range must begin right after the previous one ends.
ordered_ranges = sorted(ranges)
for prev_range, next_range in zip(ordered_ranges, ordered_ranges[1:]):
    if next_range[0] != prev_range[1] + 1:
        raise ValueError(
            f"rep 범위가 연속적이지 않습니다 (누락 또는 중복): {ordered_ranges}"
        )

global_start = min(s for s, e in ranges)
global_end = max(e for s, e in ranges)

rep_str_merged = f"{global_start}~{global_end}"

# Same naming scheme as the source files, with the combined range label.
save_name_merged = (
    f"{dataset}_{stream}_p_swk_matrix_{mode}_{data_version}"
    f"({rep_str_merged})_bootstrap_seed{seed}_m={m}.csv"
)

save_path_merged = RESULTS_DIR / save_name_merged
save_path_merged


WindowsPath('C:/Users/siam2/Desktop/dsvm-dtw/results/tables/ECG5000_test_incontrol_p_swk_matrix_dtw_original(0~1599)_bootstrap_seed2025_m=100.csv')

In [45]:
# Write the merged table to the combined-range file name under RESULTS_DIR,
# then report where it was saved.
df_merged.to_csv(path_or_buf=save_path_merged)
print(f"✅ 병합 완료: {save_path_merged}")

✅ 병합 완료: C:\Users\siam2\Desktop\dsvm-dtw\results\tables\ECG5000_test_incontrol_p_swk_matrix_dtw_original(0~1599)_bootstrap_seed2025_m=100.csv
