In [3]:
import os
import re
from collections import defaultdict

In [4]:
def group_merged_files(directory: str):
    """
    지정된 폴더 내 {종목명}_Merged_{연도}.csv 형식의 파일을 종목별로 그룹화하여 반환합니다.

    Parameters:
    directory (str): CSV 파일들이 저장된 디렉토리 경로

    Returns:
    dict: {종목명: [파일 목록]} 형태의 딕셔너리
    """
    file_pattern = re.compile(r"(.+)_Merged_(\d{4})\.csv")
    grouped_files = defaultdict(list)

    for filename in os.listdir(directory):
        match = file_pattern.match(filename)
        if match:
            stock_name = match.group(1)  # 종목명 추출
            grouped_files[stock_name].append(filename)

    return dict(grouped_files)

In [5]:
# 사용 예시
directory_path = "merged_data"  # 실제 폴더 경로로 변경
grouped_files = group_merged_files(directory_path)

In [6]:
# 결과 출력
for stock, files in grouped_files.items():
    print(f"{stock}: {files}")

AAPL: ['AAPL_Merged_2024.csv', 'AAPL_Merged_2023.csv']
DIS: ['DIS_Merged_2023.csv', 'DIS_Merged_2024.csv']
MRK: ['MRK_Merged_2023.csv']
PG: ['PG_Merged_2024.csv', 'PG_Merged_2023.csv']
UNH: ['UNH_Merged_2023.csv']
SHW: ['SHW_Merged_2023.csv']
KO: ['KO_Merged_2023.csv']
HON: ['HON_Merged_2024.csv', 'HON_Merged_2023.csv']
V: ['V_Merged_2024.csv', 'V_Merged_2023.csv']
MSFT: ['MSFT_Merged_2024.csv', 'MSFT_Merged_2023.csv']
MMM: ['MMM_Merged_2023.csv', 'MMM_Merged_2024.csv']
IBM: ['IBM_Merged_2023.csv']
CSCO: ['CSCO_Merged_2023.csv', 'CSCO_Merged_2024.csv']
AMZN: ['AMZN_Merged_2023.csv', 'AMZN_Merged_2024.csv']
HD: ['HD_Merged_2024.csv', 'HD_Merged_2023.csv']
TRV: ['TRV_Merged_2024.csv', 'TRV_Merged_2023.csv']
MCD: ['MCD_Merged_2023.csv']
NKE: ['NKE_Merged_2024.csv', 'NKE_Merged_2023.csv']
CAT: ['CAT_Merged_2024.csv', 'CAT_Merged_2023.csv']
VZ: ['VZ_Merged_2024.csv', 'VZ_Merged_2023.csv']
JPM: ['JPM_Merged_2023.csv', 'JPM_Merged_2024.csv']
GS: ['GS_Merged_2023.csv']
AXP: ['AXP_Merged_2023.c

In [7]:
len(grouped_files)

30