In [None]:
import os
import pandas as pd
from tqdm import tqdm

In [None]:
# --- Configuration -----------------------------------------------------------
# Source CSV that is streamed in chunks by the cells below.
# (Alternative input used previously: "All_external.csv")
filename = "nasdaq_exteral_data.csv"

# Number of rows pandas reads per chunk.
chunk_size = 100_000

# Directory that receives the per-symbol output files; created if missing.
save_dir = "new_data"
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Pass 1: stream the source CSV once and collect every distinct stock symbol.
# Only the "Stock_symbol" column is parsed (usecols), so this pass does not pay
# for columns it never looks at, and the chunk size comes from the config cell
# instead of a second hard-coded literal.
reader = pd.read_csv(
    filename,
    usecols=['Stock_symbol'],
    chunksize=chunk_size,
    encoding='utf-8',
    low_memory=False,
)
symbols = set()

for chunk in reader:
    # Drop missing values so NaN never becomes a "symbol" (it could not be
    # matched with == later and would only yield a bogus "nan" target).
    symbols.update(chunk['Stock_symbol'].dropna().unique())

print(symbols)

In [None]:
# Pass 2: split the source CSV into one zstd-compressed CSV per stock symbol.

# Target symbols: every symbol discovered by the scan cell above.
stock_list = list(symbols)
# Not populated by this cell; kept only so that any later cell referencing it
# still finds the name.
filtered_data = {symbol: [] for symbol in stock_list}

# Stream the source in chunks (size set in the config cell) to keep RAM usage low.
reader = pd.read_csv(filename, chunksize=chunk_size, encoding='utf-8', low_memory=False)

# Output paths already (re)created during THIS run. Deciding "first write" from
# this set rather than from os.path.exists() means a re-run overwrites files
# left by a previous run instead of appending a duplicate copy of every row.
written_paths = set()

print("🚀 필터링 시작...")

for chunk in tqdm(reader):
    # Keep only rows that belong to the target symbols.
    chunk = chunk[chunk["Stock_symbol"].isin(stock_list)]

    # Split the chunk by symbol in a single pass. groupby only yields symbols
    # that actually occur in this chunk and skips rows whose key is NaN, so no
    # per-symbol mask over the whole chunk and no emptiness check is needed.
    for symbol, symbol_df in chunk.groupby("Stock_symbol", sort=False):
        save_path = os.path.join(save_dir, f"{symbol}.csv.zst")
        # First write of this run: create/truncate the file and emit the header.
        # Later writes: append rows only.
        first_write = save_path not in written_paths
        # NOTE(review): every append adds a new zstd frame to the file; confirm
        # that whatever reads these files decodes across multiple frames.
        symbol_df.to_csv(
            save_path,
            compression={"method": "zstd", "level": 5},
            mode='w' if first_write else 'a',
            header=first_write,
            index=False
        )
        written_paths.add(save_path)
        # tqdm.write keeps the progress bar intact while logging.
        tqdm.write(f"✅ 저장됨: {symbol} ({len(symbol_df)} rows)")

print("🎉 모든 회사 필터링 및 저장 완료!")