In [2]:
import pandas as pd
import os
import sys

# --- 0. 경로 설정 ---
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
    
# [기준] 주가 데이터가 저장된 폴더
PRICE_DIR = os.path.join(PROJECT_ROOT, "prices_daily")

print(f"Loading price data from: {PRICE_DIR}")

# --- 1. 마스터 티커 리스트 정의 ---
TICKER_LIST = ["042660", "009540", "010140", "443060", "010620", "329180"]
TICKER_NAMES = {
    "042660": "HD한국조선해양",
    "009540": "HD현대중공업",
    "010140": "삼성중공업",
    "443060": "한화오션",
    "010620": "현대미포조선",
    "329180": "HD현대마린솔루션"
}

# --- 2. 6개 주가 CSV 파일 로드 및 병합 ---
all_price_dfs = []
for ticker in TICKER_LIST:
    file_path = os.path.join(PRICE_DIR, f"{ticker}.csv")
    
    try:
        df = pd.read_csv(file_path, parse_dates=['date'])
        
        # [핵심] 어떤 기업의 데이터인지 Ticker 컬럼 추가
        df['ticker'] = ticker
        
        all_price_dfs.append(df)
        print(f"  > Loaded {ticker} ({TICKER_NAMES.get(ticker)}): {len(df)} rows")
        
    except FileNotFoundError:
        print(f"  > [Warning] {file_path} not found. Skipping {ticker}.")
    except Exception as e:
        print(f"  > [Error] Loading {ticker} failed: {e}")

# --- 3. 하나의 master_df로 합치기 ---
if not all_price_dfs:
    print("\n[!!!] Error: No price data loaded. Please check the PRICE_DIR path.")
else:
    # 6개 기업의 주가 데이터를 상하(row-wise)로 합침
    master_df = pd.concat(all_price_dfs)
    
    # [정렬] 기업(ticker)별로, 그리고 날짜(date)별로 정렬
    master_df = master_df.sort_values(by=['ticker', 'date']).reset_index(drop=True)

    print("\n[SUCCESS] 6개 기업 주가 데이터를 'master_df'로 통합 완료!")
    print(f"Total rows in master_df: {len(master_df)}")
    
    print("\n--- master_df (Head) ---")
    print(master_df.head())
    
    print("\n--- master_df (Tail) ---")
    print(master_df.tail())

Loading price data from: /workspace/ship-ai/data/prices_daily
  > Loaded 042660 (HD한국조선해양): 1477 rows
  > Loaded 009540 (HD현대중공업): 1477 rows
  > Loaded 010140 (삼성중공업): 1477 rows
  > Loaded 443060 (한화오션): 159 rows
  > Loaded 010620 (현대미포조선): 1477 rows
  > Loaded 329180 (HD현대마린솔루션): 805 rows

[SUCCESS] 6개 기업 주가 데이터를 'master_df'로 통합 완료!
Total rows in master_df: 6872

--- master_df (Head) ---
        date    open    high     low   close  trading_volume  ticker
0 2019-01-02  129500  131500  125000  125500          143838  009540
1 2019-01-03  126500  133000  126000  131500          334941  009540
2 2019-01-04  132000  139500  130500  138500          369927  009540
3 2019-01-07  140500  143000  137500  139000          247943  009540
4 2019-01-08  140000  140000  133500  134000          247213  009540

--- master_df (Tail) ---
           date    open    high     low   close  trading_volume  ticker
6867 2024-12-23  161900  162900  154400  156900          167206  443060
6868 2024-12-24  157100 

In [5]:
DATA_DIR=os.path.join(PROJECT_ROOT, "processed")
CLEANED_FINANCIALS_FILE = os.path.join(DATA_DIR, "master.csv")
master_df.to_csv(CLEANED_FINANCIALS_FILE, index=False)