In [None]:
pip install pandas pandas-datareader yfinance quandl

In [1]:
import pandas as pd
import pandas_datareader.data as web
import yfinance as yf
import datetime
import os
import sys

# --- 0. 설정 ---

# 프로젝트 루트 경로
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
    print(f"Added project root to sys.path: {PROJECT_ROOT}")

# 데이터 기간 설정 (리베이스를 위해 2015년부터 시작)
START_DATE = datetime.datetime(2015, 1, 1)
END_DATE = datetime.datetime.now()

# 저장 경로
SAVE_DIR = os.path.join(PROJECT_ROOT, "data", "processed")
SAVE_FILE_PATH = os.path.join(SAVE_DIR, "macro_data.csv")
os.makedirs(SAVE_DIR, exist_ok=True)

print(f"--- Creating macro_data.csv (Plan D) ---")
print(f"Period: {START_DATE.date()} to {END_DATE.date()}")

# --- 1. WTI 및 BDI 프록시 (BDRY) 동시 조회 (yfinance) ---
# [!!!] 커널 재시작 후 이 셀만 단독으로 실행하세요 [!!!]
print("1/3: Fetching WTI (CL=F) and BDI Proxy (BDRY) from yfinance...")
try:
    # 두 티커를 리스트로 묶어 한 번에 요청
    data = yf.download(['CL=F', 'BDRY'], start=START_DATE, end=END_DATE)
    
    # yfinance가 멀티-레벨 컬럼으로 반환하므로 'Close' 가격만 추출
    wti_series = data['Close']['CL=F'].rename('wti')
    bdi_series = data['Close']['BDRY'].rename('bdi_proxy')
    
    print(f"  > Fetched WTI data: {len(wti_series)} rows")
    print(f"  > Fetched BDI Proxy (BDRY) data: {len(bdi_series)} rows")

except Exception as e:
    print(f"  > yfinance Fetch FAILED: {e}")
    # 실패 시에도 코드가 중단되지 않도록 빈 시리즈 생성
    wti_series = pd.Series(dtype='float64', name='wti') 
    bdi_series = pd.Series(dtype='float64', name='bdi_proxy')

# --- 2. 신조선가 프록시 (FRED PPI) 불러오기 및 리베이스 ---
print("2/3: Fetching Newbuild Proxy (PCU336611336611) from FRED...")
try:
    proxy_df = web.DataReader('PCU336611336611', 'fred', START_DATE, END_DATE)
    proxy_series = proxy_df['PCU336611336611']
    
    # 2015년 평균값 계산 (리베이스 기준)
    base_value = proxy_series['2015'].mean()
    
    # 2015=100으로 리베이스
    rebased_series = (proxy_series / base_value) * 100
    rebased_series.name = 'newbuild_proxy_2015_100'
    
    print(f"  > Fetched Proxy data: {len(rebased_series)} rows")
    print(f"  > 2015 Base Value for rebasing: {base_value:.2f}")
except Exception as e:
    print(f"  > Proxy Fetch FAILED: {e}")
    rebased_series = pd.Series(dtype='float64', name='newbuild_proxy_2015_100')

# --- 3. 데이터 병합, 일별 리샘플링 및 ffill ---
print("3/3: Combining, resampling to daily, and forward-filling...")

# 기준이 될 일별 캘린더 생성
daily_calendar = pd.date_range(start=START_DATE, end=END_DATE, freq='D')
master_df = pd.DataFrame(index=daily_calendar)

# 3개 시리즈를 마스터 캘린더에 병합 (join)
master_df = master_df.join(wti_series)
master_df = master_df.join(bdi_series) 
master_df = master_df.join(rebased_series)

# [핵심] ffill (Forward Fill) 적용
master_df = master_df.ffill()

# 데이터가 시작되기 전의 맨 앞쪽 NaN 값들 제거
master_df = master_df.dropna(how='all') # 'any' -> 'all' (모든 컬럼이 NaN인 행만 제거)

# --- 4. CSV 파일로 저장 ---
master_df.to_csv(SAVE_FILE_PATH, index=True, index_label='date')

print("\n--- Success! ---")
print(f"Saved macro_data.csv to: {SAVE_FILE_PATH}")
print("\n--- Data Head (5 rows) ---")
print(master_df.head())
print("\n--- Data Tail (5 rows) ---")
print(master_df.tail())

Added project root to sys.path: /workspace/ship-ai
--- Creating macro_data.csv (Plan D) ---
Period: 2015-01-01 to 2025-11-01
1/3: Fetching WTI (CL=F) and BDI Proxy (BDRY) from yfinance...


  data = yf.download(['CL=F', 'BDRY'], start=START_DATE, end=END_DATE)
[*********************100%***********************]  2 of 2 completed

  > Fetched WTI data: 2725 rows
  > Fetched BDI Proxy (BDRY) data: 2725 rows
2/3: Fetching Newbuild Proxy (PCU336611336611) from FRED...
  > Fetched Proxy data: 128 rows
  > 2015 Base Value for rebasing: 203.89
3/3: Combining, resampling to daily, and forward-filling...

--- Success! ---
Saved macro_data.csv to: /workspace/ship-ai/data/processed/macro_data.csv

--- Data Head (5 rows) ---
                  wti  bdi_proxy  newbuild_proxy_2015_100
2015-01-01        NaN        NaN                 99.07222
2015-01-02  52.689999        NaN                 99.07222
2015-01-03  52.689999        NaN                 99.07222
2015-01-04  52.689999        NaN                 99.07222
2015-01-05  50.040001        NaN                 99.07222

--- Data Tail (5 rows) ---
                  wti  bdi_proxy  newbuild_proxy_2015_100
2025-10-28  60.150002      7.953               126.601545
2025-10-29  60.480000      8.200               126.601545
2025-10-30  60.570000      8.250               126.601545
2


