In [16]:
import pandas as pd

# Load the four input tables (paths are relative to the working directory).
# Files read from ../raw_data:
df_stock = pd.read_csv("../raw_data/SPY_주식데이터.csv")
df_ind = pd.read_csv("../raw_data/spy_nav_only.csv")
# Files read from ../output_data:
df_env = pd.read_csv("../output_data/SPY_투자지표환경변수_통합.csv")
y = pd.read_csv("../output_data/SPY_y_target_data.csv")

# -------------------------------------------------------------------
# 1) 날짜 변환: tz 무시 + 문자열로 변환 후 날짜만 추출
# -------------------------------------------------------------------

def normalize_date(series: pd.Series) -> pd.Series:
    """Reduce a column of date-like values to timezone-naive calendar dates.

    Each value is rendered as text, surrounding whitespace is removed, and
    only the first 10 characters (the ``YYYY-MM-DD`` part) are parsed. Any
    trailing time or UTC-offset suffix is therefore ignored, not converted.

    Parameters
    ----------
    series : pd.Series
        Values whose string form is expected to start with ``YYYY-MM-DD``.

    Returns
    -------
    pd.Series
        Datetime values on the same index as ``series``. Entries whose first
        10 characters do not match ``%Y-%m-%d`` (missing values included)
        become ``NaT`` instead of raising.
    """
    # Strip first: a padded value such as " 2022-11-07" would otherwise be cut
    # at the wrong offset and silently coerced to NaT.
    text = series.astype(str).str.strip()
    # Keep only the date part (first 10 characters, YYYY-MM-DD).
    date_part = text.str.slice(0, 10)
    return pd.to_datetime(date_part, format="%Y-%m-%d", errors="coerce")

# Normalize the join key of each feature table to a plain calendar date.
df_stock["Date"] = normalize_date(df_stock["Date"])
df_env["Date"]   = normalize_date(df_env["Date"])
df_ind["Date"]   = normalize_date(df_ind["Date"])

# normalize_date turns unparsable values into NaT, and pandas merges match
# null keys against each other, so NaT rows from different tables would be
# paired up by the joins below. Drop them before merging.
df_stock = df_stock.dropna(subset=["Date"])
df_env   = df_env.dropna(subset=["Date"])
df_ind   = df_ind.dropna(subset=["Date"])

# -------------------------------------------------------------------
# 2) Inner join df_stock with df_env on Date
# -------------------------------------------------------------------
merged_1 = df_stock.merge(df_env, how="inner", on="Date")

# -------------------------------------------------------------------
# 3) Inner join that result with df_ind on Date
# -------------------------------------------------------------------
final_df = merged_1.merge(df_ind, how="inner", on="Date")

# -------------------------------------------------------------------
# 4) Inspect the result: first rows, shape, column dtypes
# -------------------------------------------------------------------
print(final_df.head(), final_df.shape, final_df.dtypes, sep="\n")

# ------------------------------------------------------
# 5) Prepare y: normalize Date and expose Close_SPY as y_target
# ------------------------------------------------------
# Only the join key and the target are kept; any other columns are discarded.
y = (
    y.assign(Date=normalize_date(y["Date"]))
    .rename(columns={"Close_SPY": "y_target"})
    .loc[:, ["Date", "y_target"]]
)

# ------------------------------------------------------
# 6) Inner join the feature table with y_target on Date
# ------------------------------------------------------
final_df = final_df.merge(y, how="inner", on="Date")

# ------------------------------------------------------
# 7) Inspect the result: first rows, shape, column labels
# ------------------------------------------------------
print(final_df.head(), final_df.shape, final_df.columns, sep="\n")


        Date    SPY_High     SPY_Low   SPY_Close  SPY_Volume  SPY_Volatility  \
0 2022-11-07  380.570007  375.529999  379.950012    68286900        5.040009   
1 2022-11-08  385.119995  377.720001  382.000000    84641100        7.399994   
2 2022-11-09  381.140015  373.609985  374.130005    78495500        7.530029   
3 2022-11-10  395.040009  385.640015  394.690002   141455800        9.399994   
4 2022-11-11  399.350006  393.609985  398.510010    93839900        5.740021   

   DX-Y.NYB_Close   GC=F_Close   GLD_Close  SHY_Close  ...  SHY_Volatility  \
0      110.120003  1676.500000  155.850006  80.610001  ...        0.040001   
1      109.620003  1712.099976  159.449997  80.690002  ...        0.070000   
2      110.550003  1710.099976  158.649994  80.820000  ...        0.139999   
3      108.209999  1750.300049  163.479996  81.260002  ...        0.129997   
4      106.290001  1766.000000  164.559998  81.239998  ...        0.110001   

   WTI_Spot    PAYEMS  UNRATE  TNX (%)  기준금리  SPY_

In [17]:
# Guard so this cell can be re-run: once SPY_Close and SPY_NAV have been
# dropped, recomputing the ratio would raise KeyError. On a first run a
# missing input column still fails loudly.
if "SPY_Premium_pct" not in final_df.columns:
    # 1) Premium ratio (%): (SPY_Close - SPY_NAV) relative to SPY_NAV
    premium_pct = (final_df["SPY_Close"] - final_df["SPY_NAV"]) / final_df["SPY_NAV"] * 100

    # 2) Append the ratio and remove the two columns it was derived from
    final_df = final_df.assign(SPY_Premium_pct=premium_pct).drop(columns=["SPY_Close", "SPY_NAV"])

# 3) Check the result
print(final_df.head())
print(final_df.shape)

        Date    SPY_High     SPY_Low  SPY_Volume  SPY_Volatility  \
0 2022-11-07  380.570007  375.529999    68286900        5.040009   
1 2022-11-08  385.119995  377.720001    84641100        7.399994   
2 2022-11-09  381.140015  373.609985    78495500        7.530029   
3 2022-11-10  395.040009  385.640015   141455800        9.399994   
4 2022-11-11  399.350006  393.609985    93839900        5.740021   

   DX-Y.NYB_Close   GC=F_Close   GLD_Close  SHY_Close  TLT_Close  ...  \
0      110.120003  1676.500000  155.850006  80.610001  93.279999  ...   
1      109.620003  1712.099976  159.449997  80.690002  94.300003  ...   
2      110.550003  1710.099976  158.649994  80.820000  94.610001  ...   
3      108.209999  1750.300049  163.479996  81.260002  98.250000  ...   
4      106.290001  1766.000000  164.559998  81.239998  97.889999  ...   

   WTI_Spot    PAYEMS  UNRATE  TNX (%)  기준금리  SPY_PER  SPY_PBR  CPIAUCSL  \
0     91.80  154210.0     3.6   0.4214   4.0    22.07      3.6   298.708   


In [18]:
# Display final_df as the cell output.
final_df

Unnamed: 0,Date,SPY_High,SPY_Low,SPY_Volume,SPY_Volatility,DX-Y.NYB_Close,GC=F_Close,GLD_Close,SHY_Close,TLT_Close,...,WTI_Spot,PAYEMS,UNRATE,TNX (%),기준금리,SPY_PER,SPY_PBR,CPIAUCSL,y_target,SPY_Premium_pct
0,2022-11-07,380.570007,375.529999,68286900,5.040009,110.120003,1676.500000,155.850006,80.610001,93.279999,...,91.80,154210.0,3.6,0.4214,4.0,22.07,3.60,298.708,379.950012,0.034974
1,2022-11-08,385.119995,377.720001,84641100,7.399994,109.620003,1712.099976,159.449997,80.690002,94.300003,...,88.80,154210.0,3.6,0.4126,4.0,22.07,3.60,298.708,382.000000,0.015730
2,2022-11-09,381.140015,373.609985,78495500,7.530029,110.550003,1710.099976,158.649994,80.820000,94.610001,...,85.79,154210.0,3.6,0.4151,4.0,22.07,3.60,298.708,374.130005,0.006895
3,2022-11-10,395.040009,385.640015,141455800,9.399994,108.209999,1750.300049,163.479996,81.260002,98.250000,...,86.52,154210.0,3.6,0.3829,4.0,22.07,3.60,297.979,394.690002,-0.029572
4,2022-11-11,399.350006,393.609985,93839900,5.740021,106.290001,1766.000000,164.559998,81.239998,97.889999,...,89.14,154210.0,3.6,0.3813,4.0,22.07,3.60,297.979,398.510010,0.017100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749,2025-11-03,685.799988,679.940002,57315000,5.859985,99.870003,4000.300049,368.779999,82.720001,89.739998,...,61.79,159540.0,4.3,0.4106,4.0,27.10,5.01,324.368,683.340027,0.042058
750,2025-11-04,679.960022,674.580017,78427000,5.380005,100.220001,3947.699951,362.320007,82.750000,89.940002,...,61.38,159540.0,4.3,0.4089,4.0,27.10,5.01,324.368,675.239990,0.028337
751,2025-11-05,680.859985,674.169983,74402400,6.690002,100.199997,3980.300049,366.510010,82.690002,88.959999,...,60.40,159540.0,4.3,0.4157,4.0,27.10,5.01,324.368,677.580017,0.009694
752,2025-11-06,677.380005,668.719971,85035300,8.660034,99.730003,3979.899902,366.070007,82.790001,89.760002,...,60.24,159540.0,4.3,0.4093,4.0,27.10,5.01,324.368,670.309998,0.053712


In [19]:
# Make Date the index. The check keeps the cell re-runnable: after the first
# run Date is no longer a column, so repeating the lookup would raise KeyError.
# If Date is absent altogether the lookup still raises.
if final_df.index.name != "Date":
    final_df["Date"] = pd.to_datetime(final_df["Date"])
    final_df = final_df.set_index("Date")

In [20]:
# Display final_df again, now indexed by Date.
final_df

Unnamed: 0_level_0,SPY_High,SPY_Low,SPY_Volume,SPY_Volatility,DX-Y.NYB_Close,GC=F_Close,GLD_Close,SHY_Close,TLT_Close,^VIX_Close,...,WTI_Spot,PAYEMS,UNRATE,TNX (%),기준금리,SPY_PER,SPY_PBR,CPIAUCSL,y_target,SPY_Premium_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-07,380.570007,375.529999,68286900,5.040009,110.120003,1676.500000,155.850006,80.610001,93.279999,24.350000,...,91.80,154210.0,3.6,0.4214,4.0,22.07,3.60,298.708,379.950012,0.034974
2022-11-08,385.119995,377.720001,84641100,7.399994,109.620003,1712.099976,159.449997,80.690002,94.300003,25.540001,...,88.80,154210.0,3.6,0.4126,4.0,22.07,3.60,298.708,382.000000,0.015730
2022-11-09,381.140015,373.609985,78495500,7.530029,110.550003,1710.099976,158.649994,80.820000,94.610001,26.090000,...,85.79,154210.0,3.6,0.4151,4.0,22.07,3.60,298.708,374.130005,0.006895
2022-11-10,395.040009,385.640015,141455800,9.399994,108.209999,1750.300049,163.479996,81.260002,98.250000,23.530001,...,86.52,154210.0,3.6,0.3829,4.0,22.07,3.60,297.979,394.690002,-0.029572
2022-11-11,399.350006,393.609985,93839900,5.740021,106.290001,1766.000000,164.559998,81.239998,97.889999,22.520000,...,89.14,154210.0,3.6,0.3813,4.0,22.07,3.60,297.979,398.510010,0.017100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-03,685.799988,679.940002,57315000,5.859985,99.870003,4000.300049,368.779999,82.720001,89.739998,17.170000,...,61.79,159540.0,4.3,0.4106,4.0,27.10,5.01,324.368,683.340027,0.042058
2025-11-04,679.960022,674.580017,78427000,5.380005,100.220001,3947.699951,362.320007,82.750000,89.940002,19.000000,...,61.38,159540.0,4.3,0.4089,4.0,27.10,5.01,324.368,675.239990,0.028337
2025-11-05,680.859985,674.169983,74402400,6.690002,100.199997,3980.300049,366.510010,82.690002,88.959999,18.010000,...,60.40,159540.0,4.3,0.4157,4.0,27.10,5.01,324.368,677.580017,0.009694
2025-11-06,677.380005,668.719971,85035300,8.660034,99.730003,3979.899902,366.070007,82.790001,89.760002,19.500000,...,60.24,159540.0,4.3,0.4093,4.0,27.10,5.01,324.368,670.309998,0.053712


In [21]:
# Inspect the index built from Date (dtype, name, length).
final_df.index

DatetimeIndex(['2022-11-07', '2022-11-08', '2022-11-09', '2022-11-10',
               '2022-11-11', '2022-11-14', '2022-11-15', '2022-11-16',
               '2022-11-17', '2022-11-18',
               ...
               '2025-10-27', '2025-10-28', '2025-10-29', '2025-10-30',
               '2025-10-31', '2025-11-03', '2025-11-04', '2025-11-05',
               '2025-11-06', '2025-11-07'],
              dtype='datetime64[ns]', name='Date', length=754, freq=None)

In [22]:
# Save the result. index=True writes the Date index as the first column;
# utf-8-sig prepends a BOM (presumably so spreadsheet tools detect the
# encoding of the non-ASCII column name — confirm with consumers of the file).
final_df.to_csv(
    "../output_data/final_merged_y_close.csv",
    encoding="utf-8-sig",
    index=True,
)