In [17]:
import pandas as pd
import numpy as np

# =========================
# 0) Load the input files
# =========================
# Each frame is read and has its header whitespace trimmed right away, so
# later column lookups do not depend on stray spaces in the CSV header.
# "utf-8-sig" lets a leading byte-order mark be consumed transparently.
macro = pd.read_csv("macro.csv", encoding="utf-8-sig")
macro.columns = macro.columns.str.strip()

tone = pd.read_csv("final_monthly_tone_index.csv", encoding="utf-8-sig")
tone.columns = tone.columns.str.strip()

# =========================
# 1) macro Date -> monthly Period (result must stay row-aligned with macro)
# =========================
# Work on stripped text so every .str operation below sees plain strings.
# astype(str) turns missing values into the literal "nan"; the extract step
# below maps anything that is not a year-month back to NaN.
# NOTE(review): if read_csv inferred Date as a float (e.g. 2012.10), the
# trailing zero is already lost at this point -- confirm the column is text.
macro["Date"] = macro["Date"].astype(str).str.strip()

# Turn every year/month delimiter into "-" and remove spaces.
# "월" has to become a separator instead of being deleted: deleting it lets a
# following day fuse onto a one-digit month once spaces are removed
# ("2012년 1월 15일" -> "2012-115일", which would be extracted as "2012-11").
# With a separator the same input becomes "2012-1-15일" and yields "2012-1".
s = (
    macro["Date"]
    .str.replace("년", "-", regex=False)
    .str.replace("월", "-", regex=False)
    .str.replace(".", "-", regex=False)
    .str.replace("/", "-", regex=False)
    .str.replace(" ", "", regex=False)
)

# Keep only the first YYYY-M(M) occurrence; non-matching rows become NaN.
s = s.str.extract(r"(\d{4}-\d{1,2})", expand=False)

# Zero-pad a one-digit month; anything that is not a "Y-M" string maps to NaN.
def pad_month(x):
    """Return ``x`` as ``"YYYY-MM"`` with the month left-padded to two digits.

    Args:
        x: A ``"year-month"`` string, or any other value (e.g. NaN/None).

    Returns:
        The padded string, or ``np.nan`` when ``x`` is not a string or
        contains no ``"-"``.

    Raises:
        ValueError: If ``x`` contains more than one ``"-"`` (the split no
            longer unpacks into exactly two parts).
    """
    if not isinstance(x, str) or "-" not in x:
        return np.nan
    year, month = x.split("-")
    return "-".join((year, month.zfill(2)))

s = s.map(pad_month)

# Do not drop NaN from `s` here: it must keep the same length as `macro` so
# the assignment below stays row-aligned. Going through to_datetime with
# errors="coerce" turns unparsable entries into NaT instead of raising.
month_stamp = pd.to_datetime(s, format="%Y-%m", errors="coerce")
macro["date"] = month_stamp.dt.to_period("M")

# Keep only rows whose month could be built, then discard the raw Date
# column (errors="ignore" makes the drop a no-op if it is already gone).
has_month = macro["date"].notna()
macro = macro.loc[has_month].drop(columns=["Date"], errors="ignore")

# =========================
# 2) Convert tone dates to monthly Periods
# =========================
# Unparsable dates become NaT (errors="coerce") and are filtered out below.
tone_stamp = pd.to_datetime(tone["date"], errors="coerce")
tone["date"] = tone_stamp.dt.to_period("M")
tone = tone[tone["date"].notna()].copy()

# =========================
# 3) Drop tone variables whose name starts with "z_"
# =========================
is_z_column = tone.columns.str.startswith("z_")
tone = tone.loc[:, ~is_z_column]

# =========================
# 4) Outer merge (keep every month present in either frame)
# =========================
df_final = (
    macro.merge(tone, how="outer", on="date", sort=True)
    .sort_values("date")
    .set_index("date")
)

# =========================
# 5) Convert every column to numeric
# =========================
# Each column is rendered as text, stripped of "," and "%" characters and
# surrounding whitespace, then parsed; text that still is not a number
# becomes NaN (errors="coerce").
for col_name in df_final.columns:
    as_text = df_final[col_name].astype(str)
    for token in (",", "%"):
        as_text = as_text.str.replace(token, "", regex=False)
    df_final[col_name] = pd.to_numeric(as_text.str.strip(), errors="coerce")

# =========================
# 6) Rename columns to short English names (as requested)
# =========================
# Columns that are not present in df_final are simply left out by rename().
rename_map = {
    # interest rates
    "기준 금리": "bok_rate",
    "기준 금리.1": "delta_bok",        # target variable
    "콜 금리": "call_rate_m",
    # production and prices
    "산업생산지수 갭": "output_gap",
    "산업생산증가율": "ip_growth",
    "인플레이션 갭(소비자물가지수)": "cpi_infl_gap",
    # uncertainty, sentiment and tone
    "경제정책 불확실성 지수(EPU)": "epu_index",
    "뉴스심리지수": "news_sentiment",
    "final_monthly_tone": "tone",
}
df_final = df_final.rename(mapper=rename_map, axis="columns")

# =========================
# 7) Sanity check
# =========================
# Print the covered period, the row count and the column names, then show
# the last 12 rows.
first_month = df_final.index.min()
last_month = df_final.index.max()
print("기간:", first_month, "~", last_month)
print("행 개수:", df_final.shape[0])
print("컬럼:", list(df_final.columns))
display(df_final.tail(12))

# =========================
# 8) Save
# =========================
# reset_index() moves the "date" index back into a regular column so it is
# written to the CSV; index=False then suppresses the new integer index.
out_path = "final_monthly_merged_renamed.csv"
flat = df_final.reset_index()
flat.to_csv(out_path, index=False, encoding="utf-8-sig")
print(f"✅ 저장 완료: {out_path}")


기간: 2012-01 ~ 2025-12
행 개수: 168
컬럼: ['epu_index', 'bok_rate', 'delta_bok', 'news_sentiment', 'output_gap', 'ip_growth', 'cpi_infl_gap', 'call_rate_m', 'tone']


Unnamed: 0_level_0,epu_index,bok_rate,delta_bok,news_sentiment,output_gap,ip_growth,cpi_infl_gap,call_rate_m,tone
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-01,789.444385,3.0,0.0,99.32,-1.373306,-1.6,0.2,3.0555,0.082858
2025-02,585.145726,2.75,-0.25,99.85,-0.660233,0.7,0.0,2.96585,-0.15107
2025-03,614.288102,2.75,0.0,93.73,0.453854,1.1,0.1,2.75205,0.249046
2025-04,799.764812,2.75,0.0,97.94,-0.431249,-0.7,0.1,2.767455,0.143118
2025-05,588.987204,2.5,-0.25,101.71,-1.915775,-1.2,-0.1,2.739947,0.124295
2025-06,529.545115,2.5,0.0,107.96,-0.19993,1.6,0.2,2.518842,-0.251332
2025-07,492.634115,2.5,0.0,107.94,0.116215,0.4,0.1,2.496783,0.138245
2025-08,392.720816,2.5,0.0,105.85,-0.2674,-0.3,-0.3,2.5062,0.246915
2025-09,,2.5,0.0,109.09,1.14916,1.3,0.1,2.525682,-0.207105
2025-10,,2.5,0.0,113.32,-1.834152,-2.5,0.4,2.513833,0.266826


✅ 저장 완료: final_monthly_merged_renamed.csv
