In [1]:
import pandas as pd

# Source CSV and the destination for the date-truncated copy.
IN_PATH = "./merged_spillover_data.csv"
OUT_PATH = "./merged_spillover_data_upto_2026_01_12.csv"

# Load the merged data and parse "Date" into datetimes so it can be
# compared against a Timestamp below.
df = pd.read_csv(IN_PATH).assign(Date=lambda frame: pd.to_datetime(frame["Date"]))

# Last date to keep (inclusive).
cutoff = pd.Timestamp("2026-01-12")

# Keep rows dated on or before the cutoff, in chronological order,
# with a fresh 0..n-1 index.
df_cut = df.loc[lambda frame: frame["Date"] <= cutoff].sort_values(by="Date")
df_cut = df_cut.reset_index(drop=True)

# Report how many rows survived the cut and the date span that remains.
dates_kept = df_cut["Date"]
print("Original shape:", df.shape)
print("Cut shape     :", df_cut.shape)
print("Date range    :", dates_kept.min(), "~", dates_kept.max())

# Persist the truncated frame without writing the integer index.
df_cut.to_csv(OUT_PATH, index=False)
print("Saved to:", OUT_PATH)


Original shape: (1326, 6)
Cut shape     : (1326, 6)
Date range    : 2020-10-12 00:00:00 ~ 2026-01-12 00:00:00
Saved to: ./merged_spillover_data_upto_2026_01_12.csv


In [3]:
import numpy as np
import pandas as pd

# Input: the date-truncated dataset; output: the transformed VAR input.
IN_PATH  = "./merged_spillover_data_upto_2026_01_12.csv"
OUT_PATH = "./merged_spillover_var_input.csv"

df = pd.read_csv(IN_PATH)
df["Date"] = pd.to_datetime(df["Date"])
# The differences below are taken between consecutive rows, so the rows
# must be in date order first.
df = df.sort_values("Date").reset_index(drop=True)

# Columns transformed to log differences
log_cols = ["SOLVPN", "COPPER", "DXY", "VIX"]

for col in log_cols:
    if col in df.columns:
        # log() is undefined for values <= 0. Mask those to NaN before taking
        # the log so the affected rows are removed by dropna() below; an
        # unmasked zero would yield -inf/+inf, which dropna() does not remove.
        df[f"dlog_{col}"] = np.log(df[col].where(df[col] > 0)).diff()

# Interest rate: plain first difference (no log)
if "UST10Y" in df.columns:
    df["d_UST10Y"] = df["UST10Y"].diff()

# Drop every row that has a NaN in any column. This removes the first row
# (diff() has no predecessor there) and also any row where a source value
# was missing or non-positive, together with the row that follows it.
df_var = df.dropna().reset_index(drop=True)

print("VAR input shape:", df_var.shape)
print("Date range     :", df_var["Date"].min(), "~", df_var["Date"].max())

df_var.to_csv(OUT_PATH, index=False)
print("Saved to:", OUT_PATH)


VAR input shape: (1325, 11)
Date range     : 2020-10-13 00:00:00 ~ 2026-01-12 00:00:00
Saved to: ./merged_spillover_var_input.csv
