In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

In [2]:
# Load the realized-volatility CSV into a DataFrame.
rv_csv_path = 'gold_rv_1h_15min.csv'
data = pd.read_csv(rv_csv_path)

In [3]:
# Preview the first rows of the freshly loaded frame.
data.head()

Unnamed: 0,datetime,realized_volatility
0,2025-06-30 22:15:00,0.000814
1,2025-06-30 22:30:00,0.001013
2,2025-06-30 22:45:00,0.001262
3,2025-06-30 23:00:00,0.001297
4,2025-06-30 23:15:00,0.001259


In [4]:
# Number of rows read from the CSV.
len(data)

6974

In [5]:
# Parse the timestamp column, order the rows chronologically, and use the
# timestamps as the index.
data = (
    data.assign(datetime=pd.to_datetime(data['datetime']))
        .sort_values('datetime')
        .set_index('datetime')
)

In [11]:
# === 1. Regular grid ===
# Reindex onto a fixed 15-minute frequency and fill the resulting gaps with
# pandas' default interpolation.
# NOTE(review): any timestamps missing from the raw data (e.g. market
# closures, if present) become interpolated values — confirm this is intended.
data = data.asfreq('15min')
data['realized_volatility'] = data['realized_volatility'].interpolate()

# === 2. Parameters ===
points_per_day = 96                    # 96 observations per day at 15-minute spacing
window_points = 7 * points_per_day     # one-week window = 672 observations
period = 48                            # seasonal period = 12 hours = 48 observations

vals = data['realized_volatility'].values
n = len(vals)
num_full_windows = n // window_points  # only complete weeks are decomposed

# === 3. Result buffers ===
# Pre-filled with NaN so that rows outside any complete week stay missing.
trend, seasonal, resid = (np.full(n, np.nan) for _ in range(3))

# === 4. Week-by-week decomposition ===
# Each one-week window gets its own independent robust STL fit.
for start in range(0, num_full_windows * window_points, window_points):
    week = slice(start, start + window_points)
    fit = STL(vals[week], period=period, robust=True).fit()

    trend[week] = fit.trend
    seasonal[week] = fit.seasonal
    resid[week] = fit.resid

# === 5. Assemble the results ===
result = pd.DataFrame(
    {
        'realized_volatility': data['realized_volatility'],
        'trend': trend,
        'seasonal': seasonal,
        'resid': resid,
    },
    index=data.index,
)

In [15]:
# === 6. Drop the trailing samples that do not fill a complete week ===
n_decomposed = num_full_windows * window_points
result = result.iloc[:n_decomposed]

In [19]:
# Keep the series together with its trend and seasonal components; the
# residual column is left out.
export_columns = ['realized_volatility', 'trend', 'seasonal']
result_1 = result[export_columns]

In [21]:
# Display the selected columns (series, trend, seasonal) for inspection.
result_1

Unnamed: 0_level_0,realized_volatility,trend,seasonal
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-06-30 22:15:00,0.000814,0.000938,-0.000138
2025-06-30 22:30:00,0.001013,0.000945,-0.000186
2025-06-30 22:45:00,0.001262,0.000953,-0.000015
2025-06-30 23:00:00,0.001297,0.000961,0.000172
2025-06-30 23:15:00,0.001259,0.000969,-0.000022
...,...,...,...
2025-09-08 21:00:00,0.004493,0.002993,0.001506
2025-09-08 21:15:00,0.004493,0.002997,0.001042
2025-09-08 21:30:00,0.003248,0.003000,0.000327
2025-09-08 21:45:00,0.001885,0.003004,-0.000843


In [23]:
# === 7. Save the results ===
# The datetime index is written as the first column, labelled 'datetime'.
output_path = 'RV_STL.csv'
result_1.to_csv(output_path, index_label='datetime')