In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from p_ftd import calculate_volatility, add_padding, perform_fft, apply_threshold, inverse_fft, remove_padding
import matplotlib.pyplot as plt

In [None]:
# 1. Data loading and preprocessing
# Read the S&P 500 index data (parsing the date column), sort it in ascending
# date order, and reset the index so that row labels match the positional
# order of the NumPy arrays extracted below. A chained sort is used instead of
# `inplace=True` so the cell produces a fresh, clearly derived frame.
df = (
    pd.read_csv('SP500.csv', parse_dates=['date'])
    .sort_values('date')
    .reset_index(drop=True)
)

# Feature columns: open, high, low, close prices and traded volume
features = ['open', 'high', 'low', 'close', 'volume']
# The prediction target is the closing price
target_col = 'close'

# Extract the feature matrix and the target series
data = df[features].values  # shape: (n_samples, 5)
target = df[target_col].values
dates = df['date']
n_samples = len(data)

# Default parameters for the P-FTD denoising algorithm
# N: window size used when computing volatility
# m: padding length parameter
# epsilon: frequency-domain threshold parameter
N = 40
m = 40
epsilon = 0.2

In [None]:
# 2. Denoise the close column with P-FTD ----------------
def p_ftd_denoise_pipeline(data_array, N, m, epsilon):
    """
    Run the full P-FTD helper chain on a series and return the denoised result.

    The steps, all delegated to helpers imported from `p_ftd`, are:
    volatility estimation -> padding -> FFT -> thresholding -> inverse FFT
    -> padding removal.

    Parameters
    ----------
    data_array : sequence supporting ``len()``
        The series to denoise; forwarded unchanged to the helpers.
    N : passed to ``calculate_volatility`` (the notebook describes it as the
        volatility window size).
    m : passed to ``add_padding`` and ``remove_padding`` (described as the
        padding length parameter).
    epsilon : passed to ``apply_threshold`` (described as the frequency-domain
        threshold parameter).

    Returns
    -------
    Whatever ``remove_padding`` returns for the reconstructed signal.
    NOTE(review): presumably a real-valued array of ``len(data_array)``
    elements -- confirm against the ``p_ftd`` implementation.
    """
    vol_first, vol_second = calculate_volatility(data_array, N)
    spectrum = perform_fft(add_padding(data_array, vol_first, vol_second, m))
    reconstructed = inverse_fft(apply_threshold(spectrum, epsilon))
    # The original length is handed to remove_padding together with m.
    return remove_padding(reconstructed, len(data_array), m)

# Denoise the closing-price series
denoised_close = p_ftd_denoise_pipeline(target, N, m, epsilon)

# Build a denoised copy of the full feature matrix in which only the target
# (close) column is replaced. The column position is looked up from `features`
# instead of being hard-coded, so it stays correct if the feature list changes.
data_denoised = data.copy()
data_denoised[:, features.index(target_col)] = denoised_close

In [None]:
# 3. Normalization: scale all features together
# Min-Max scaling maps each feature of the raw data onto the [0, 1] range.
# The scaler is fitted on the raw data only and then reused for the denoised
# data, so both datasets share the same scale.
# NOTE(review): the scaler is fitted on the full series; if a train/test split
# follows later, confirm this does not leak test-set statistics.
scaler = MinMaxScaler().fit(data)
scaled_data = scaler.transform(data)  # normalized raw data
scaled_denoised = scaler.transform(data_denoised)  # normalized denoised data

In [None]:
# 4. Build the time-series (sliding-window) datasets
time_step = 20  # use the previous 20 days to predict the next day (roughly one month of trading days)

def create_dataset(multivariate_data, time_step=20, target_index=3):
    """
    Build a supervised sliding-window dataset from a multivariate series.

    Each sample pairs `time_step` consecutive rows (all feature columns) with
    the value of column `target_index` in the row immediately after the window.
    Every valid window is used, including the one whose target is the final
    row of the input.

    Parameters
    ----------
    multivariate_data : array-like, shape (n_rows, n_features)
        The series, one row per time step.
    time_step : int, default 20
        Number of consecutive rows in each input window.
    target_index : int, default 3
        Column whose next-step value is the prediction target.

    Returns
    -------
    X : np.ndarray, shape (n_rows - time_step, time_step, n_features)
        Input windows.
    y : np.ndarray, shape (n_rows - time_step,)
        Target value following each window.

    If the input has no more than `time_step` rows, empty arrays with shapes
    (0, time_step, n_features) and (0,) are returned.
    """
    series = np.asarray(multivariate_data)
    # Window i covers rows [i, i + time_step) and its target is row
    # i + time_step, so the last valid start is len(series) - time_step - 1.
    n_windows = len(series) - time_step
    if n_windows <= 0:
        empty_X = np.empty((0, time_step) + series.shape[1:], dtype=series.dtype)
        return empty_X, np.empty((0,), dtype=series.dtype)

    X = np.stack([series[i:(i + time_step), :] for i in range(n_windows)])
    # Targets are the rows following each window; copy so the result does not
    # alias the input array.
    y = series[time_step:, target_index].copy()
    return X, y

# Build separate windowed datasets for the raw and the denoised data.
# The target column is looked up from `features` instead of relying on the
# function's default index matching the position of the close column.
X_raw, y_raw = create_dataset(
    scaled_data, time_step, target_index=features.index(target_col)
)
X_denoised, y_denoised = create_dataset(
    scaled_denoised, time_step, target_index=features.index(target_col)
)