In [2]:
import os

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

# 请替换为你的实际文件夹路径

data_folder =r'./unzipped_data'# 注意：确保路径最后没有多余斜杠

# 设置中文字体

plt.rcParams['font.sans-serif']= ['SimHei']

plt.rcParams['axes.unicode_minus']= False

# 读取部分csv文件（你可根据内存情况调整）

files = sorted([for f in os.listdir(data_folder) if f.endswith('.csv')])[:20]

# 初始化数据框

all_data = pd.DataFrame()

# 拼接所有数据

for file in files:

file_path = os.path.join(data_folder, file)

df = pd.read_csv(file_path)

df['date'] = file.replace('.csv', '')

all_data = pd.concat([all_data, df])

# 假设字段名如下，确保这些列存在于CSV中

# 字段：'datetime', 'open', 'high', 'low', 'close', 'volume', 'open_interest'

# 若实际字段不同，请修改下面的字段名

all_data['datetime']= pd.to_datetime(all_data['datetime'])

all_data.sort_values('datetime', inplace=True)

all_data.reset_index(drop=True, inplace=True)

# 生成未来30分钟涨跌幅标签

all_data['future_close']= all_data['close'].shift(-30)

all_data['return_30min']= (all_data['future_close'] - all_data['close']) / all_data['close']* 100

# 提取滑动窗口特征（以30分钟为窗口）

window = 30

all_data['mean_close']= all_data['close'].rolling(window).mean()

all_data['std_close']= all_data['close'].rolling(window).std()

all_data['range_close']= all_data['close'].rolling(window).apply(lambda x: x.max() - x.min())

all_data['momentum_10']= all_data['close'] - all_data['close'].shift(10)

all_data['bias_10']= (all_data['close'] - all_data['close'].rolling(10).mean())/ all_data['close'].rolling(10).mean() * 100

# 删除缺失值

all_data.dropna(inplace=True)

# 标准化选定特征

features = ['mean_close', 'std_close', 'range_close', 'momentum_10', 'bias_10']

scaler = StandardScaler()

all_data[features]= scaler.fit_transform(all_data[features])

# 可视化：收盘价与未来30分钟涨跌幅

plt.figure(figsize=(14, 6))

plt.subplot(1, 2, 1)

plt.plot(all_data['datetime'], all_data['close'], label='Close Price')

plt.title('收盘价走势')

plt.xlabel('时间')

plt.ylabel('价格')

plt.legend()

plt.grid(True)

plt.subplot(1, 2, 2)

plt.plot(all_data['datetime'], all_data['return_30min'], label='30分钟涨跌幅', color='darkorange')

plt.title('未来30分钟涨跌幅')

plt.xlabel('时间')

plt.ylabel('涨跌幅（%）')

plt.legend()

plt.grid(True)

plt.tight_layout()

plt.savefig('收盘价走势_未来30分钟涨跌幅.png')

SyntaxError: invalid syntax (257778090.py, line 23)