In [4]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor

# Load the training tick data.
data_path = "jq_stock_ticks.csv"
data = pd.read_csv(data_path)

# Feature columns are every column whose name starts with 'a' or 'b'
# (presumably the five-level ask/bid price and volume columns -- TODO confirm
# against the CSV schema that no unrelated column matches these prefixes).
window_size = 10
feature_columns = [col for col in data.columns if col.startswith(('a', 'b'))]

# Place the current snapshot (shift 0) and the previous window_size - 1
# snapshots side by side. shift(i) leaves NaN in the first i rows, so dropna()
# removes at least the first window_size - 1 rows; the surviving rows keep
# their original index labels, which are used below to align with the target.
features = pd.concat([data[feature_columns].shift(i) for i in range(window_size)], axis=1).dropna()
features.columns = [f"{col}_{i}" for i in range(window_size) for col in feature_columns]


# Rolling window length and the shift that turns a trailing mean into a
# forward-looking one.
rolling_window_size = 101
shift_offset = -100

# At row t the target is the mean of data['current'] over rows t .. t + 100
# (the window includes the current row). The last 100 rows have no complete
# window and come out as NaN, so they are dropped here.
target_100_corrected = (
    data['current']
    .rolling(window=rolling_window_size)
    .mean()
    .shift(shift_offset)
    .dropna()
)

# Align features and target on the index labels they share. The previous
# version sliced the target positionally twice and then truncated the features
# to the same length, which paired each feature row with the label of a row
# 27 ticks later.
common_index = features.index.intersection(target_100_corrected.index)
if common_index.empty:
    raise ValueError(
        "No rows have both a complete feature window and a complete target window; "
        f"need at least {window_size + rolling_window_size - 1} rows of data."
    )

features_finalized = features.loc[common_index]
target_100_finalized = target_100_corrected.loc[common_index]


# Create and fit the gradient boosting regressor.
regressor = GradientBoostingRegressor(random_state=42)
regressor.fit(features_finalized, target_100_finalized)


In [3]:

# Load the new tick data the trained model is evaluated on.
new_data_path = "jq_stock_ticks_next.csv"
new_data = pd.read_csv(new_data_path)
new_data['timestamp'] = pd.to_datetime(new_data['time'])
new_data['date'] = new_data['timestamp'].dt.date

# Build the same windowed features as in training: the current snapshot plus
# the previous window_size - 1 snapshots, with rows lacking a full window dropped.
new_features = pd.concat([new_data[feature_columns].shift(i) for i in range(window_size)], axis=1).dropna()
new_features.columns = [f"{col}_{i}" for i in range(window_size) for col in feature_columns]

# Predict with the trained model; one prediction per row of new_features.
new_predictions = regressor.predict(new_features)

# Look up timestamp and price by the index labels of the rows that were
# actually predicted. A positional slice starting at window_size - 1 is only
# correct when dropna() removed exactly the first window_size - 1 rows.
predicted_rows = new_data.loc[new_features.index, ['timestamp', 'current']]

# Backtest configuration and state.
initial_capital = 100000  # starting cash
fee_rate = 0.0003  # proportional fee charged on every buy and sell
capital = initial_capital  # cash currently held
shares = 0  # shares currently held
buy_sell_log = []  # (timestamp, action, price) for every executed trade
threshold = 0.01  # minimum |prediction - price| required to trade
trade_count = 0  # number of executed trades
prev_date = None  # not used by the loop below; kept in case later cells read it

# Walk through the predictions together with the price at the same tick.
for prediction, timestamp, current_price in zip(
    new_predictions, predicted_rows['timestamp'], predicted_rows['current']
):
    price_difference = prediction - current_price

    if price_difference > threshold and capital > 0:
        # Buy signal: spend all cash, net of the fee.
        purchase_shares = capital / current_price * (1 - fee_rate)
        shares += purchase_shares
        capital = 0
        buy_sell_log.append((timestamp, 'buy', current_price))
        trade_count += 1
    elif price_difference < -threshold and shares > 0:
        # Sell signal: sell the whole position, net of the fee.
        capital += shares * current_price * (1 - fee_rate)
        shares = 0
        buy_sell_log.append((timestamp, 'sell', current_price))
        trade_count += 1

# Close any remaining position at the last price in the data. This closing
# sale is charged the fee but is neither logged nor counted in trade_count.
if shares > 0:
    capital += shares * new_data['current'].iloc[-1] * (1 - fee_rate)

# Print the trade log.
for log in buy_sell_log:
    print(f"Time: {log[0]}, Action: {log[1]}, Price: {log[2]:.2f}")

final_capital = capital
profit_percentage = (final_capital - initial_capital) / initial_capital * 100

final_results = {
    "最终资本": final_capital,
    "收益率": profit_percentage,
    "总交易次数": trade_count
}

final_results


{'最终资本': 100000, '收益率': 0.0, '总交易次数': 0}