In [1]:
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense,GRU
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')


# 1. Load and preprocess the tick data
data_clean = pd.read_csv("future_ss2312_tick.csv")

# NOTE(review): diff()/shift() below operate on row order, so they assume the CSV
# rows are already chronological -- confirm, or sort by 'trade_time' first.

# Make sure 'close' is numeric; anything unparseable becomes NaN.
data_clean['close'] = pd.to_numeric(data_clean['close'], errors='coerce')

# Express each reference price as its distance from the current close.
price_features = ['昨收盘', '今开盘', '最高价', '最低价', '申买价一', '申卖价一']
for feature in price_features:
    data_clean[feature + '_diff'] = data_clean['close'] - data_clean[feature]

data_clean['trade_time'] = pd.to_datetime(data_clean['trade_time'])

# Split the timestamp into hour and minute features.
data_clean['hour'] = data_clean['trade_time'].dt.hour
data_clean['minute'] = data_clean['trade_time'].dt.minute

# Row-to-row change of the close price.
data_clean['close_diff'] = data_clean['close'].diff()

# Label: close price 100 rows ahead minus the current close.
data_clean['label'] = data_clean['close'].shift(-100) - data_clean['close']

features = ['close_diff', '数量', 'hour', 'minute'] + [f + '_diff' for f in price_features]

# 3. Split the data by date range
# .copy() makes train_data an independent frame, so the in-place scaling below
# does not write into a slice of data_clean (chained assignment).
train_data = data_clean[(data_clean['trade_time'] >= '2023-09-01 09:00:00') &
                        (data_clean['trade_time'] < '2023-10-12 09:00:00')].copy()

# test_data is left unscaled on purpose: predict_next_move applies the scaler itself.
test_data = data_clean[(data_clean['trade_time'] >= '2023-10-12 09:00:00') &
                        (data_clean['trade_time'] < '2023-10-20 09:00:00')]

# Fit the min-max scaler on the training features only.
scaler = MinMaxScaler(feature_range=(0, 1))

train_data[features] = scaler.fit_transform(train_data[features])

time_steps = 200  # number of consecutive rows in each model input window

# Convert the DataFrame columns to NumPy arrays.
X_train = np.array(train_data[features])
y_train = np.array(train_data['label'])

# Drop every row that has a NaN in any feature OR in the label, using one mask
# for both arrays so they stay aligned. (A NaN label would turn the MSE loss
# into NaN during training.)
mask = ~(np.isnan(X_train).any(axis=1) | np.isnan(y_train))
X_train = X_train[mask]
y_train = y_train[mask]

# 准备 GRU 数据
def create_gru_data(data, time_steps):
    """Stack overlapping windows of ``data`` into a single array.

    Window ``k`` is ``data[k:k + time_steps]``. Windows are produced for
    ``k = 0 .. len(data) - time_steps - 1``, so the window that would end on
    the very last row is not included.

    Args:
        data: Sliceable sequence (e.g. a 2-D NumPy array of feature rows).
        time_steps: Number of consecutive rows per window.

    Returns:
        ``np.ndarray`` built from the list of windows; an empty array of
        shape ``(0,)`` when ``len(data) <= time_steps``.
    """
    window_count = len(data) - time_steps
    windows = [data[start:start + time_steps] for start in range(window_count)]
    return np.array(windows)

# Build the windowed model inputs: window k holds rows k .. k + time_steps - 1.
X_train_gru = create_gru_data(X_train, time_steps)

# Pair each window with the label of its LAST row (index k + time_steps - 1).
# predict_next_move feeds the model a window that ends on the current tick and
# uses the output as that tick's expected change, so training must use the same
# alignment. Slicing from `time_steps` instead would target the row after the
# window and shift every label by one tick.
y_train_gru = y_train[time_steps - 1:-1]
assert len(X_train_gru) == len(y_train_gru), "windows and labels are misaligned"

# Build the GRU regressor: one GRU layer followed by a single linear output.
model = Sequential()
model.add(GRU(units=50, input_shape=(time_steps, X_train.shape[1])))
model.add(Dense(1, activation='linear'))  # linear activation for a regression target

model.compile(optimizer='adam', loss='mean_squared_error')  # mean squared error loss
model.fit(X_train_gru, y_train_gru, epochs=5, batch_size=32)






Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x2a73e7fd0>

In [7]:
def predict_next_move(tick, model, time_steps, historical_data, scaler):
    """Append one tick to the history and predict the upcoming price change.

    Relies on the notebook-level ``price_features`` and ``features`` lists.

    Args:
        tick: One row of tick data (e.g. a row yielded by ``DataFrame.iterrows``);
            must provide 'close', 'trade_time' and every column named in
            ``price_features``.
        model: Fitted model whose ``predict`` accepts an array of shape
            ``(1, time_steps, len(features))``.
        time_steps: Number of most recent complete rows fed to the model.
        historical_data: DataFrame of the ticks seen so far (may be empty).
        scaler: Fitted scaler exposing ``transform`` for the ``features`` columns.

    Returns:
        ``(prediction, historical_data)``. ``prediction`` is ``None`` when no
        prediction can be made (too little history, or NaN in the model input);
        otherwise it is ``model.predict(...)[0][0]``. The returned frame is the
        history with the new tick appended and, once enough rows exist, the
        derived feature columns recomputed.
    """
    # Append the new tick to the history.
    historical_data = pd.concat([historical_data, pd.DataFrame([tick])], ignore_index=True)

    # Not enough raw history yet. The +20 margin is kept from the original code.
    if len(historical_data) < time_steps + 20:
        return None, historical_data

    # NOTE: features are recomputed over the entire history on every call, so the
    # cost per tick grows with the length of the history.
    for feature in price_features:
        historical_data[feature + '_diff'] = historical_data['close'] - historical_data[feature]

    # Split the timestamp into hour and minute features.
    historical_data['hour'] = historical_data['trade_time'].dt.hour
    historical_data['minute'] = historical_data['trade_time'].dt.minute

    historical_data['close_diff'] = historical_data['close'].diff()

    # Keep only complete rows, then take the most recent time_steps of them.
    data_to_scale = historical_data[features].dropna().tail(time_steps)

    # dropna() may leave fewer than time_steps rows even though the raw history
    # was long enough; the reshape below would then raise, so bail out instead.
    if len(data_to_scale) < time_steps:
        return None, historical_data

    # Scale with the scaler fitted on the training data.
    scaled_data = scaler.transform(data_to_scale)

    # Shape the scaled window as a single-sample batch for the model.
    X_new = scaled_data.reshape(1, time_steps, len(features))

    # Never feed NaN to the model.
    if np.isnan(X_new).any():
        return None, historical_data

    predicted_change = model.predict(X_new, verbose=0)
    predicted_change_value = predicted_change[0][0]

    return predicted_change_value, historical_data





# Running tick history handed to predict_next_move; it starts empty and grows by
# one row per processed tick.
historical_data = pd.DataFrame()
scaled_historical_data = pd.DataFrame()

initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_price = 0  # entry price of the open position; 0 while flat
buy_threshold = 3   # buy when the predicted change exceeds this value
sold_threshold = 0  # sell when the predicted change drops below this value
transactions = []
# Despite its name this counts ticks that produced a prediction, not minutes;
# it is reset to 0 on every buy and gates the sell condition below.
minute_count = 0

for idx, row in test_data.iterrows():
    # current_probability holds the value returned by predict_next_move, i.e. the
    # model's predicted price change (or None when no prediction was possible).
    current_probability, historical_data = predict_next_move(row, model, time_steps, historical_data, scaler)
    if current_probability is None:
        continue

    current_price = row['close']
    minute_count = minute_count + 1

    if current_probability > buy_threshold and stock_quantity == 0:
        affordable_quantity = funds // current_price
        # Skip when nothing can be bought (insufficient funds) or the price is
        # NaN; `not x > 0` is also True for NaN. Without this guard a zero-size
        # "buy" would be logged, and a NaN quantity would corrupt funds and
        # stock_quantity for the rest of the run.
        if not affordable_quantity > 0:
            continue

        print(current_probability)
        stock_quantity = affordable_quantity
        funds -= stock_quantity * current_price
        # NOTE: the fee is charged after sizing, so funds can dip slightly below
        # zero when the leftover cash is smaller than the fee.
        fee = stock_quantity * 2
        funds -= fee
        stock_price = current_price
        buy_price = current_price  # remember the entry price
        print(f"Time: {row['trade_time']} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
        transactions.append({
            'action': 'buy',
            'time': row['trade_time'],
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds
        })
        minute_count = 0
    elif minute_count > 100 and stock_quantity > 0 and current_probability < sold_threshold:
        print(current_probability)
        funds += stock_quantity * current_price
        price_diff = current_price - buy_price  # price change since entry
        # Classify the round trip as up, down or flat.
        if price_diff > 0:
            direction = '涨'
        elif price_diff < 0:
            direction = '跌'
        else:
            direction = '平'
        print(f"Time: {row['trade_time']} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
        transactions.append({
            'action': 'sell',
            'time': row['trade_time'],
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds,
            'price_change': direction
        })
        stock_quantity = 0
        buy_price = 0  # reset the entry price while flat


# Always build the frame from this run's transactions so the CSV can neither hit
# an undefined name nor pick up a stale frame from an earlier execution.
transactions_df = pd.DataFrame(transactions)
# 'price_change' only exists once at least one sell has been recorded.
if 'price_change' in transactions_df.columns:
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick_gru.csv')

3.1662633
Time: 2023-10-12 10:56:00 - Action: BUY at 14725.0, Quantity: 6.0,Funds:11638.0
-0.18375309
Time: 2023-10-12 10:57:06 - Action: SELL at 14730.0, Quantity: 6.0,Funds:100018.0, Price Change: 涨
3.1300614
Time: 2023-10-12 21:00:02 - Action: BUY at 14630.0, Quantity: 6.0,Funds:12226.0
-0.26575083
Time: 2023-10-12 21:00:54 - Action: SELL at 14650.0, Quantity: 6.0,Funds:100126.0, Price Change: 涨
3.0281582
Time: 2023-10-12 21:01:54 - Action: BUY at 14620.0, Quantity: 6.0,Funds:12394.0
-2.0019956
Time: 2023-10-12 21:02:46 - Action: SELL at 14635.0, Quantity: 6.0,Funds:100204.0, Price Change: 涨
3.5782528
Time: 2023-10-12 21:29:04 - Action: BUY at 14715.0, Quantity: 6.0,Funds:11902.0
-0.18632545
Time: 2023-10-12 21:29:58 - Action: SELL at 14725.0, Quantity: 6.0,Funds:100252.0, Price Change: 涨
5.180475
Time: 2023-10-12 22:00:00 - Action: BUY at 14690.0, Quantity: 6.0,Funds:12100.0
-0.922963
Time: 2023-10-12 22:01:13 - Action: SELL at 14700.0, Quantity: 6.0,Funds:100300.0, Price Change: 涨

: 

In [5]:
# Load a previously saved model from 'gru_model.h5'.
from keras.models import load_model
# Uncomment the next line to write the current in-memory model to disk first:
# model.save('gru_model.h5')
# NOTE(review): this rebinds `model`, so every cell executed afterwards uses the
# model stored in 'gru_model.h5' instead of the one fitted earlier in the session
# -- confirm that is intended.
model = load_model('gru_model.h5')

In [1]:
# Summarise and persist the recorded trades. This cell needs `transactions` from
# the backtest cell, so that cell must be run first in the same kernel session.
transactions_df = pd.DataFrame(transactions)
# 'price_change' only exists once at least one sell has been recorded; building
# the frame unconditionally also keeps to_csv from hitting an undefined name
# when there were no trades.
if 'price_change' in transactions_df.columns:
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick_gru.csv')

NameError: name 'transactions' is not defined