In [6]:
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')


data_clean = pd.read_csv("future_ss2312_tick.csv")


# 1. 数据加载和预处理
# data_clean = data.sort_values(by='trade_time')
# 确保'close'列是数值型
data_clean['close'] = pd.to_numeric(data_clean['close'], errors='coerce')

# 2. 特征生成
# Example usage:
rolling_windows = {'mean': 30, 'std': 30, 'rsi': 40}
ewm_spans = {'short': 20, 'long': 80, 'signal': 80}


# Calculate rolling mean and standard deviation
data_clean['rolling_mean'] = data_clean['close'].rolling(window=rolling_windows['mean']).mean()
data_clean['rolling_std'] = data_clean['close'].rolling(window=rolling_windows['std']).std()

# Calculate RSI
delta = data_clean['close'].diff()
gain = (delta.where(delta > 0, 0)).fillna(0)
loss = (-delta.where(delta < 0, 0)).fillna(0)
avg_gain = gain.rolling(window=rolling_windows['rsi']).mean()
avg_loss = loss.rolling(window=rolling_windows['rsi']).mean()
rs = avg_gain / avg_loss
data_clean['RSI'] = 100 - (100 / (1 + rs))

# Calculate MACD
short_ema = data_clean['close'].ewm(span=ewm_spans['short'], adjust=False).mean()
long_ema = data_clean['close'].ewm(span=ewm_spans['long'], adjust=False).mean()
data_clean['MACD'] = short_ema - long_ema
data_clean['MACD_signal'] = data_clean['MACD'].ewm(span=ewm_spans['signal'], adjust=False).mean()

# Shift RSI and MACD to use them as features for next timestep
data_clean['RSI_shifted'] = data_clean['RSI'].shift(1)
data_clean['MACD_shifted'] = data_clean['MACD'].shift(1)
data_clean['MACD_signal_shifted'] = data_clean['MACD_signal'].shift(1)

# 累积成交量特征生成
data_clean['volume'] = pd.to_numeric(data_clean['数量'], errors='coerce')
data_clean['volume_rolling_mean'] = data_clean['volume'].rolling(window=30).mean()

# 五档挂牌价差特征生成
bid_columns = ['申买价一', '申买价二', '申买价三', '申买价四', '申买价五']
ask_columns = ['申卖价一', '申卖价二', '申卖价三', '申卖价四', '申卖价五']
data_clean['average_bid'] = data_clean[bid_columns].mean(axis=1)
data_clean['average_ask'] = data_clean[ask_columns].mean(axis=1)
data_clean['bid_ask_spread'] = data_clean['average_ask'] - data_clean['average_bid']
# 价格变化率
data_clean['price_change_rate'] = data_clean['close'].pct_change()

# 成交量变化
data_clean['volume_change'] = data_clean['volume'].diff()

# 买卖挂牌量比
data_clean['bid_ask_volume_ratio'] = data_clean['申买量一'] / data_clean['申卖量一']

# Define label
data_clean['label'] = (data_clean['close'].shift(-100) > data_clean['close']).astype(int)

#下采样
data_clean = data_clean.iloc[::10, :]

# 3. 分割数据
# 分割数据为训练集和测试集
# Convert the 'trade_time' column to datetime
data_clean['trade_time'] = pd.to_datetime(data_clean['trade_time'])

# Now you can filter the data between two dates
train_data = data_clean[(data_clean['trade_time'] >= '2023-09-01 09:00:00') & 
                        (data_clean['trade_time'] < '2023-09-30 09:00:00')]

test_data = data_clean[(data_clean['trade_time'] >= '2023-09-30 09:00:00') & 
                        (data_clean['trade_time'] < '2023-10-12 09:00:00')]

# print(train_data)
# print(test_data)


features = ['close', 'rolling_mean', 'rolling_std', 'RSI_shifted', 'MACD_shifted', 'MACD_signal_shifted']


# 更新特征列表
features += ['volume_rolling_mean', 'bid_ask_spread', 'price_change_rate', 'volume_change', 'bid_ask_volume_ratio']

# 初始化归一化器
scaler = MinMaxScaler(feature_range=(0, 1))

train_data[features] = scaler.fit_transform(train_data[features])


time_steps = 100  # 例如，使用过去 100 个时间点

# 将 DataFrame 转换为 NumPy 数组
X_train = np.array(train_data[features])
y_train = np.array(train_data['label'])

# 删除 NaN 值
mask = ~np.isnan(X_train).any(axis=1)
X_train = X_train[mask]
y_train = y_train[mask]

# 首先，确保 X_train 和 X_test 没有 NaN 值
X_train = X_train[~np.isnan(X_train).any(axis=1)]
y_train = y_train[~np.isnan(X_train).any(axis=1)]

# 准备 LSTM 数据
def create_lstm_data(data, time_steps):
    lstm_data = []
    for i in range(len(data) - time_steps):
        lstm_data.append(data[i:(i + time_steps)])
    return np.array(lstm_data)

X_train_lstm = create_lstm_data(X_train, time_steps)
y_train_lstm = y_train[time_steps:]

# 创建模型
model = Sequential()
model.add(LSTM(units=50, input_shape=(time_steps, X_train.shape[1])))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(X_train_lstm, y_train_lstm, epochs=10, batch_size=32)





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x3ebdff2b0>

In [7]:
def predict_next_move(tick, model, rolling_windows, ewm_spans, time_steps,historical_data,scaler):
    # 将新的 tick 数据追加到历史数据中
    historical_data = pd.concat([historical_data, pd.DataFrame([tick])], ignore_index=True)
    

    # 检查是否有足够的数据来计算滚动和EWM特征
    if len(historical_data) >= max(rolling_windows['mean'], rolling_windows['std'], rolling_windows['rsi'], ewm_spans['long'],time_steps+20):
        
        # 在历史数据上计算滚动平均和标准差
        historical_data['rolling_mean'] = historical_data['close'].rolling(window=rolling_windows['mean'], min_periods=1).mean()
        historical_data['rolling_std'] = historical_data['close'].rolling(window=rolling_windows['std'], min_periods=1).std()

        # 在历史数据上计算RSI
        delta = historical_data['close'].diff()
        gain = (delta.where(delta > 0, 0)).fillna(0)
        loss = (-delta.where(delta < 0, 0)).fillna(0)
        avg_gain = gain.rolling(window=rolling_windows['rsi'], min_periods=1).mean()
        avg_loss = loss.rolling(window=rolling_windows['rsi'], min_periods=1).mean()
        rs = avg_gain / avg_loss.replace(0, 1)  # 避免除以零
        historical_data['RSI'] = 100 - (100 / (1 + rs))

        # 在历史数据上计算MACD
        short_ema = historical_data['close'].ewm(span=ewm_spans['short'], adjust=False, min_periods=1).mean()
        long_ema = historical_data['close'].ewm(span=ewm_spans['long'], adjust=False, min_periods=1).mean()
        historical_data['MACD'] = short_ema - long_ema
        historical_data['MACD_signal'] = historical_data['MACD'].ewm(span=ewm_spans['signal'], adjust=False, min_periods=1).mean()

        # 将RSI和MACD移位以用作下一个时间步的特征
        historical_data['RSI_shifted'] = historical_data['RSI'].shift(1)
        historical_data['MACD_shifted'] = historical_data['MACD'].shift(1)
        historical_data['MACD_signal_shifted'] = historical_data['MACD_signal'].shift(1)
        
        # 计算五档挂牌价差
        bid_columns = ['申买价一', '申买价二', '申买价三', '申买价四', '申买价五']
        ask_columns = ['申卖价一', '申卖价二', '申卖价三', '申卖价四', '申卖价五']
        historical_data['average_bid'] = historical_data[bid_columns].mean(axis=1)
        historical_data['average_ask'] = historical_data[ask_columns].mean(axis=1)
        historical_data['bid_ask_spread'] = historical_data['average_ask'] - historical_data['average_bid']
            # 价格变化率
        historical_data['price_change_rate'] = historical_data['close'].pct_change()

        # 成交量变化
        historical_data['volume_change'] = historical_data['volume'].diff()

        # 买卖挂牌量比
        historical_data['bid_ask_volume_ratio'] = historical_data['申买量一'] / historical_data['申卖量一']
        
        # 计算累积成交量的滚动平均
        historical_data['volume_rolling_mean'] = historical_data['数量'].rolling(window=30, min_periods=1).mean()

        data_for_scaling = historical_data[features].dropna()

        # 选择最近的time_steps行用于归一化
        data_to_scale = data_for_scaling.tail(time_steps)

        # 归一化
        scaled_data = scaler.transform(data_to_scale)
        

        # 使用归一化的数据创建模型输入
        X_new = scaled_data.reshape(1, time_steps, len(features))


        # 检查X_new是否包含NaN值
        if np.isnan(X_new).any():
            return None, historical_data
        else:
            # 进行预测
            prediction_proba = model.predict(X_new,verbose=0)
            probability_of_one = prediction_proba[0][0]

            return probability_of_one, historical_data
    else:
        # 数据不足以进行预测
        return None, historical_data





# Initialize historical_data with the correct column names and types if necessary
historical_data = pd.DataFrame()
scaled_historical_data = pd.DataFrame()

initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_threshold = 0.6
sold_threshold = 0.4
transactions = []
minute_count = 0

for idx, row in test_data.iterrows():
    current_probability, historical_data = predict_next_move(row, model, rolling_windows, ewm_spans, time_steps,historical_data,scaler) 
    print(current_probability)
    print(row['trade_time'])
    if current_probability is not None:
        current_price = row['close']
        minute_count = minute_count + 1
            
        if current_probability > buy_threshold and stock_quantity == 0 :
            print(current_probability)
            stock_quantity = funds // current_price
            funds -= stock_quantity * current_price
            fee = stock_quantity * 2
            funds -= fee
            stock_price = current_price
            buy_price = current_price  # 记录买入价格
            print(f"Time: {row['trade_time']} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
            transactions.append({
                'action': 'buy',
                'time': row['trade_time'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds
            })
            minute_count = 0
        elif minute_count > 100 and stock_quantity > 0 and current_probability<sold_threshold:
            print(current_probability)
            funds += stock_quantity * current_price
            price_diff = current_price - buy_price  # 计算价格差异
            # 判断价格差异是涨、跌还是平
            if price_diff > 0:
                direction = '涨'
            elif price_diff < 0:
                direction = '跌'
            else:
                direction = '平'
            print(f"Time: {row['trade_time']} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
            transactions.append({
                'action': 'sell',
                'time': row['trade_time'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds,
                'price_change': direction
            })
            stock_quantity = 0
            buy_price = 0  # 重置买入价格为0

    
if len(transactions)>0:
    transactions_df = pd.DataFrame(transactions)
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick.csv')

None
2023-10-09 09:00:03
None
2023-10-09 09:00:08
None
2023-10-09 09:00:13
None
2023-10-09 09:00:18
None
2023-10-09 09:00:24
None
2023-10-09 09:00:29
None
2023-10-09 09:00:34
None
2023-10-09 09:00:39
None
2023-10-09 09:00:44
None
2023-10-09 09:00:49
None
2023-10-09 09:00:54
None
2023-10-09 09:00:59
None
2023-10-09 09:01:04
None
2023-10-09 09:01:09
None
2023-10-09 09:01:14
None
2023-10-09 09:01:19
None
2023-10-09 09:01:24
None
2023-10-09 09:01:29
None
2023-10-09 09:01:34
None
2023-10-09 09:01:39
None
2023-10-09 09:01:44
None
2023-10-09 09:01:49
None
2023-10-09 09:01:54
None
2023-10-09 09:01:59
None
2023-10-09 09:02:04
None
2023-10-09 09:02:09
None
2023-10-09 09:02:14
None
2023-10-09 09:02:19
None
2023-10-09 09:02:24
None
2023-10-09 09:02:29
None
2023-10-09 09:02:34
None
2023-10-09 09:02:40
None
2023-10-09 09:02:45
None
2023-10-09 09:02:50
None
2023-10-09 09:02:55
None
2023-10-09 09:03:00
None
2023-10-09 09:03:05
None
2023-10-09 09:03:10
None
2023-10-09 09:03:15
None
2023-10-09 09:03:20


NameError: name 'transactions_df' is not defined