In [3]:
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense,GRU,Dropout, Bidirectional
from sklearn.preprocessing import MinMaxScaler
from keras.utils import Sequence
import warnings
warnings.filterwarnings('ignore')


data_clean = pd.read_csv("future_taobao_ss2401_tick.csv")


# 1. 数据加载和预处理
data_clean['last_price'] = pd.to_numeric(data_clean['last_price'], errors='coerce')

price_features = ['highest','lowest','bid_price1','ask_price1','bid_price2','ask_price2','bid_price3','ask_price3','bid_price4','ask_price4','bid_price5','ask_price5']
for feature in price_features:
    data_clean[feature + '_diff'] = data_clean['last_price'] - data_clean[feature]

data_clean['datetime'] = pd.to_datetime(data_clean['datetime'])

data_clean['last_price_diff'] = data_clean['last_price'].diff()

# Define label
data_clean['label'] = (data_clean['last_price'].shift(-100) > data_clean['last_price']).astype(int)


features = ['last_price_diff', 'volume','bid_volume1','bid_volume2','bid_volume3','bid_volume4','bid_volume5','ask_volume1','ask_volume2','ask_volume3','ask_volume4','ask_volume5'] + [f + '_diff' for f in price_features]

# 3. 分割数据

# Now you can filter the data between two dates
train_data = data_clean[(data_clean['datetime'] >= '2023-09-01 09:00:00') & 
                        (data_clean['datetime'] < '2023-11-10 09:00:00')]

test_data = data_clean[(data_clean['datetime'] >= '2023-11-10 09:00:00') & 
                        (data_clean['datetime'] < '2023-11-17 09:00:00')]


# 初始化归一化器
scaler = MinMaxScaler(feature_range=(0, 1))

train_data[features] = scaler.fit_transform(train_data[features])


# 将 DataFrame 转换为 NumPy 数组
X_train = np.array(train_data[features])
y_train = np.array(train_data['label'])

# 删除 NaN 值
mask = ~np.isnan(X_train).any(axis=1)
X_train = X_train[mask]
y_train = y_train[mask]

# 首先，确保 X_train 和 X_test 没有 NaN 值
X_train = X_train[~np.isnan(X_train).any(axis=1)]
y_train = y_train[~np.isnan(X_train).any(axis=1)]

class TimeseriesGenerator(Sequence):
    def __init__(self, data, labels, length, stride=1, batch_size=32):
        self.data = data
        self.labels = labels
        self.length = length
        self.stride = stride
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil((len(self.data) - self.length) / float(self.stride * self.batch_size)))

    def __getitem__(self, idx):
        batch_x = []
        batch_y = []

        start = idx * self.batch_size * self.stride
        end = start + self.batch_size * self.stride + self.length

        for i in range(start, min(end, len(self.data) - self.length), self.stride):
            batch_x.append(self.data[i: i + self.length])
            batch_y.append(self.labels[i + self.length])

        return np.array(batch_x), np.array(batch_y)

# 定义时间步长和步长
time_steps = 300
stride = 1  # 增加步长以减少内存使用

# 创建数据生成器
train_generator = TimeseriesGenerator(X_train, y_train, length=time_steps, stride=stride, batch_size=32)

model = Sequential()
model.add(LSTM(units=50, input_shape=(time_steps, X_train.shape[1])))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')


# 使用生成器训练模型
model.fit_generator(train_generator, epochs=5)
model.save('model_taobao_lstm.h5')

Epoch 1/5


2023-11-29 14:46:26.285677: I tensorflow/core/common_runtime/executor.cc:1210] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


 1393/20931 [>.............................] - ETA: 1:43:52 - loss: 0.6666

KeyboardInterrupt: 

In [None]:
from keras.models import load_model
model = load_model('model_taobao_lstm.h5')

def predict_next_move(tick, model, time_steps,historical_data,scaler):
    # 将新的 tick 数据追加到历史数据中
    historical_data = pd.concat([historical_data, pd.DataFrame([tick])], ignore_index=True)

    # 检查是否有足够的数据来计算滚动和EWM特征
    if len(historical_data) >= time_steps+20:


        for feature in price_features:
            historical_data[feature + '_diff'] = historical_data['last_price'] - historical_data[feature]

        historical_data['last_price_diff'] = historical_data['last_price'].diff()


        data_for_scaling = historical_data[features].dropna()

        # 选择最近的time_steps行用于归一化
        data_to_scale = data_for_scaling.tail(time_steps)

        # 归一化
        scaled_data = scaler.fit_transform(data_to_scale)
        

        # 使用归一化的数据创建模型输入
        X_new = scaled_data.reshape(1, time_steps, len(features))


        # 检查X_new是否包含NaN值
        if np.isnan(X_new).any():
            return None, historical_data
        else:
            # 进行预测
            prediction_proba = model.predict(X_new,verbose=0)
            probability_of_one = prediction_proba[0][0]

            return probability_of_one, historical_data
    else:
        # 数据不足以进行预测
        return None, historical_data



# Initialize historical_data with the correct column names and types if necessary
historical_data = pd.DataFrame()
scaled_historical_data = pd.DataFrame()

initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_threshold = 0.8
sold_threshold = 0.4
transactions = []
minute_count = 0

for idx, row in test_data.iterrows():
    current_probability, historical_data = predict_next_move(row, model,time_steps,historical_data,scaler) 
    # print(current_probability)
    # print(row['trade_time'])
    if current_probability is not None:
        current_price = row['last_price']
        minute_count = minute_count + 1
        if current_probability > buy_threshold and stock_quantity == 0 :
            print(current_probability)
            stock_quantity = funds // current_price
            funds -= stock_quantity * current_price
            fee = stock_quantity * 2
            funds -= fee
            stock_price = current_price
            buy_price = current_price  # 记录买入价格
            print(f"Time: {row['datetime']} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
            transactions.append({
                'action': 'buy',
                'time': row['datetime'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds
            })
            minute_count = 0
        elif minute_count > 100 and stock_quantity > 0 and current_probability<sold_threshold:
            print(current_probability)
            funds += stock_quantity * current_price
            price_diff = current_price - buy_price  # 计算价格差异
            # 判断价格差异是涨、跌还是平
            if price_diff > 0:
                direction = '涨'
            elif price_diff < 0:
                direction = '跌'
            else:
                direction = '平'
            print(f"Time: {row['datetime']} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
            transactions.append({
                'action': 'sell',
                'time': row['datetime'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds,
                'price_change': direction
            })
            stock_quantity = 0
            buy_price = 0  # 重置买入价格为0

    
if len(transactions)>0:
    transactions_df = pd.DataFrame(transactions)
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick.csv')

In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from keras.models import load_model
from datetime import datetime, time
model = load_model('model_taobao_lstm.h5')

def parse_time_range(time_range_str):
    """解析时间范围字符串并返回时间对象的开始和结束时间"""
    start_str, end_str = time_range_str.split('-')
    start_time = datetime.strptime(start_str, "%H:%M").time()
    end_time = datetime.strptime(end_str, "%H:%M").time()
    return start_time, end_time

def is_time_in_ranges(time_to_check, time_ranges):
    """判断给定时间是否在时间范围数组内"""
    for time_range in time_ranges:
        start_time, end_time = parse_time_range(time_range)
        if start_time <= time_to_check <= end_time:
            return True
    return False


def prepare_data_for_prediction(test_data, time_steps, scaler):

    # 使用归一化
    scaled_data = scaler.fit_transform(test_data[features].dropna())

    # 重塑数据以适应模型
    X = np.array([scaled_data[i:i+time_steps] for i in range(len(scaled_data)-time_steps+1)])
    return X


# 定义时间范围数组
notrade_time = ["11:20-11:30","14:50-15:00","0:30-1:00"]
# 准备数据
X_test = prepare_data_for_prediction(test_data, time_steps, scaler)
# 批量预测
predictions = model.predict(X_test, verbose=0)
# 确保test_data的索引与predictions对齐
aligned_test_data = test_data.iloc[time_steps - 1:]


initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_threshold = 0.8
sold_threshold = 0.3
transactions = []
minute_count = 0
for prediction,  (index, row)  in zip(predictions, aligned_test_data.iterrows()):
    current_probability = prediction[0]
    time, nano_part = row['datetime'].split('.')
    if is_time_in_ranges(datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
.time(),notrade_time):
            continue
    if current_probability is not None:
        current_price = row['last_price']
        buy1_price = row['bid_price1']
        sell1_price = row['ask_price1']
        minute_count = minute_count + 1
            
        if current_probability > buy_threshold and stock_quantity == 0 and current_price!=sell1_price :
            print(current_probability)
            stock_quantity = funds // current_price
            funds -= stock_quantity * current_price
            fee = stock_quantity * 2
            funds -= fee
            stock_price = current_price
            buy_price = current_price  # 记录买入价格
            print(f"Time: {row['datetime']} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
            transactions.append({
                'action': 'buy',
                'time': row['datetime'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds
            })
            minute_count = 0
        elif minute_count > 100 and stock_quantity > 0 and current_probability<sold_threshold and current_price!=buy1_price:
            print(current_probability)
            funds += stock_quantity * current_price
            price_diff = current_price - buy_price  # 计算价格差异
            # 判断价格差异是涨、跌还是平
            if price_diff > 0:
                direction = '涨'
            elif price_diff < 0:
                direction = '跌'
            else:
                direction = '平'
            print(f"Time: {row['datetime']} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
            transactions.append({
                'action': 'sell',
                'time': row['datetime'],
                'price': current_price,
                'quantity': stock_quantity,
                'funds_remaining': funds,
                'price_change': direction
            })
            stock_quantity = 0
            buy_price = 0  # 重置买入价格为0

    
if len(transactions)>0:
    transactions_df = pd.DataFrame(transactions)
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick_lstm.csv')







0.84668756
Time: 2023-10-25 09:03:14 - Action: BUY at 14750.0, Quantity: 6.0,Funds:11488.0
0.27820057
Time: 2023-10-25 09:04:58 - Action: SELL at 14770.0, Quantity: 6.0,Funds:100108.0, Price Change: 涨
0.9054644
Time: 2023-10-25 09:05:04 - Action: BUY at 14765.0, Quantity: 6.0,Funds:11506.0
0.29193515
Time: 2023-10-25 09:06:34 - Action: SELL at 14775.0, Quantity: 6.0,Funds:100156.0, Price Change: 涨
0.8477657
Time: 2023-10-25 09:06:47 - Action: BUY at 14765.0, Quantity: 6.0,Funds:11554.0
0.27052036
Time: 2023-10-25 09:09:12 - Action: SELL at 14770.0, Quantity: 6.0,Funds:100174.0, Price Change: 涨
0.85874116
Time: 2023-10-25 09:09:17 - Action: BUY at 14760.0, Quantity: 6.0,Funds:11602.0
0.17200416
Time: 2023-10-25 09:11:52 - Action: SELL at 14770.0, Quantity: 6.0,Funds:100222.0, Price Change: 涨
0.93956536
Time: 2023-10-25 09:11:53 - Action: BUY at 14760.0, Quantity: 6.0,Funds:11650.0
0.1636614
Time: 2023-10-25 09:13:54 - Action: SELL at 14765.0, Quantity: 6.0,Funds:100240.0, Price Change: 