In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
import warnings
warnings.filterwarnings('ignore')

# 加载数据
data_clean = pd.read_csv("future_taobao_ss2401_tick.csv")

# 数据预处理
data_clean['datetime'] = pd.to_datetime(data_clean['datetime'])
data_clean = data_clean.sort_values('datetime')


# 定义未来价格变化的标签
data_clean['future_price'] = data_clean['bid_price1'].shift(-100)
data_clean['label'] = (data_clean['future_price'] > data_clean['bid_price1']).astype(int)

# 删除由于移位导致的NaN值
data_clean.dropna(inplace=True)

# 取买卖各五档数据
bid_columns = ['bid_price' + str(i) for i in range(1, 6)] + ['bid_volume' + str(i) for i in range(1, 6)]
ask_columns = ['ask_price' + str(i) for i in range(1, 6)] + ['ask_volume' + str(i) for i in range(1, 6)]
selected_columns = bid_columns + ask_columns
data_selected = data_clean[['datetime'] + selected_columns]

# 构建价格-时间矩阵
def create_price_time_matrix(data, price_columns, volume_columns):
    matrix_list = []
    for price_col, volume_col in zip(price_columns, volume_columns):
        pivot_table = data.pivot_table(index=price_col, columns='datetime', values=volume_col, fill_value=0)
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_matrix = scaler.fit_transform(pivot_table)
        matrix_list.append(scaled_matrix)
    return matrix_list

# 为买卖五档价格和交易量创建矩阵
bid_price_columns = ['bid_price' + str(i) for i in range(1, 6)]
bid_volume_columns = ['bid_volume' + str(i) for i in range(1, 6)]
ask_price_columns = ['ask_price' + str(i) for i in range(1, 6)]
ask_volume_columns = ['ask_volume' + str(i) for i in range(1, 6)]

bid_matrices = create_price_time_matrix(data_selected, bid_price_columns, bid_volume_columns)
ask_matrices = create_price_time_matrix(data_selected, ask_price_columns, ask_volume_columns)

def integrate_matrices(bid_matrices, ask_matrices):
    integrated_data = []
    for bid_matrix, ask_matrix in zip(bid_matrices, ask_matrices):
        # 将二维矩阵转换为三维矩阵，形状为 [行数, 列数, 1]
        bid_matrix_3d = np.expand_dims(bid_matrix, axis=-1)
        ask_matrix_3d = np.expand_dims(ask_matrix, axis=-1)

        # 确保买卖矩阵在时间轴上的维度一致
        min_time_steps = min(bid_matrix_3d.shape[1], ask_matrix_3d.shape[1])
        integrated_matrix = np.concatenate((bid_matrix_3d[:, :min_time_steps, :], 
                                            ask_matrix_3d[:, :min_time_steps, :]), axis=2)
        integrated_data.append(integrated_matrix)

    # 合并所有整合后的矩阵
    return np.concatenate(integrated_data, axis=0)


# 整合买卖五档数据
X_data = integrate_matrices(bid_matrices, ask_matrices)

# 归一化处理（可选）
scaler = MinMaxScaler(feature_range=(0, 1))
X_data_scaled = scaler.fit_transform(X_data.reshape(-1, X_data.shape[-1])).reshape(X_data.shape)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_data_scaled, data_clean['label'].values, test_size=0.2, random_state=42)

# 定义模型
model = Sequential()
model.add(LSTM(units=50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')

# 模型训练
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

# 保存模型
model.save('model_taobao_lstm_combined.h5')

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from keras.models import load_model
from datetime import datetime, time
model = load_model('model_taobao_lstm_combined.h5')

def parse_time_range(time_range_str):
    """解析时间范围字符串并返回时间对象的开始和结束时间"""
    start_str, end_str = time_range_str.split('-')
    start_time = datetime.strptime(start_str, "%H:%M").time()
    end_time = datetime.strptime(end_str, "%H:%M").time()
    return start_time, end_time

def is_time_in_ranges(time_to_check, time_ranges):
    """判断给定时间是否在时间范围数组内"""
    for time_range in time_ranges:
        start_time, end_time = parse_time_range(time_range)
        if start_time <= time_to_check <= end_time:
            return True
    return False


# 定义时间范围数组
notrade_time = ["11:20-11:30","14:50-15:00","0:30-1:00"]


# 批量预测
predictions = model.predict(X_test, verbose=0)
# 确保test_data的索引与predictions对齐
aligned_test_data = test_data.iloc[time_steps - 1:]


initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_threshold = 0.9
sold_threshold = 0.5
transactions = []
minute_count = 0
for prediction,  (index, row)  in zip(predictions, aligned_test_data.iterrows()):
    current_probability = prediction[0]
    print(current_probability)
    # print(row['datetime'])
    if '.' in str(row['datetime']):
        # 如果有小数点，分割为主时间部分和纳秒部分
        time, nano_part = str(row['datetime']).split('.')
    else:
        # 如果没有小数点，则没有纳秒部分
        time = str(row['datetime'])
        nano_part = '0'
    if is_time_in_ranges(datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
.time(),notrade_time):
            continue
    if current_probability is not None:
        current_price = row['last_price']
        buy_price = current_price
        sell_price = current_price
        # buy_price = row['bid_price1']
        # sell_price = row['ask_price1']
        minute_count = minute_count + 1
            
        if current_probability > buy_threshold and stock_quantity == 0 and row['bid_volume1']<20:
            print(current_probability)
            stock_quantity = funds // buy_price
            funds -= stock_quantity * buy_price
            fee = stock_quantity * 2
            funds -= fee
            last_buy_price = buy_price  # 记录买入价格
            print(f"Time: {row['datetime']} - Action: BUY at {buy_price}, Quantity: {stock_quantity},Funds:{funds}")
            transactions.append({
                'action': 'buy',
                'time': row['datetime'],
                'price': buy_price,
                'quantity': stock_quantity,
                'funds_remaining': funds
            })
            minute_count = 0
        elif minute_count > 100 and stock_quantity > 0 and current_probability<sold_threshold and row['ask_volume1']<20:
            print(current_probability)
            funds += stock_quantity * sell_price
            price_diff = sell_price - last_buy_price  # 计算价格差异
            # 判断价格差异是涨、跌还是平
            if price_diff > 0:
                direction = '涨'
            elif price_diff < 0:
                direction = '跌'
            else:
                direction = '平'
            print(f"Time: {row['datetime']} - Action: SELL at {sell_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
            transactions.append({
                'action': 'sell',
                'time': row['datetime'],
                'price': sell_price,
                'quantity': stock_quantity,
                'funds_remaining': funds,
                'price_change': direction
            })
            stock_quantity = 0
            last_buy_price = 0  # 重置买入价格为0

    
if len(transactions)>0:
    transactions_df = pd.DataFrame(transactions)
    print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_tick_lstm.csv')

2023-12-14 15:20:02.537436: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14618 MB memory:  -> device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:b1:00.0, compute capability: 7.0
2023-12-14 15:20:13.354880: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8900


0.923812
0.7549838
0.1272598
0.93574256
0.92039794
0.95395803
0.9612454
0.9638708
0.69645983
0.92830646
0.5331568
0.9219798
0.61062765
0.92442065
0.952078
0.6489646
0.9294599
0.94857603
0.9571711
0.9571711
Time: 2023-11-20 09:02:41 - Action: BUY at 13930.0, Quantity: 7.0,Funds:2476.0
0.96055156
0.96044016
0.95147663
0.9411943
0.21542579
0.92432773
0.48890588
0.08807552
0.12641704
0.13772877
0.95045614
0.9382474
0.955151
0.9447067
0.9363963
0.19838415
0.9101098
0.37993073
0.832755
0.9450562
0.39792964
0.12700093
0.91189444
0.6713319
0.103822626
0.91844666
0.9126637
0.95123017
0.9654651
0.57936746
0.90440124
0.47080982
0.12535349
0.12462439
0.9629092
0.69218737
0.8874904
0.961018
0.5553595
0.1334616
0.9313868
0.73469275
0.11211374
0.13544644
0.19521253
0.9679488
0.64165497
0.09138765
0.9094739
0.9366399
0.9591125
0.42756042
0.09266711
0.11304624
0.1936791
0.14631943
0.9582346
0.6263498
0.89246595
0.9557005
0.39472735
0.88818574
0.959287
0.9570649
0.9618146
0.9559849
0.9504205
0.94525033
