In [1]:
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Load the tick data, derive indicator features and the label, then split
# into train / test sets by timestamp.
# ---------------------------------------------------------------------------

# Window lengths below are counted in ticks (rows), not in wall-clock time.
ROLLING_WINDOW = 500       # rolling mean / std of last_price
RSI_WINDOW = 1000          # averaging window for RSI gains / losses
MACD_SHORT_SPAN = 200      # fast EMA span
MACD_LONG_SPAN = 1000      # slow EMA span
MACD_SIGNAL_SPAN = 800     # EMA span of the MACD signal line
LABEL_HORIZON = 200        # label looks this many ticks ahead

# Load and pre-process the raw data.
# NOTE(review): the rolling / shift logic below assumes rows are already in
# chronological order - confirm for this CSV.
data_clean = pd.read_csv("future_taobao_ss2401_tick.csv")
data_clean['last_price'] = pd.to_numeric(data_clean['last_price'], errors='coerce')
data_clean['datetime'] = pd.to_datetime(data_clean['datetime'])

# Rolling mean and standard deviation of the last traded price
data_clean['rolling_mean'] = data_clean['last_price'].rolling(window=ROLLING_WINDOW).mean()
data_clean['rolling_std'] = data_clean['last_price'].rolling(window=ROLLING_WINDOW).std()

# RSI based on simple rolling averages of gains and losses
delta = data_clean['last_price'].diff()
gain = (delta.where(delta > 0, 0)).fillna(0)
loss = (-delta.where(delta < 0, 0)).fillna(0)
avg_gain = gain.rolling(window=RSI_WINDOW).mean()
avg_loss = loss.rolling(window=RSI_WINDOW).mean()
# avg_loss == 0 gives rs == inf (RSI 100); 0 / 0 gives NaN and the row is
# dropped by the NaN filter further down.
rs = avg_gain / avg_loss
data_clean['RSI'] = 100 - (100 / (1 + rs))

# MACD and its signal line
short_ema = data_clean['last_price'].ewm(span=MACD_SHORT_SPAN, adjust=False).mean()
long_ema = data_clean['last_price'].ewm(span=MACD_LONG_SPAN, adjust=False).mean()
data_clean['MACD'] = short_ema - long_ema
data_clean['MACD_signal'] = data_clean['MACD'].ewm(span=MACD_SIGNAL_SPAN, adjust=False).mean()

# Shift RSI and MACD by one tick so the previous tick's value is the feature
data_clean['RSI_shifted'] = data_clean['RSI'].shift(1)
data_clean['MACD_shifted'] = data_clean['MACD'].shift(1)
data_clean['MACD_signal_shifted'] = data_clean['MACD_signal'].shift(1)

# Label: 1 if bid_price1 is higher LABEL_HORIZON ticks later, else 0.
# The last LABEL_HORIZON rows have no future price; comparing against NaN
# would silently label them 0, so they are marked undefined (NaN) instead and
# excluded when the training / test arrays are built below.
# NOTE(review): the horizon is counted in rows, so it can span a session
# break - confirm that is intended.
current_bid = data_clean['bid_price1']
future_bid = current_bid.shift(-LABEL_HORIZON)
data_clean['label'] = (
    (future_bid > current_bid)
    .astype(float)
    .where(future_bid.notna() & current_bid.notna())
)

# Model inputs. The ORDER matters: the model is fitted on a bare NumPy array,
# so columns are identified by position only.
features = [
    'rolling_mean', 'rolling_std',
    'RSI_shifted', 'MACD_shifted', 'MACD_signal_shifted',
    'last_price', 'volume',
    'bid_volume1', 'bid_volume2', 'bid_volume3', 'bid_volume4', 'bid_volume5',
    'ask_volume1', 'ask_volume2', 'ask_volume3', 'ask_volume4', 'ask_volume5',
    'highest', 'lowest',
    'bid_price1', 'ask_price1',
    'bid_price2', 'ask_price2',
    'bid_price3', 'ask_price3',
    'bid_price4', 'ask_price4',
    'bid_price5', 'ask_price5',
]

# Split by timestamp: [start, end) for each set
train_data = data_clean[(data_clean['datetime'] >= '2023-08-31 09:00:00') &
                        (data_clean['datetime'] < '2023-11-10 09:00:00')]
test_data = data_clean[(data_clean['datetime'] >= '2023-11-10 09:00:00') &
                       (data_clean['datetime'] < '2023-11-30 09:00:00')]


def _features_and_labels(frame, feature_names):
    """Return ``(X, y, keep_mask)`` for ``frame`` as NumPy arrays.

    Rows with a NaN in any feature or an undefined label are removed from
    ``X`` and ``y``; ``keep_mask`` is the boolean mask over ``frame`` rows
    that were kept. ``y`` is returned as integers (0 / 1).
    """
    X = frame[feature_names].values
    y = frame['label'].values
    keep_mask = ~(np.isnan(X).any(axis=1) | np.isnan(y))
    return X[keep_mask], y[keep_mask].astype(int), keep_mask


# Convert to NumPy arrays and drop incomplete rows
X_train, y_train, nan_mask = _features_and_labels(train_data, features)
X_test, y_test, nan_mask_test = _features_and_labels(test_data, features)

# Create and train the random-forest model.
# Fail early with a clear message instead of an opaque scikit-learn error
# when the date filter / NaN filter left nothing to train on.
if len(X_train) == 0:
    raise ValueError("No training rows left after filtering; check the date range and input data.")

rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)

# Persist the fitted model so the backtest cell can reload it
# (joblib is imported with the other imports at the top of the cell).
joblib.dump(rf_model, 'model_taobao_rf.pkl')

['model_taobao_rf.pkl']

In [2]:
import pandas as pd
import numpy as np
import joblib
from datetime import datetime

# Load the random-forest model written by the training cell
model = joblib.load('model_taobao_rf.pkl')

# Score every test tick with predict_proba.
# The model was fitted on a bare NumPy array, so a NumPy array is passed here
# too (same column order as `features`). Rows with a NaN feature cannot be
# scored reliably; they keep a NaN probability so that the result stays
# aligned row-for-row with `test_data`. A NaN probability never satisfies the
# `>` / `<` threshold checks used by the backtest, i.e. it triggers no trade.
test_features = test_data[features].values
rows_with_features = ~np.isnan(test_features).any(axis=1)
prob_predictions = np.full((len(test_features), len(model.classes_)), np.nan)
if rows_with_features.any():
    prob_predictions[rows_with_features] = model.predict_proba(test_features[rows_with_features])

# Only the probability of the positive class (second column) is used
positive_class_probabilities = prob_predictions[:, 1]

# Backtest state and decision thresholds
initial_funds = 100000
funds = initial_funds       # cash currently available
stock_quantity = 0          # units currently held
stock_price = 0             # price of the most recent buy
buy_threshold = 0.8         # buy when P(positive) is above this
sold_threshold = 0.5        # signal-based sell when P(positive) is below this
transactions = []           # one dict per executed buy / sell
minute_count = 0            # tradable ticks seen since the last buy (ticks, not minutes)
last_buy_price = 0          # entry price of the open position, 0 when flat

def parse_time_range(time_range_str):
    """Parse an ``"HH:MM-HH:MM"`` string into a ``(start, end)`` pair of ``datetime.time`` objects."""
    # Unpacking into exactly two names keeps the ValueError for malformed input.
    first_part, second_part = time_range_str.split('-')
    clock_format = "%H:%M"
    bounds = tuple(
        datetime.strptime(part, clock_format).time()
        for part in (first_part, second_part)
    )
    return bounds

def is_time_in_ranges(time_to_check, time_ranges):
    """Return True if ``time_to_check`` falls inside any of ``time_ranges``.

    Args:
        time_to_check: a ``datetime.time`` value.
        time_ranges: iterable of ``"HH:MM-HH:MM"`` strings; both ends are
            inclusive.

    A range whose start is later than its end (e.g. ``"23:50-0:10"``) is
    treated as wrapping past midnight.
    """
    for time_range in time_ranges:
        start_time, end_time = parse_time_range(time_range)
        if start_time <= end_time:
            # Ordinary same-day window
            if start_time <= time_to_check <= end_time:
                return True
        elif time_to_check >= start_time or time_to_check <= end_time:
            # Window that crosses midnight
            return True
    return False

# No-trade windows ("HH:MM-HH:MM", both ends inclusive). On a tick inside a
# window any open position is liquidated and no new order is placed.
notrade_time = ["11:20-11:30","14:50-15:00","0:00-1:00"]

FEE_PER_UNIT = 2       # fee charged per unit on a buy
MIN_HOLD_TICKS = 200   # tradable ticks to wait after a buy before a signal-based sell
TRANSACTION_COLUMNS = ['action', 'time', 'price', 'quantity', 'funds_remaining', 'price_change']


def _price_direction(price_diff):
    """Classify a price difference as up ('涨'), down ('跌') or flat ('平')."""
    if price_diff > 0:
        return '涨'
    if price_diff < 0:
        return '跌'
    return '平'


def _close_position(tick_time, price, quantity, cash, entry_price):
    """Sell ``quantity`` units at ``price``.

    Prints the trade and returns ``(new_cash, transaction_record)``.
    """
    cash += quantity * price
    direction = _price_direction(price - entry_price)
    print(f"Time: {tick_time} - Action: SELL at {price}, Quantity: {quantity},Funds:{cash}, Price Change: {direction}")
    record = {
        'action': 'sell',
        'time': tick_time,
        'price': price,
        'quantity': quantity,
        'funds_remaining': cash,
        'price_change': direction
    }
    return cash, record


for current_probability, (_index, row) in zip(positive_class_probabilities, test_data.iterrows()):
    tick_time = row['datetime']
    # Drop the sub-second part before the window check so that a tick at
    # e.g. 11:30:00.5 still counts as inside "11:20-11:30".
    time_of_day = tick_time.replace(microsecond=0, nanosecond=0).time()
    in_notrade_window = is_time_in_ranges(time_of_day, notrade_time)

    # Prices of THIS tick; both sides trade at the best bid.
    # NOTE(review): buying at the bid is optimistic (a market buy would fill
    # at ask_price1) - confirm this is the intended fill model.
    buy_price = row['bid_price1']
    sell_price = row['bid_price1']
    price_is_valid = pd.notna(sell_price) and sell_price > 0

    if in_notrade_window:
        # Forced liquidation at the current tick's price. If the price is
        # unusable the position stays open and is retried on the next tick.
        if stock_quantity > 0 and price_is_valid:
            print(current_probability)
            funds, sell_record = _close_position(
                tick_time, sell_price, stock_quantity, funds, last_buy_price)
            transactions.append(sell_record)
            stock_quantity = 0
            last_buy_price = 0  # flat again
        continue  # no trading inside the window

    minute_count = minute_count + 1  # counts tradable ticks, not minutes
    if not price_is_valid:
        continue

    if current_probability > buy_threshold and stock_quantity == 0:
        # Size the order so that price AND fee are covered by available cash.
        affordable_quantity = funds // (buy_price + FEE_PER_UNIT)
        if affordable_quantity < 1:
            continue  # not enough cash for a single unit; record nothing
        print(current_probability)
        stock_quantity = affordable_quantity
        fee = stock_quantity * FEE_PER_UNIT
        funds -= stock_quantity * buy_price
        funds -= fee
        stock_price = buy_price
        last_buy_price = buy_price  # remember the entry price
        print(f"Time: {row['datetime']} - Action: BUY at {buy_price}, Quantity: {stock_quantity},Funds:{funds}")
        transactions.append({
            'action': 'buy',
            'time': row['datetime'],
            'price': buy_price,
            'quantity': stock_quantity,
            'funds_remaining': funds
        })
        minute_count = 0
    elif minute_count > MIN_HOLD_TICKS and stock_quantity > 0 and current_probability < sold_threshold:
        # Signal-based exit once the minimum holding period has passed
        print(current_probability)
        funds, sell_record = _close_position(
            tick_time, sell_price, stock_quantity, funds, last_buy_price)
        transactions.append(sell_record)
        stock_quantity = 0
        last_buy_price = 0  # flat again


# Always build the frame (with a fixed column set) so the export works even
# when no trade, or only a buy, happened.
transactions_df = pd.DataFrame(transactions, columns=TRANSACTION_COLUMNS)
if not transactions_df.empty:
    print(transactions_df['price_change'].value_counts())
# NOTE(review): the file name says "lstm" although the model is a random
# forest; kept unchanged in case downstream code reads this path.
transactions_df.to_csv('transactions_tick_lstm.csv')


0.82
Time: 2023-11-10 23:43:39 - Action: BUY at 14315.0, Quantity: 6.0,Funds:14098.0
0.755
Time: 2023-11-11 00:00:00.500000 - Action: SELL at 14300.0, Quantity: 6.0,Funds:99898.0, Price Change: 跌
0.81
Time: 2023-11-13 09:13:59.500000 - Action: BUY at 14190.0, Quantity: 7.0,Funds:554.0
0.49
Time: 2023-11-13 10:11:01.500000 - Action: SELL at 14160.0, Quantity: 7.0,Funds:99674.0, Price Change: 跌
0.81
Time: 2023-11-13 23:40:42 - Action: BUY at 14145.0, Quantity: 7.0,Funds:645.0
0.67
Time: 2023-11-14 00:00:01 - Action: SELL at 14135.0, Quantity: 7.0,Funds:99590.0, Price Change: 跌
0.81
Time: 2023-11-14 08:59:00.500000 - Action: BUY at 14150.0, Quantity: 7.0,Funds:526.0
0.635
Time: 2023-11-14 11:20:00 - Action: SELL at 14115.0, Quantity: 7.0,Funds:99331.0, Price Change: 跌
0.805
Time: 2023-11-15 09:00:22.500000 - Action: BUY at 14095.0, Quantity: 7.0,Funds:652.0
0.615
Time: 2023-11-15 11:20:00.500000 - Action: SELL at 14105.0, Quantity: 7.0,Funds:99387.0, Price Change: 涨
0.805
Time: 2023-11-17