In [21]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# 1. Load and pre-process the data
data = pd.read_csv("ru2408.csv", encoding="gb2312")

# Combine the separate date and time-of-day columns into one timestamp.
# NOTE(review): if 'date' is a trading date rather than a calendar date, rows
# recorded late in the evening may sort on the wrong side of midnight -- confirm
# against the data source.
data['time'] = pd.to_datetime(data['date'].astype(str) + ' ' + data['datetime'])

# Use a stable sort: the default quicksort may reorder rows that share the same
# timestamp, and every feature below (diff / rolling / ewm / shift) depends on
# row order. 'mergesort' keeps tied rows in their original file order.
data = data.sort_values(by='time', kind='mergesort')

# Drop rows without a price; .copy() so the column assignments that follow
# operate on an independent frame rather than a slice of `data`.
data_clean = data.dropna(subset=['current']).copy()


# 2. Feature engineering
# Look-back lengths, all expressed in rows of data_clean.
ROLLING_WINDOW = 1000
RSI_WINDOW = 2500
MACD_FAST_SPAN, MACD_SLOW_SPAN, MACD_SIGNAL_SPAN = 300, 1250, 800

price = data_clean['current']

# Rolling mean and standard deviation of the price over the same window.
price_window = price.rolling(window=ROLLING_WINDOW)
data_clean['rolling_mean'] = price_window.mean()
data_clean['rolling_std'] = price_window.std()

# RSI built from simple rolling means of the upward and downward price moves.
delta = price.diff()
gain = delta.where(delta > 0, 0).fillna(0)      # upward moves, 0 elsewhere
loss = (-delta.where(delta < 0, 0)).fillna(0)   # downward moves as positive magnitudes
avg_gain = gain.rolling(window=RSI_WINDOW).mean()
avg_loss = loss.rolling(window=RSI_WINDOW).mean()
rs = avg_gain / avg_loss
data_clean['RSI'] = 100 - (100 / (1 + rs))

# MACD: fast EMA minus slow EMA, plus an EMA of that difference as the signal line.
short_ema = price.ewm(span=MACD_FAST_SPAN, adjust=False).mean()
long_ema = price.ewm(span=MACD_SLOW_SPAN, adjust=False).mean()
data_clean['MACD'] = short_ema - long_ema
data_clean['MACD_signal'] = data_clean['MACD'].ewm(span=MACD_SIGNAL_SPAN, adjust=False).mean()

# Lag each indicator by one row so a row only carries the previous row's value.
for indicator in ('RSI', 'MACD', 'MACD_signal'):
    data_clean[f'{indicator}_shifted'] = data_clean[indicator].shift(1)

# Define label: 1 when the price LABEL_HORIZON rows ahead is above the current
# price, otherwise 0.
LABEL_HORIZON = 200
future_price = data_clean['current'].shift(-LABEL_HORIZON)
data_clean['label'] = (future_price > data_clean['current']).astype(int)
# The final LABEL_HORIZON rows have no future price; the comparison against NaN
# is False, so they receive label 0 even though their outcome is unknown.
# Track them so they can be kept out of the training set below.
label_known = future_price.notna()


# 3. Split the data
data_clean['date_only'] = pd.to_datetime(data_clean['time']).dt.date
first_date = data_clean['date_only'].iloc[0]

# Training window: dates strictly after the first date, up to and including
# first_date + 10 days.
train_window_end = first_date + pd.Timedelta(days=10)
in_train_window = (data_clean['date_only'] > first_date) & (data_clean['date_only'] <= train_window_end)
first_month_data = data_clean[in_train_window]

features = ['current', 'rolling_mean', 'rolling_std', 'RSI_shifted', 'MACD_shifted', 'MACD_signal_shifted']
X_first_month = first_month_data[features]
y_first_month = first_month_data['label']

print(X_first_month)

# Keep only rows that have every feature available AND a genuinely known label.
has_all_features = X_first_month.notna().all(axis=1)
trainable = has_all_features & label_known.loc[X_first_month.index]
X_train_month_clean = X_first_month[trainable]
print(X_train_month_clean)
y_train_month_clean = y_first_month[trainable]


# 4. Train the model
# Random forest classifier on the binary label; the seed is fixed so repeated
# runs build the same forest.
# Alternative left disabled: RandomForestRegressor(n_estimators=100, random_state=42)
forest_params = {'n_estimators': 100, 'random_state': 42}
rf = RandomForestClassifier(**forest_params)

# Fit on the cleaned training rows only.
rf.fit(X_train_month_clean, y_train_month_clean)



       current  rolling_mean  rolling_std  RSI_shifted  MACD_shifted  \
13199  14460.0     13915.195   120.212262    77.272727    116.588884   
13200  14460.0     13915.900   121.333886    83.974359    118.643684   
13201  14460.0     13916.605   122.441173    83.974359    120.680308   
13202  14460.0     13917.310   123.534508    83.974359    122.698883   
13203  14460.0     13918.015   124.614259    83.974359    124.699537   
...        ...           ...          ...          ...           ...   
81473  14350.0     14349.540     2.427817    84.000000     14.113721   
81474  14350.0     14349.540     2.427817    84.000000     14.097515   
81475  14350.0     14349.540     2.427817    84.000000     14.081292   
81476  14350.0     14349.540     2.427817    84.000000     14.065053   
81477  14350.0     14349.540     2.427817    84.000000     14.048798   

       MACD_signal_shifted  
13199           116.575107  
13200           116.580272  
13201           116.590509  
13202           116

In [23]:
# Up/down prediction and trading simulation
import collections  # not used below; left in place in case other cells rely on it

# 5. Predict on the evaluation window that follows the training window:
#    dates in (first_date + 10 days, first_date + 30 days].
second_month_data = data_clean[(data_clean['date_only'] > first_date + pd.Timedelta(days=10)) &
                               (data_clean['date_only'] <= first_date + pd.Timedelta(days=30))]

# Fill feature gaps along TIME (each feature takes its own most recent earlier
# value), using the full history so gaps at the start of the window can still be
# filled. Filling across the features of a single row would copy one feature's
# value into a different feature.
second_month_features = data_clean[features].ffill().loc[second_month_data.index]
rows_with_features = second_month_features.notna().all(axis=1)

# Locate the probability column of the "up" class (label 1) explicitly rather
# than assuming it is column 1.
model_classes = list(rf.classes_)
if 1 not in model_classes:
    raise ValueError("rf was not trained on any label-1 samples; cannot compute the probability of class 1")
up_column = model_classes.index(1)

# Score all rows in ONE batched call; calling predict_proba once per row inside
# the loop dominates the run time. Rows that still lack a feature keep a NaN
# probability: every comparison with NaN is False, so they can trigger neither a
# buy nor a probability-based sell (the forced exit below still applies).
second_month_probabilities = pd.Series(float('nan'), index=second_month_data.index, dtype=float)
if rows_with_features.any():
    second_month_probabilities.loc[rows_with_features] = rf.predict_proba(
        second_month_features[rows_with_features])[:, up_column]
probabilities_second_month = second_month_probabilities.tolist()
# probabilities_queue = collections.deque(maxlen=10000)


# Simulation state
initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
buy_threshold = 0.8              # open a position when P(class 1) exceeds this
bug_threshold = buy_threshold    # original (misspelled) name, kept for compatibility
sold_threshold = 0.4             # close the position when P(class 1) drops below this
min_holding_rows = 200           # rows to wait after a buy before a probability-based sell
forced_exit_minutes = {(0, 59), (1, 0)}  # (hour, minute) of rows stamped 00:59:xx / 01:00:xx
transactions = []

buy_price = 0                    # price of the open position, 0 when flat
minute_count = 0                 # rows processed since the last buy (counts rows, not minutes)
daily_transaction_count = 0      # buys since the last reset; tracked but not used as a limit

for timestamp, current_price, current_probability in zip(
        second_month_data['time'], second_month_data['current'], probabilities_second_month):
    minute_count += 1

    # Rows stamped 00:59:xx or 01:00:xx reset the daily counter, block new
    # entries and force any open position to be closed.
    force_exit = (timestamp.hour, timestamp.minute) in forced_exit_minutes
    if force_exit:
        daily_transaction_count = 0

    if stock_quantity == 0 and not force_exit and current_probability > bug_threshold:
        print(current_probability)
        daily_transaction_count += 1
        # Buy as many whole units as the available funds allow.
        stock_quantity = funds // current_price
        funds -= stock_quantity * current_price
        stock_price = current_price
        # fee = max(stock_quantity * current_price * 0.0001, 5)
        fee = 2  # flat fee, charged on buys only
        funds -= fee
        buy_price = current_price
        print(f"Time: {timestamp} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
        transactions.append({
            'action': 'buy',
            'time': timestamp,
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds
        })
        minute_count = 0
    elif stock_quantity > 0 and (
            force_exit
            or (current_probability < sold_threshold and minute_count > min_holding_rows)):
        print(current_probability)
        funds += stock_quantity * current_price
        # fee = max(stock_quantity * current_price * 0.0001, 5)
        # funds -= fee
        # Classify the round trip as up / down / flat relative to the buy price.
        price_diff = current_price - buy_price
        if price_diff > 0:
            direction = '涨'
        elif price_diff < 0:
            direction = '跌'
        else:
            direction = '平'
        print(f"Time: {timestamp} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
        transactions.append({
            'action': 'sell',
            'time': timestamp,
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds,
            'price_change': direction
        })
        stock_quantity = 0
        buy_price = 0  # flat again

# Note: a position still open when the data ends is not closed here.
transactions_df = pd.DataFrame(transactions)
# The 'price_change' column only exists once at least one sell was recorded.
if 'price_change' in transactions_df.columns:
    print(transactions_df['price_change'].value_counts())
else:
    print("No sell transactions were recorded in the evaluation window.")
transactions_df.to_csv('transactions_classify.csv')

KeyboardInterrupt: 