In [16]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# 1. Data loading and preprocessing
# `data` holds the rows exactly as read from disk.
data = pd.read_csv("merged_sorted_file1.csv")

# Keep only rows that have a 'current' value and work on an independent copy,
# so the column assignments below never write back into `data`.
has_current = data['current'].notna()
data_clean = data.loc[has_current].copy()


# 2. Feature generation
# 5-row rolling mean and standard deviation of the 'current' column.
data_clean['rolling_mean'] = data_clean['current'].rolling(window=5).mean()
data_clean['rolling_std'] = data_clean['current'].rolling(window=5).std()
# Fill the remaining NaNs (including the 4-row rolling warm-up) with each
# numeric column's median. numeric_only=True is required: the frame carries a
# string 'time' column, and pandas >= 2.0 raises TypeError when asked for the
# median of a non-numeric column instead of silently skipping it.
# NOTE(review): the median is taken over the whole file, so the filled values
# depend on rows that come later in time — confirm this is acceptable.
data_clean = data_clean.fillna(data_clean.median(numeric_only=True))

# RSI over a 14-row window.
delta = data_clean['current'].diff()
# Split the row-to-row change into a gain series and a loss series; rows that
# do not qualify (and the leading NaN from diff) are set to 0.
gain = delta.where(delta > 0, 0).fillna(0)
loss = (-delta.where(delta < 0, 0)).fillna(0)
avg_gain, avg_loss = (side.rolling(window=14).mean() for side in (gain, loss))
# avg_loss == 0 makes rs infinite (RSI 100); 0 / 0 leaves RSI as NaN.
rs = avg_gain / avg_loss
data_clean['RSI'] = 100 - (100 / (1 + rs))


# MACD: 12-span EMA minus 26-span EMA, and a 9-span EMA of that difference.
def _ema(series, span):
    """Return the exponentially weighted mean of `series` (adjust=False)."""
    return series.ewm(span=span, adjust=False).mean()


short_ema = _ema(data_clean['current'], 12)
long_ema = _ema(data_clean['current'], 26)
data_clean['MACD'] = short_ema - long_ema
data_clean['MACD_signal'] = _ema(data_clean['MACD'], 9)

# Lag every indicator by one row so each row carries the previous row's value.
for source_col, lagged_col in (
    ('RSI', 'RSI_shifted'),
    ('MACD', 'MACD_shifted'),
    ('MACD_signal', 'MACD_signal_shifted'),
):
    data_clean[lagged_col] = data_clean[source_col].shift(1)

# Define label: 1 when 'current' is strictly higher LABEL_HORIZON rows later,
# otherwise 0.
LABEL_HORIZON = 120
future_price = data_clean['current'].shift(-LABEL_HORIZON)
data_clean['label'] = (future_price > data_clean['current']).astype(int)
# The last LABEL_HORIZON rows have no future value. Comparing against NaN is
# False, so those rows are recorded as 0 without any evidence. The column keeps
# its int dtype for compatibility, but such rows are excluded from training.
label_known = future_price.notna()


# 3. Split data
data_clean['date_only'] = pd.to_datetime(data_clean['time']).dt.date
first_date = data_clean['date_only'].iloc[0]
# Training window: calendar days in (first_date + 20, first_date + 30].
# The "first_month" names are kept even though the window is 10 days long.
in_train_window = ((data_clean['date_only'] > first_date + pd.Timedelta(days=20)) &
                   (data_clean['date_only'] <= first_date + pd.Timedelta(days=30)))
first_month_data = data_clean[in_train_window]
features = ['current', 'rolling_mean', 'rolling_std', 'RSI_shifted', 'MACD_shifted', 'MACD_signal_shifted']
X_first_month = first_month_data[features]
y_first_month = first_month_data['label']

# Train only on rows whose label is backed by a real future value and whose
# features are all present. The mask is applied positionally so it stays
# correct regardless of the index labels.
label_known_in_window = label_known[in_train_window].to_numpy()
X_train_month_clean = X_first_month[label_known_in_window].dropna()
y_train_month_clean = y_first_month.loc[X_train_month_clean.index]


# 4. Model training
# Random forest of 100 trees with a fixed seed. fit() returns the estimator
# itself, so construction and training on the cleaned rows are chained.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_month_clean,
    y_train_month_clean,
)



In [17]:
# Up/down prediction
# 5. Predict on the evaluation window: calendar days in
#    (first_date + 30, first_date + 40]. The "second_month" names are kept
#    even though the window is 10 days long.
in_eval_window = ((data_clean['date_only'] > first_date + pd.Timedelta(days=30)) &
                  (data_clean['date_only'] <= first_date + pd.Timedelta(days=40)))
second_month_data = data_clean[in_eval_window]

# Missing feature values are carried forward along TIME (down each column),
# using only earlier rows. Filling along a single row's feature axis would
# copy one feature's value into a different feature.
X_second_month = data_clean[features].ffill()[in_eval_window]
complete_rows = X_second_month.notna().all(axis=1).to_numpy()

# Probability of class 1 ("up") for every evaluation row, computed in one
# batched call. Rows that still lack a feature get NaN and never trigger a
# trade, because every comparison against NaN is False.
up_probability = pd.Series(float('nan'), index=X_second_month.index, dtype=float)
if complete_rows.any():
    model_classes = list(rf.classes_)
    if 1 in model_classes:
        class_probabilities = rf.predict_proba(X_second_month[complete_rows])
        up_probability.iloc[complete_rows] = class_probabilities[:, model_classes.index(1)]
    else:
        # The model never saw an "up" example, so it assigns it probability 0.
        up_probability.iloc[complete_rows] = 0.0
probabilities_second_month = up_probability.tolist()

# Simulation state
initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
threshold = 0.95
transactions = []

# Price paid for the currently open position (0 when flat).
buy_price = 0
# Despite its name this counts ROWS processed since the last buy, not minutes.
minute_count = 0
# No new position is opened during these clock minutes.
# NOTE(review): presumably the final minute of each trading session — confirm.
NO_BUY_PREFIXES = ("11:29:", "14:59:", "22:59:")

for row_time, current_price, current_probability in zip(
        second_month_data['time'],
        second_month_data['current'],
        probabilities_second_month):
    minute_count = minute_count + 1
    # The last 8 characters of 'time' are the clock part, e.g. "22:59:10".
    can_buy = not row_time[-8:].startswith(NO_BUY_PREFIXES)

    if current_probability > threshold and stock_quantity == 0 and can_buy:
        # Confident "up" signal while flat: buy as many whole units as funds allow.
        print(current_probability)
        stock_quantity = funds // current_price
        funds -= stock_quantity * current_price
        stock_price = current_price
        buy_price = current_price
        print(f"Time: {row_time} - Action: BUY at {current_price}, Quantity: {stock_quantity},Funds:{funds}")
        transactions.append({
            'action': 'buy',
            'time': row_time,
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds
        })
        minute_count = 0
    elif current_probability < 1 - threshold and stock_quantity > 0 and minute_count > 120:
        # Confident "down" signal, and more than 120 rows have passed since the
        # buy: close the whole position.
        print(current_probability)
        funds += stock_quantity * current_price
        price_diff = current_price - buy_price
        # Classify the round trip as up / down / flat.
        if price_diff > 0:
            direction = '涨'
        elif price_diff < 0:
            direction = '跌'
        else:
            direction = '平'
        print(f"Time: {row_time} - Action: SELL at {current_price}, Quantity: {stock_quantity},Funds:{funds}, Price Change: {direction}")
        transactions.append({
            'action': 'sell',
            'time': row_time,
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds,
            'price_change': direction
        })
        stock_quantity = 0
        buy_price = 0
    
    

# Collect the recorded trades into a frame, summarize outcomes, and persist.
transactions_df = pd.DataFrame(transactions)
# The 'price_change' key is written only on SELL records, so the column is
# absent when no position was ever closed (or no trade happened at all).
if 'price_change' in transactions_df.columns:
    print(transactions_df['price_change'].value_counts())
else:
    print("No completed (sell) transactions to summarize.")
transactions_df.to_csv('transactions1.csv')

1.0
Time: 2023-08-03 09:00:20 - Action: BUY at 3742.0, Quantity: 26.0,Funds:2708.0
0.04
Time: 2023-08-03 09:08:36 - Action: SELL at 3759.0, Quantity: 26.0,Funds:100442.0, Price Change: 涨
0.97
Time: 2023-08-03 09:09:40 - Action: BUY at 3756.0, Quantity: 26.0,Funds:2786.0
0.02
Time: 2023-08-03 09:12:24 - Action: SELL at 3758.0, Quantity: 26.0,Funds:100494.0, Price Change: 涨
0.99
Time: 2023-08-03 09:13:35 - Action: BUY at 3754.0, Quantity: 26.0,Funds:2890.0
0.05
Time: 2023-08-03 09:18:51 - Action: SELL at 3755.0, Quantity: 26.0,Funds:100520.0, Price Change: 涨
0.97
Time: 2023-08-03 09:19:05 - Action: BUY at 3757.0, Quantity: 26.0,Funds:2838.0
0.03
Time: 2023-08-03 09:23:28 - Action: SELL at 3760.0, Quantity: 26.0,Funds:100598.0, Price Change: 涨
1.0
Time: 2023-08-03 09:24:07 - Action: BUY at 3759.0, Quantity: 26.0,Funds:2864.0
0.03
Time: 2023-08-03 09:27:56 - Action: SELL at 3760.0, Quantity: 26.0,Funds:100624.0, Price Change: 涨
0.96
Time: 2023-08-03 09:43:47 - Action: BUY at 3763.0, Quanti