In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# 1. 数据加载和预处理
data = pd.read_csv("merged_sorted_file1.csv")
# data = pd.read_csv("tick_data/jq_600518.XSHG.csv")
# data = pd.read_csv("tick_data/jq_000656.XSHE.csv")
# data = pd.read_csv("tick_data/jq_002310.XSHE.csv")

# data = pd.read_csv("600518.csv")
# data['time'] = data['trade_time']
# data['current'] = data['close'] 
# data = data.sort_values(by='time')

# data = pd.read_csv("baba_corrected.csv")
# data = pd.read_csv("INDA.csv")
# data['time'] = data['date']
# data['current'] = data['close'] 

# data['time'] = pd.to_datetime(data['date'].astype(str) + ' ' + data['datetime'])
# # 根据'time'列进行升序排序
# data = data.sort_values(by='time')

data_clean = data.dropna(subset=['current']).copy()



# 2. 特征生成
# Calculate rolling mean and standard deviation
data_clean['rolling_mean'] = data_clean['current'].rolling(window=5).mean()
data_clean['rolling_std'] = data_clean['current'].rolling(window=5).std()
data_clean = data_clean.fillna(data_clean.median())

# Calculate RSI
delta = data_clean['current'].diff()
gain = (delta.where(delta > 0, 0)).fillna(0)
loss = (-delta.where(delta < 0, 0)).fillna(0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
data_clean['RSI'] = 100 - (100 / (1 + rs))

# Calculate MACD
short_ema = data_clean['current'].ewm(span=12, adjust=False).mean()
long_ema = data_clean['current'].ewm(span=26, adjust=False).mean()
data_clean['MACD'] = short_ema - long_ema
data_clean['MACD_signal'] = data_clean['MACD'].ewm(span=9, adjust=False).mean()

# Shift RSI and MACD to use them as features for next timestep
data_clean['RSI_shifted'] = data_clean['RSI'].shift(1)
data_clean['MACD_shifted'] = data_clean['MACD'].shift(1)
data_clean['MACD_signal_shifted'] = data_clean['MACD_signal'].shift(1)

# Define label
# data_clean['label'] = (data_clean['current'].shift(-200) > data_clean['current']).astype(int)
data_clean['label'] = data_clean['current'].shift(-200) - data_clean['current']



# 3. 分割数据
data_clean['date_only'] = pd.to_datetime(data_clean['time']).dt.date
# Updated the data split to use 'date_only'
first_date = data_clean['date_only'].iloc[0]
# first_month_data = data_clean[data_clean['date_only'] <= first_date + pd.Timedelta(days=10)]
first_month_data = data_clean[(data_clean['date_only'] > first_date + pd.Timedelta(days=20)) & 
                               (data_clean['date_only'] <= first_date + pd.Timedelta(days=30))]
features = ['current', 'rolling_mean', 'rolling_std', 'RSI_shifted', 'MACD_shifted', 'MACD_signal_shifted']
X_first_month = first_month_data[features]
y_first_month = first_month_data['label']

X_train_month_clean = X_first_month.dropna()
y_train_month_clean = y_first_month[X_train_month_clean.index]


# 4. 模型训练
# rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf = RandomForestRegressor(n_estimators=200, random_state=42)

rf.fit(X_train_month_clean, y_train_month_clean)  # Use the cleaned data for training



In [4]:
#涨幅预测
# 5. 使用第二个月的数据进行预测
second_month_data = data_clean[(data_clean['date_only'] > first_date + pd.Timedelta(days=30)) & 
                               (data_clean['date_only'] <= first_date + pd.Timedelta(days=40))]

predicted_changes = []

# 预测的初始化
probabilities_second_month = []

initial_funds = 100000
funds = initial_funds
stock_quantity = 0
stock_price = 0
transactions = []

# 在逐行预测和模拟交易部分，初始化买入价格为0
buy_price = 0
minute_count = 0


# 修改为预测涨跌幅度，而非概率
buy_threshold = 10  # Define a threshold for deciding to buy (e.g., expecting a 2-unit increase)
sold_threshold = -10

for idx, row in second_month_data.iterrows():
    current_row = row[features].fillna(method='ffill')
    predicted_change = rf.predict([current_row])[0]
    predicted_changes.append(predicted_change)  # Store predicted changes for analysis
    current_price = row['current']
    minute_count += 1
    
    flag = 1
    time_string = row['time'][-8:]  # 获取时间部分，例如 "22:59:10"
    if time_string.startswith("11:29:") or time_string.startswith("14:59:") or time_string.startswith("22:59:"):
        flag = 0

    if predicted_change > buy_threshold and stock_quantity == 0 and flag==1:
        print(predicted_change)
        stock_quantity = funds // current_price
        funds -= stock_quantity * current_price
        buy_price = current_price  # 记录买入价格
        print(f"Time: {row['time']} - Action: BUY at {current_price}, Quantity: {stock_quantity}, Funds: {funds}")
        transactions.append({
            'action': 'buy',
            'time': row['time'],
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds
        })
        minute_count = 0
    # elif minute_count >= 200 and stock_quantity > 0:
    # elif predicted_change < sold_threshold and stock_quantity>0 and minute_count>=200 or  stock_quantity>0  and flag==0:
    elif predicted_change < sold_threshold and stock_quantity>0 and minute_count>=200:
        print(predicted_change)
        funds += stock_quantity * current_price
        price_diff = current_price - buy_price  # 计算价格差异
        # 判断价格差异是涨、跌还是平
        if price_diff > 0:
            direction = '涨'
        elif price_diff < 0:
            direction = '跌'
        else:
            direction = '平'
        print(f"Time: {row['time']} - Action: SELL at {current_price}, Quantity: {stock_quantity}, Funds: {funds}, Price Change: {direction}")
        transactions.append({
            'action': 'sell',
            'time': row['time'],
            'price': current_price,
            'quantity': stock_quantity,
            'funds_remaining': funds,
            'price_change': direction
        })
        stock_quantity = 0
        buy_price = 0  # 重置买入价格为0

transactions_df = pd.DataFrame(transactions)
print(transactions_df['price_change'].value_counts())
transactions_df.to_csv('transactions_regression.csv')


13.535
Time: 2023-08-03 09:47:23 - Action: BUY at 3767.0, Quantity: 26.0, Funds: 2058.0


KeyError: 'price_change'