In [1]:
# Load 1-minute K-bar data and backtest on it (original note: convert 1-minute K-bars to daily K-bars, raw output)
import os
import pandas as pd
from trade import TradingStrategy
from datetime import timedelta
import numpy as np

In [2]:
# Root directory; each ticker's bars live in a "<ticker>_kbars" sub-folder.
root_folder = "data"

# Ticker codes to load (the data files tag them with a ".TW" suffix).
stock_list = [
    "2308", "2303", "2317", "2330", "2382",
    "2454", "2881", "2882", "2891", "3711",
]

# Years whose sub-folders are scanned for minute-bar files.
year_list = [
    "2023",
    "2024",
]

In [5]:
def _fill_minute_gaps(bars):
    """Sort ``bars`` by 'date' and fill every gap longer than one minute.

    For each pair of consecutive rows whose timestamps are more than one
    minute apart, copies of the earlier row are inserted at one-minute steps
    (earlier 'date' + 1 min, + 2 min, ...) for every step that is strictly
    before the later row. All other columns of a copy keep the earlier row's
    values. Nothing is added before the first row or after the last one.

    Args:
        bars: DataFrame with a datetime-typed 'date' column.

    Returns:
        A new DataFrame with a fresh 0..n-1 index; ``bars`` is not modified.
    """
    bars = bars.sort_values(by='date').reset_index(drop=True)
    one_minute = pd.Timedelta(minutes=1)

    # Distance from each row to its successor (NaT for the last row).
    gap_to_next = bars['date'].shift(-1) - bars['date']

    # Rows to synthesise after each real row: ceil(gap) - 1 one-minute steps
    # fit strictly inside the gap. The last row (NaN) and gaps of at most one
    # minute get none.
    extra = (np.ceil(gap_to_next / one_minute).fillna(1).astype(int) - 1).clip(lower=0)
    repeats = (extra + 1).to_numpy()

    filled = bars.loc[bars.index.repeat(repeats)].copy()
    # Within each run of repeated rows: 0 for the real row, 1..k for its copies.
    step = filled.groupby(level=0).cumcount().to_numpy()
    filled['date'] = filled['date'] + pd.to_timedelta(step, unit='min')
    return filled.reset_index(drop=True)


# One gap-filled DataFrame per successfully processed minute-bar file.
all_data = []

# Columns every minute-bar file must provide.
required_columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'tic']

# Walk data/<stock>_kbars/<year>/*_1min.csv
for stock in stock_list:
    stock_folder = os.path.join(root_folder, stock + "_kbars")
    if not os.path.exists(stock_folder):
        print(f"Stock folder {stock} does not exist.")
        continue

    for year in year_list:
        year_folder = os.path.join(stock_folder, year)
        if not os.path.exists(year_folder):
            print(f"Year folder {year} for stock {stock} does not exist.")
            continue

        # Sorted so the load order does not depend on the file system.
        for file_name in sorted(os.listdir(year_folder)):
            if not file_name.endswith("_1min.csv"):
                continue

            file_path = os.path.join(year_folder, file_name)
            try:
                df = pd.read_csv(file_path)

                # Skip files without any rows.
                if df.empty:
                    continue

                # Skip files that lack one of the required columns.
                if not all(col in df.columns for col in required_columns):
                    print(f"File {file_name} is missing required columns.")
                    continue

                df['date'] = pd.to_datetime(df['date'])
                all_data.append(_fill_minute_gaps(df))

            except Exception as e:
                # Best effort: report the bad file and keep loading the rest.
                print(f"Error processing file {file_name} in {year_folder}: {e}")

# Merge everything into one frame (and a NumPy view of it).
if all_data:
    combined_data = pd.concat(all_data, ignore_index=True)
    print("All minute-level data loaded successfully.")

    numpy_data = combined_data.to_numpy()
    print("Data successfully converted to NumPy array.")
else:
    # NOTE(review): `combined_data` is not (re)defined on this path, so a
    # later cell that uses it will fail or see a stale value from an earlier run.
    numpy_data = np.array([])
    print("No valid data found.")


All minute-level data loaded successfully.
Data successfully converted to NumPy array.


In [6]:

# Backtest driver
def backtest(start_date, end_date, year):
    """Replay minute bars through a ``TradingStrategy`` and save its trade log.

    Walks every minute from ``start_date`` to ``end_date`` and, for minutes
    between 09:01 and 13:30 inclusive, hands the strategy one row per stock
    in ``stock_list`` (looked up in the global ``combined_data`` by
    ``tic == stock + '.TW'`` and exact timestamp).

    Day-skipping rules:
      * If no stock has a bar at a minute and no day is currently marked as
        skipped, that day is marked and its remaining minutes are ignored.
      * After 09:30, if ``trade.pay_data`` is empty, the remaining minutes of
        the day are ignored; the flag is cleared when a 09:01 minute is
        processed.

    Args:
        start_date: First timestamp to replay (anything ``pd.to_datetime``
            accepts).
        end_date: Last timestamp to replay. It is parsed by
            ``pd.to_datetime``, so a date-only string means 00:00 of that day
            and that day's 09:01-13:30 window is not replayed.
        year: String prefix of the output file, ``<year>output.csv``.

    Raises:
        ValueError: If, at some minute, some but not all stocks in
            ``stock_list`` have a bar.

    Side effects:
        Adds a 'date_only' column to the global ``combined_data`` and sorts
        it in place; writes ``<year>output.csv`` in the working directory.
    """
    flag = False
    trade = TradingStrategy(model_path="model/LSTM_v3.pt", length=10)
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Date part of each timestamp; it is part of the rows handed to the strategy.
    combined_data['date_only'] = combined_data['date'].dt.date

    # Order by day, then by time within the day.
    combined_data.sort_values(by=['date_only', 'date'], inplace=True)

    # Time-of-day boundaries, computed once instead of on every minute.
    window_start = pd.Timestamp("09:01").time()
    window_end = pd.Timestamp("13:30").time()
    pay_check_after = pd.Timestamp("09:30").time()

    # Build, once per stock, a map from timestamp to the position of the first
    # row carrying it, so the per-minute lookup no longer scans the whole frame.
    bars_by_stock = {}
    for stock in stock_list:
        stock_data = combined_data[combined_data['tic'] == stock + '.TW']
        if stock_data.empty:
            continue
        first_pos = {}
        for pos, stamp in enumerate(stock_data['date']):
            first_pos.setdefault(stamp, pos)
        bars_by_stock[stock] = (stock_data, first_pos)

    skip_date = None  # day whose remaining minutes are ignored
    for current_time in pd.date_range(start=start_date, end=end_date, freq='min'):
        # Ignore the rest of a day that was marked as skipped.
        if skip_date and current_time.date() == skip_date:
            continue

        # Only minutes inside the 09:01-13:30 window are replayed.
        now = current_time.time()
        if now < window_start or now > window_end:
            continue
        if now == window_start:
            flag = False
        if flag:
            continue

        # One row per stock that has a bar at exactly this minute.
        time_data = []
        present = []
        for stock in stock_list:
            entry = bars_by_stock.get(stock)
            if entry is None:
                continue
            stock_data, first_pos = entry
            pos = first_pos.get(current_time)
            if pos is not None:
                time_data.append(stock_data.iloc[pos])
                present.append(stock)

        # No bar for any stock and no day marked yet: mark this day and move on.
        if not time_data and skip_date is None:
            skip_date = current_time.date()
            continue

        # Otherwise clear the marker (it can only refer to an earlier day here).
        skip_date = None

        if time_data and len(time_data) != len(stock_list):
            missing = [s for s in stock_list if s not in present]
            raise ValueError(
                "error processing for stock_length: "
                f"{current_time} has bars for {len(time_data)} of "
                f"{len(stock_list)} stocks (missing: {missing})"
            )

        # Hand this minute's rows to the strategy.
        if time_data:
            trade.data_load(time_data)

        # After 09:30, an empty pay_data ends processing for the day.
        if now > pay_check_after:
            if len(trade.pay_data) == 0:
                flag = True

    data = trade.trade
    df = pd.DataFrame(data)
    df.to_csv(year + "output.csv", index=False)



# Run the backtest (original note carries the date 2024-12-15 — presumably when this cell was written)
backtest(start_date="2023-01-01", end_date="2023-12-31",year="2023")


日期變更，清空已存資料: 2023-01-03 -> 2023-01-04
gain 1000.0
length 1
日期變更，清空已存資料: 2023-01-04 -> 2023-01-05
gain 1000.0
length 1
日期變更，清空已存資料: 2023-01-05 -> 2023-01-06
gain 1000.0
length 1
日期變更，清空已存資料: 2023-01-06 -> 2023-01-09
gain 1000.0
length 1
日期變更，清空已存資料: 2023-01-09 -> 2023-01-10
gain 1000.0
length 1
日期變更，清空已存資料: 2023-01-10 -> 2023-01-11
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-11 -> 2023-01-12
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-12 -> 2023-01-13
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-13 -> 2023-01-16
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-16 -> 2023-01-17
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-17 -> 2023-01-30
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-30 -> 2023-01-31
gain -500.0
length 2
日期變更，清空已存資料: 2023-01-31 -> 2023-02-01
gain -500.0
length 2
日期變更，清空已存資料: 2023-02-01 -> 2023-02-02
gain -500.0
length 2
日期變更，清空已存資料: 2023-02-02 -> 2023-02-03
gain -500.0
length 2
日期變更，清空已存資料: 2023-02-03 -> 2023-02-06
gain -500.0
length 2
日期變更，清空已存資料: 2023-02-06 -> 2023-02-07
gain -500.0
length

In [7]:

# Run the backtest for the 2024 range; the trade log is written to "2024output.csv".
backtest(start_date="2024-01-01", end_date="2024-11-30",year="2024")

日期變更，清空已存資料: 2024-01-02 -> 2024-01-03
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-03 -> 2024-01-04
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-04 -> 2024-01-05
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-05 -> 2024-01-08
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-08 -> 2024-01-09
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-09 -> 2024-01-10
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-10 -> 2024-01-11
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-11 -> 2024-01-12
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-12 -> 2024-01-15
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-15 -> 2024-01-16
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-16 -> 2024-01-17
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-17 -> 2024-01-18
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-18 -> 2024-01-19
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-19 -> 2024-01-22
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-22 -> 2024-01-23
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-23 -> 2024-01-24
gain -1000.0
length 1
日期變更，清空已存資料: 2024-01-24 -> 2024-01-25
ga

ValueError: error processing for stock_length