In [27]:
#　https://www.tpex.org.tw/www/zh-tw/emerging/historical?type=Monthly&date=2025/01/01&code=7709&response=json　　興櫃個股歷史行情


import requests as req
import pandas as pd
import os
import time
import random

In [28]:
# 投標開始日前
cols = [
    "證券代號",
    "投標開始日",
    "前一日平均成交價",
    "前十日內平均成交價",
    "前十日內漲幅",
    "前一日成交金額",
    "前十日內平均成交金額",
    "前一日最高成交價",
    "前一日最低成交價",
    "前一日成交筆數",
    "前一日成交股數",
    "前十日內平均成交筆數",
    "前十日內平均成交股數",
    #"近五次競拍股票漲幅"                    # 目標標的前五個競拍股票其上市首日((收盤-投標日前一日股價)/投標日前一日股價)*100之平均
]


In [29]:
import time
import random
import requests as req
import pandas as pd

def get_price_table(code, y, m, headers):
    y2 = y if m > 1 else y - 1
    m2 = m - 1 if m > 1 else 12
    url_curr = f"https://www.tpex.org.tw/www/zh-tw/emerging/historical?type=Monthly&date={y}/{m:02d}/01&code={code}&response=json"
    url_prev = f"https://www.tpex.org.tw/www/zh-tw/emerging/historical?type=Monthly&date={y2}/{m2:02d}/01&code={code}&response=json"

    # 設定最多嘗試 5 次
    for attempt in range(1, 6):
        try:
            r1 = req.get(url_curr, headers=headers, timeout=10)
            r2 = req.get(url_prev, headers=headers, timeout=10)

            # 檢查是否兩者都成功連線
            if r1.status_code == 200 and r2.status_code == 200:
                json1 = r1.json()
                json2 = r2.json()

                # 確保 JSON 內真的有 tables 數據 (櫃買中心有時會回傳空資料)
                if "tables" in json1 and "tables" in json2:
                    df1 = pd.DataFrame(json1["tables"][0]["data"], columns=json1["tables"][0]["fields"])
                    df2 = pd.DataFrame(json2["tables"][0]["data"], columns=json2["tables"][0]["fields"])
                    df_combi = pd.concat([df1, df2], ignore_index=True)
                    return df_combi
                else:
                    print(f"第 {attempt} 次嘗試：代碼 {code} 格式正確但無資料內容。")
            else:
                print(f"第 {attempt} 次嘗試：代碼 {code} 連線失敗 (Status: {r1.status_code}, {r2.status_code})")

        except Exception as e:
            print(f"第 {attempt} 次嘗試：發生意外錯誤 -> {e}")

        # 失敗後的等待，隨次數增加等待時間 (Exponential Backoff)
        if attempt < 5:
            wait_time = attempt * 3 + random.uniform(1, 3)
            print(f"等待 {wait_time:.1f} 秒後重新嘗試...")
            time.sleep(wait_time)

    print(f"已達最大嘗試次數，無法取得代碼 {code} 的資料。")
    return None

def fix_date(date_str):
    parts = date_str.split('/')
    year = int(parts[0]) + 1911
    date_s = f"{year}/{parts[1]}/{parts[2]}"
    date = pd.to_datetime(date_s)
    return date

def data_output(df):
    re = {}
    # --- 單日指標 (前一日，即最後一列) ---
    re['前一日平均成交價'] = df['成交均價'].iloc[-1]
    re['前一日成交金額'] = df['成交金額(元)'].iloc[-1]
    re['前一日最高成交價'] = df['成交最高'].iloc[-1]
    re['前一日最低成交價'] = df['成交最低'].iloc[-1]
    re['前一日成交筆數'] = df['筆數'].iloc[-1]
    re['前一日成交股數'] = df['成交股數'].iloc[-1]

    # --- 十日平均指標 (最後 10 列) ---
    re['前十日內平均成交價'] = df['成交均價'].iloc[-10:].mean().round(3)
    re['前十日內平均成交金額'] = df['成交金額(元)'].iloc[-10:].mean().round(0) # 金額通常不計小數
    re['前十日內平均成交筆數'] = df['筆數'].iloc[-10:].mean().round(0)
    re['前十日內平均成交股數'] = df['成交股數'].iloc[-10:].mean().round(0)

    # --- 計算指標 ---
    # 前十日內漲幅：公式通常是 (今日均價 - 十日前均價) / 十日前均價
    # 這裡依照你原先寫法：(平均價 - 前一日價) / 前一日價
    first_price_in_window = df['成交均價'].iloc[-10] if len(df) >= 10 else df['成交均價'].iloc[0]
    re['前十日內漲幅'] = round((re['前一日平均成交價'] - first_price_in_window) / first_price_in_window, 3)

    return re



In [30]:

def main():
    url = "https://www.tpex.org.tw/www/zh-tw/emerging/historical?type=Monthly&date=2016/1/01&code=6026&response=json"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36'}

    save_folder = "/content/drive/MyDrive/Colab Notebooks/stock_auction_pred_project/csv"
    raw_data_path = f'{save_folder}/bid_info.csv'
    history_price_info_path = f'{save_folder}/history_price_info.csv'

    if os.path.exists(history_price_info_path):
        price_df = pd.read_csv(history_price_info_path, encoding="utf-8", dtype={"證券代號":str})
        price_df["投標開始日"] = pd.to_datetime(price_df["投標開始日"], format="mixed")
        if not os.path.exists(raw_data_path):
            print("can not find rawdata")
            return
        rd = pd.read_csv(raw_data_path, encoding="utf-8", dtype={"證券代號":str})
        rd["投標開始日"] = pd.to_datetime(rd["投標開始日"], format="mixed")
    else:
        price_df = pd.DataFrame(columns=cols).set_index(['證券代號', '投標開始日'])

    rd_indexed = rd.set_index(['證券代號', '投標開始日'])
    price_df_indexed = price_df.set_index(['證券代號', '投標開始日'])

    diff_index = rd_indexed.index.difference(price_df_indexed.index)
    new_df = pd.DataFrame(index=diff_index, columns=price_df_indexed.columns)
    new_df.reset_index(inplace=True)

    for code, date in diff_index:
        print(f"Processing 股號 : {code} and 投標時間 : {date}")
        try:
            y = date.year
            m = date.month
            df = get_price_table(code, y, m, headers=headers)
            df = df.iloc[:, 0:7]
            new_cols = df.columns[1:7]
            for col in new_cols:
                df[col] = df[col].astype(str).str.replace(',', '').astype(float)
            df["日期"] = df["日期"].apply(fix_date)
            df = df[df["日期"] < date]
            df.sort_values(by='日期', ascending=True, inplace=True)
            df = df.reset_index(drop=True)

            re = data_output(df)
            print(re)
            new_df.loc[(new_df["證券代號"] == code) & (new_df["投標開始日"] == date), list(re.keys())] = list(re.values())
            new_df[list(re.keys())] = new_df[list(re.keys())].apply(pd.to_numeric, errors='coerce')     # 欄位格式由str轉float
            print("-" * 50)
            time.sleep(random.uniform(3, 5))
        except Exception as e:
            print(f"Error occurred for code {code} and date {date}: {e}")
            print("-" * 50)
            continue
    result = pd.concat([price_df, new_df])
    result.to_csv(history_price_info_path, index=False, encoding="utf-8-sig")
    print("DONE")

if __name__ == "__main__":
    main()


DONE


  result = pd.concat([price_df, new_df])
