In [75]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from io import StringIO
from tabulate import tabulate
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import time

# Stock number to query; it is sent as the `stockNo` form field of each POST
# request and is also used as the prefix of the output CSV file names.
stock_no = '1101'

# The page-scraping steps (token, cookies, available dates) are wrapped in a single function.
def get_token_and_cookie():
    """Open the TDCC query page in headless Chrome and collect request inputs.

    Returns:
        tuple: ``(token_value, target_cookie, scaDate_values)`` where

        * ``token_value`` is the ``value`` attribute of the element whose id
          is ``SYNCHRONIZER_TOKEN``;
        * ``target_cookie`` is a ``name=value`` string, joined with ``;``,
          built from the browser cookies named ``JSESSIONID``, ``_ga``,
          ``_fbp`` and ``_ga_PZZ1MN4BZ8`` (empty if none are present);
        * ``scaDate_values`` is the list of ``value`` attributes of every
          ``<option>`` inside the element whose id is ``scaDate``.
    """
    page_url = "https://www.tdcc.com.tw/portal/zh/smWeb/qryStock"
    wanted_cookie_names = ["JSESSIONID", "_ga", "_fbp", "_ga_PZZ1MN4BZ8"]

    # Run Chrome without opening a visible browser window.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(page_url)

        # Wait (10 second limit) until the token field is present in the DOM.
        token_locator = (By.ID, "SYNCHRONIZER_TOKEN")
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(token_locator)
        )

        # Keep only the cookies of interest, formatted as name=value pairs.
        target_cookie = ";".join(
            f"{entry['name']}={entry['value']}"
            for entry in driver.get_cookies()
            if entry['name'] in wanted_cookie_names
        )
        if not target_cookie:
            print("Cookie not found.")

        # Read the token from the hidden form field.
        token_value = driver.find_element(
            By.ID, "SYNCHRONIZER_TOKEN"
        ).get_attribute("value")

        # Collect every date offered by the scaDate <select>.
        date_select = driver.find_element(By.ID, "scaDate")
        scaDate_values = [
            choice.get_attribute("value")
            for choice in date_select.find_elements(By.TAG_NAME, "option")
        ]
    finally:
        # Always close the browser, even when scraping raised.
        driver.quit()

    return token_value, target_cookie, scaDate_values

# Scrape the form token, the cookie string and the selectable dates.
token_value, target_cookie, scaDate_values = get_token_and_cookie()

# Sort the dates in ascending order (oldest first).
scaDate_values.sort()

# Become DataFrames once the first date has been parsed; None means that
# nothing has been collected yet.
result_of_people = None
result_of_shareholding = None

# For a quick test run, limit the number of dates:
# scaDate_values = scaDate_values[:2]

# Request settings that do not change between iterations.
url = "https://www.tdcc.com.tw/portal/zh/smWeb/qryStock"
request_timeout = 30  # seconds; without it requests may wait indefinitely
headers = {
    'Cookie': target_cookie,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://www.tdcc.com.tw',
    'Referer': 'https://www.tdcc.com.tw/portal/zh/smWeb/qryStock',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"'
}

# Query every date in turn.
for position, scaDate in enumerate(scaDate_values):
    if position:
        # Pause between consecutive requests to avoid hitting the server
        # too frequently.
        time.sleep(1)

    print(f"正在處理日期: {scaDate}")

    # Form fields of the query; the token changes after every response.
    payload = {
        'SYNCHRONIZER_TOKEN': token_value,
        'SYNCHRONIZER_URI': '/portal/zh/smWeb/qryStock',
        'method': 'submit',
        'firDate': scaDate,
        'scaDate': scaDate,
        'sqlMethod': 'StockNo',
        'stockNo': stock_no,
        'stockName': ''
    }

    # A network error or timeout skips this date instead of aborting the
    # whole run and losing what has already been collected.
    try:
        response = requests.post(
            url, headers=headers, data=payload, timeout=request_timeout
        )
    except requests.RequestException as exc:
        print(f"請求失敗：{exc}")
        continue

    if response.status_code != 200:
        print(f"請求失敗，狀態碼：{response.status_code}")
        print(response.text)
        continue

    print("請求成功")
    soup = BeautifulSoup(response.text, 'html.parser')

    # The token found in this response is the one sent with the next POST.
    # If the response has none, the previous token is kept.
    token_element = soup.find(id="SYNCHRONIZER_TOKEN")
    next_token = token_element.get('value') if token_element is not None else None
    if next_token:
        token_value = next_token

    # The data lives in the element <table class="table">.
    table = soup.find('table', class_='table')
    if table is None:
        print(f"找不到資料表格，略過日期: {scaDate}")
        continue

    # pandas.read_html expects a file-like object for literal HTML.
    df = pd.read_html(StringIO(str(table)))[0]
    # Drop the last row (presumably a summary/total row -- confirm).
    df = df[:-1]

    # Third column: number of people; fourth column: shareholding ratio.
    people_count = df.iloc[:, 2]
    shareholding = df.iloc[:, 3]

    if result_of_people is None or result_of_people.empty:
        # First usable table: start both results with the sequence-number
        # column followed by this date's values.
        result_of_people = pd.DataFrame({
            '序': df.iloc[:, 0],
            scaDate: people_count
        })
        result_of_shareholding = pd.DataFrame({
            '序': df.iloc[:, 0],
            scaDate: shareholding
        })
    else:
        # Later dates only add one column each. The assignment aligns on the
        # row index, so rows beyond those of the first date are not added.
        result_of_people[scaDate] = people_count
        result_of_shareholding[scaDate] = shareholding

if result_of_people is None:
    # Every request failed (or there were no dates): nothing to print or save.
    print("沒有取得任何資料，未輸出檔案")
else:
    print(tabulate(result_of_people, headers='keys', tablefmt='grid', showindex=False))
    print(tabulate(result_of_shareholding, headers='keys', tablefmt='grid', showindex=False))

    # utf-8-sig writes a BOM so that Excel displays the Chinese header correctly.
    result_of_people.to_csv(f'/Users/skyluck/Desktop/stock/{stock_no}_people.csv', index=False, encoding='utf-8-sig')
    result_of_shareholding.to_csv(f'/Users/skyluck/Desktop/stock/{stock_no}_shareholding.csv', index=False, encoding='utf-8-sig')




正在處理日期: 20240202
請求成功
正在處理日期: 20240207
請求成功
+------+------------+------------+
|   序 |   20240202 |   20240207 |
|    1 |     121140 |     120629 |
+------+------------+------------+
|    2 |     275131 |     275834 |
+------+------------+------------+
|    3 |      67040 |      67173 |
+------+------------+------------+
|    4 |      27349 |      27388 |
+------+------------+------------+
|    5 |      12736 |      12778 |
+------+------------+------------+
|    6 |      12908 |      12928 |
+------+------------+------------+
|    7 |       5598 |       5604 |
+------+------------+------------+
|    8 |       3305 |       3315 |
+------+------------+------------+
|    9 |       5576 |       5593 |
+------+------------+------------+
|   10 |       2222 |       2237 |
+------+------------+------------+
|   11 |        835 |        830 |
+------+------------+------------+
|   12 |        262 |        264 |
+------+------------+------------+
|   13 |        112 |        112 |
+------+----