In [1]:
import cloudscraper
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# 建立 Scraper
scraper = cloudscraper.create_scraper()
base_url = "https://my83.com.tw"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}

keywords = ["產險", "壽險", "投資型"]
max_pages = 100

# 擷取單篇文章與留言內容
def extract_question_page(full_url, keyword):
    print(f"🔍 擷取貼文：{full_url}")
    res = scraper.get(full_url, headers=headers)
    soup = BeautifulSoup(res.text, "html.parser")

    results = []

    # 主文區塊
    question_block = soup.select_one("div.QuestionSection")
    if question_block:
        author = question_block.select_one("div.QuestionSection__nickname")
        content = question_block.select_one("div.QuestionSection__content")
        time_tag = question_block.select_one("div.QuestionSection__time")
        tags = [tag.text.strip() for tag in question_block.select("span.QuestionSection__tag")]

        results.append({
            "keyword": keyword,
            "url": full_url,
            "type": "主文",
            "author": author.text.strip() if author else "",
            "content": content.text.strip() if content else "",
            "time": time_tag.text.strip() if time_tag else "",
            "tags": ", ".join(tags)
        })

    # 留言區塊
    answers = soup.select("div.AnswerSection__content")
    for ans in answers:
        ans_author = ans.select_one("div.AnswerSection__nickname")
        ans_content = ans.select_one("div.AnswerSection__text")
        ans_time = ans.select_one("div.AnswerSection__time")

        results.append({
            "keyword": keyword,
            "url": full_url,
            "type": "留言",
            "author": ans_author.text.strip() if ans_author else "",
            "content": ans_content.text.strip() if ans_content else "",
            "time": ans_time.text.strip() if ans_time else "",
            "tags": ""
        })

    time.sleep(1)
    return results

# 主流程：搜尋 + 擷取
all_data = []

for keyword in keywords:
    print(f"\n🔎 開始爬關鍵字：{keyword}")
    for page in range(1, max_pages + 1):
        search_url = f"https://my83.com.tw/question/search?q={keyword}&page={page}"
        print(f"📄 處理搜尋結果頁：{search_url}")
        res = scraper.get(search_url, headers=headers)
        if res.status_code != 200:
            print(f"⚠️ 無法訪問第 {page} 頁，跳過")
            break

        soup = BeautifulSoup(res.text, "html.parser")
        question_links = soup.select('div.ListQuestion a[href^="/question/"]')

        if not question_links:
            print("🚫 沒有更多貼文，結束此關鍵字")
            break

        for a in question_links:
            href = a.get("href")
            full_url = base_url + href
            try:
                records = extract_question_page(full_url, keyword)
                all_data.extend(records)
            except Exception as e:
                print(f"❌ 解析失敗：{full_url}，錯誤：{e}")

# 匯出成 CSV
df = pd.DataFrame(all_data)
df.to_csv("my83_產險_壽險_投資型.csv", index=False, encoding="utf-8-sig")
print(f"\n✅ 已完成！共匯出 {len(df)} 筆資料至 my83_產險_壽險_投資型.csv")



🔎 開始爬關鍵字：產險
📄 處理搜尋結果頁：https://my83.com.tw/question/search?q=產險&page=1
🔍 擷取貼文：https://my83.com.tw/question/42021
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E7%90%86%E8%B3%A0
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E6%A9%9F%E8%BB%8A%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E5%BC%B7%E5%88%B6%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E7%94%A2%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/42021
🔍 擷取貼文：https://my83.com.tw/question/42013
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E7%90%86%E8%B3%A0
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E6%84%8F%E5%A4%96%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E6%B1%BD%E8%BB%8A%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E5%BC%B7%E5%88%B6%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E7%AC%AC%E4%B8%89%E4%BA%BA%E8%B2%AC%E4%BB%BB%E9%9A%AA
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E5%AE%9A%E6%9C%9F
🔍 擷取貼文：https://my83.com.tw/question/search?q=%E4%B8%BB%E7%B4%84
🔍 擷取貼文：https://my8

KeyboardInterrupt: 