基本爬蟲

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from urllib.parse import quote
import time

def get_coupang_search_results(search_keyword: str, advanced_keywords: list[str]):
    """Scrape the Coupang Taiwan search page and parse its product cards.

    A headless Chrome session loads the search URL for ``search_keyword``;
    the rendered HTML is then parsed with BeautifulSoup.

    Args:
        search_keyword: Text sent as the ``q`` query parameter (URL-quoted).
        advanced_keywords: Substrings that must ALL occur in a card's full
            text for the card to be included in the second returned list.

    Returns:
        A ``(results, matched_results)`` tuple. Both are lists of dicts with
        the keys ``title``, ``full_text``, ``price`` and ``unit_price``;
        ``matched_results`` holds only the products that contain every
        advanced keyword. Fields whose element is missing are ``"N/A"``.
    """
    encoded_keyword = quote(search_keyword)
    url = f"https://www.tw.coupang.com/search?q={encoded_keyword}&channel=user"

    options = Options()
    # Headless mode hides the browser window; remove this flag to watch it.
    options.add_argument("--headless=new")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--lang=zh-TW")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    try:
        driver.get(url)
        # Fixed pause before reading the page source — presumably to let
        # client-side rendering finish (TODO confirm; an explicit wait on
        # the product-card selector would be more reliable).
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always shut the browser down, even when navigation fails, so no
        # Chrome/chromedriver process is left behind.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    # NOTE(review): these class names look build-generated and may change
    # when the site is redeployed — verify if no cards are found.
    product_cards = soup.select(
        "div.SearchResult_searchResultProduct___h6E9"
    )

    results = []
    matched_results = []

    for card in product_cards:
        try:
            # Entire visible text of the card, space-separated.
            full_text = card.get_text(separator=" ", strip=True)

            # Short title (title element only).
            title_tag = card.select_one("div.Product_title__8K0xk")
            title = title_tag.get_text(strip=True) if title_tag else "N/A"

            # Sale price.
            price_tag = card.select_one(
                "span.Product_salePricePrice__2FbsL span"
            )
            price = price_tag.get_text(strip=True) if price_tag else "N/A"

            # Price per unit.
            unit_price_tag = card.select_one("div.Product_unitPrice__QQPdR")
            unit_price = unit_price_tag.get_text(strip=True) if unit_price_tag else "N/A"

            product = {
                "title": title,
                "full_text": full_text,
                "price": price,
                "unit_price": unit_price
            }

            results.append(product)

            # Advanced filter: every keyword must appear in the full text.
            if all(kw in full_text for kw in advanced_keywords):
                matched_results.append(product)

        except Exception as e:
            # Best effort: report the card that failed and keep going.
            print("解析錯誤：", e)

    return results, matched_results

# Script entry point
if __name__ == "__main__":
    # Primary search keyword
    search_keyword = "味丹 氣泡水"

    # Advanced filter terms (a product must contain all of them)
    advanced_keywords = ["檸檬風味"]

    # Run the scrape
    all_products, filtered_products = get_coupang_search_results(
        search_keyword,
        advanced_keywords
    )

    print(f"搜尋關鍵字：{search_keyword}")
    # Disabled: uncomment the lines below to list every scraped product,
    # not only the ones matching the advanced filter.
    # print("所有搜尋結果：")
    # for idx, product in enumerate(all_products, 1):
    #     print(f"{idx}. 標題: {product['title']}")
    #     print(f"   價格: {product['price']}")
    #     print(f"   每單位: {product['unit_price']}")
    #     print(f"   完整內容: {product['full_text']}")
    #     print("-" * 60)

    # Print only the products that matched every advanced keyword.
    print(f"\n進階條件符合項目（包含：{'、'.join(advanced_keywords)}）：")
    for idx, product in enumerate(filtered_products, 1):
        print(f"{idx}. 標題: {product['title']}")
        print(f"   價格: {product['price']}")
        print(f"   每單位: {product['unit_price']}")
        print(f"   完整內容: {product['full_text']}")
        print("-" * 60)

搜尋關鍵字：味丹 氣泡水

進階條件符合項目（包含：檸檬風味）：
1. 標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶
   價格: $287
   每單位: ($2.14/100ml)
   完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶 特價 46折 $624 $287 ($2.14/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 滿 $490 ( 911 )
------------------------------------------------------------


寫入資料庫

In [25]:
import os
import pymysql
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from urllib.parse import quote
from dotenv import load_dotenv

# Load the variables defined in the .env file
load_dotenv()

# MariaDB connection settings, read from environment variables
DB_HOST = os.getenv("DB_HOST")
# `or 3306` also covers a DB_PORT that is set but empty (e.g. "DB_PORT=" in
# .env), which would otherwise make int("") raise ValueError.
DB_PORT = int(os.getenv("DB_PORT") or 3306)
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

def insert_into_db(data_list):
    """Insert scraped product dicts into the ``coupang_products`` table.

    The table is created first if it does not exist yet. All rows are sent
    with a single ``executemany`` call and committed together.

    Args:
        data_list: Iterable of dicts, each providing the keys
            ``search_keyword``, ``title``, ``full_text``, ``price``,
            ``unit_price`` and ``timestamp``.

    Raises:
        KeyError: If an item lacks one of the required keys (raised before
            any database connection is opened).
    """
    create_table_sql = """
                CREATE TABLE IF NOT EXISTS coupang_products (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    search_keyword VARCHAR(255),
                    title TEXT,
                    full_text TEXT,
                    price VARCHAR(50),
                    unit_price VARCHAR(50),
                    timestamp DATETIME
                );
            """
    insert_sql = """
                    INSERT INTO coupang_products (search_keyword, title, full_text, price, unit_price, timestamp)
                    VALUES (%s, %s, %s, %s, %s, %s);
                """

    # Build the parameter tuples up front so malformed input fails before
    # a connection is opened.
    rows = [
        (
            data["search_keyword"],
            data["title"],
            data["full_text"],
            data["price"],
            data["unit_price"],
            data["timestamp"],
        )
        for data in data_list
    ]

    connection = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4'
    )

    # NOTE(review): relies on `with connection:` closing the connection on
    # exit (PyMySQL 1.0+ behaviour) — confirm the installed version.
    with connection:
        with connection.cursor() as cursor:
            cursor.execute(create_table_sql)
            # One batched call instead of one execute() per row.
            cursor.executemany(insert_sql, rows)
        # The commit is explicit; nothing here enables autocommit.
        connection.commit()

def get_coupang_search_results(search_keyword: str, advanced_keywords: list[str]):
    """Scrape the Coupang Taiwan search page into rows for the database.

    A headless Chrome session loads the search URL for ``search_keyword``;
    the rendered HTML is parsed with BeautifulSoup and every product card
    becomes one dict.

    Args:
        search_keyword: Text sent as the ``q`` query parameter (URL-quoted);
            also stored on every returned row.
        advanced_keywords: Accepted for signature compatibility but NOT used
            by this version — no filtering happens here.

    Returns:
        A list of dicts with the keys ``search_keyword``, ``title``,
        ``full_text``, ``price``, ``unit_price`` and ``timestamp``. All rows
        of one call share the same ``datetime.now()`` value. Fields whose
        element is missing are ``"N/A"``.
    """
    encoded_keyword = quote(search_keyword)
    url = f"https://www.tw.coupang.com/search?q={encoded_keyword}&channel=user"

    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--lang=zh-TW")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )
    try:
        driver.get(url)
        # Fixed pause before reading the page source — presumably to let
        # client-side rendering finish (TODO confirm).
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always shut the browser down, even when navigation fails, so no
        # Chrome/chromedriver process is left behind.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    # NOTE(review): these class names look build-generated and may change
    # when the site is redeployed — verify if no cards are found.
    product_cards = soup.select(
        "div.SearchResult_searchResultProduct___h6E9"
    )

    # One timestamp for the whole scrape so its rows can be grouped later.
    now = datetime.now()
    results = []

    for card in product_cards:
        try:
            full_text = card.get_text(separator=" ", strip=True)
            title_tag = card.select_one("div.Product_title__8K0xk")
            title = title_tag.get_text(strip=True) if title_tag else "N/A"
            price_tag = card.select_one(
                "span.Product_salePricePrice__2FbsL span"
            )
            price = price_tag.get_text(strip=True) if price_tag else "N/A"
            unit_price_tag = card.select_one("div.Product_unitPrice__QQPdR")
            unit_price = unit_price_tag.get_text(strip=True) if unit_price_tag else "N/A"

            product = {
                "search_keyword": search_keyword,
                "title": title,
                "full_text": full_text,
                "price": price,
                "unit_price": unit_price,
                "timestamp": now
            }
            results.append(product)

        except Exception as e:
            # Best effort: report the card that failed and keep going.
            print("解析錯誤：", e)

    return results

# Script entry point: scrape one keyword and store every result row.
if __name__ == "__main__":
    search_keyword = "味丹 氣泡水"
    # Passed through to the scraper; filtering by these terms is done later
    # when the stored rows are queried.
    advanced_keywords = ["檸檬風味"]

    results = get_coupang_search_results(
        search_keyword,
        advanced_keywords
    )
    insert_into_db(results)

    print("資料已寫入 MariaDB。")


資料已寫入 MariaDB。


查詢資料庫

In [27]:
# Advanced filter terms (a row must contain all of them)
advanced_keywords = ["味丹 多喝水", "檸檬風味"]

def query_advanced_results():
    """Print every stored product whose full text contains all filter terms.

    Fetches all rows of ``coupang_products`` and filters them in Python
    against the module-level ``advanced_keywords`` list. Nothing is returned;
    matching rows are written to stdout.
    """
    db = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4'
    )

    with db, db.cursor() as cur:
        cur.execute(
            "SELECT title, price, unit_price, full_text, timestamp FROM coupang_products;"
        )
        records = cur.fetchall()

        print(
            f"\n進階查詢（包含：{'、'.join(advanced_keywords)}）結果：\n"
        )
        for title, price, unit_price, full_text, timestamp in records:
            # Skip rows that miss at least one of the required terms.
            if not all(kw in full_text for kw in advanced_keywords):
                continue

            print(f"標題: {title}")
            print(f"價格: {price}")
            print(f"每單位: {unit_price}")
            print(f"完整內容: {full_text}")
            print(f"時間戳記: {timestamp}")
            print("-" * 60)

# Script entry point: print the rows matching the advanced keywords.
if __name__ == "__main__":
    query_advanced_results()



進階查詢（包含：味丹 多喝水、檸檬風味）結果：

標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶
價格: $287
每單位: ($2.14/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶 特價 46折 $624 $287 ($2.14/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 滿 $490 ( 909 )
時間戳記: 2025-05-12 00:52:40
------------------------------------------------------------
標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 48瓶
價格: $674
每單位: ($2.51/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 48瓶 特價 55折 $1,248 $674 ($2.51/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 ( 909 )
時間戳記: 2025-05-12 00:52:40
------------------------------------------------------------
標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 72瓶
價格: $1,061
每單位: ($2.63/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 72瓶 特價 57折 $1,872 $1,061 ($2.63/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 ( 909 )
時間戳記: 2025-05-12 00:52:40
------------------------------------------------------------
標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶
價格: $287
每單位: ($2.14/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶 特價 46折 $624 $287 ($2.14/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 滿 $490 ( 909 )
時間戳記: 2025-05-12 00

檢查重複

In [29]:
# Advanced filter terms (a row must contain all of them)
advanced_keywords = ["味丹 多喝水", "檸檬風味"]

def query_advanced_results():
    """Print matching products, newest first, skipping duplicate full texts.

    Fetches all rows of ``coupang_products`` ordered by ``timestamp``
    descending, keeps those whose full text contains every entry of the
    module-level ``advanced_keywords`` list, and prints each distinct full
    text only once (the first, i.e. most recent, occurrence).
    """
    db = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4'
    )

    # Full texts that have already been printed.
    printed_texts = set()

    with db, db.cursor() as cur:
        cur.execute(
            "SELECT title, price, unit_price, full_text, timestamp FROM coupang_products ORDER BY timestamp DESC;"
        )
        records = cur.fetchall()

        print(f"\n進階查詢（包含：{'、'.join(advanced_keywords)}）結果：\n")
        for record in records:
            title, price, unit_price, full_text, timestamp = record

            # Skip rows that miss a required term or repeat earlier content.
            is_match = all(kw in full_text for kw in advanced_keywords)
            if not is_match or full_text in printed_texts:
                continue
            printed_texts.add(full_text)

            print(f"標題: {title}")
            print(f"價格: {price}")
            print(f"每單位: {unit_price}")
            print(f"完整內容: {full_text}")
            print(f"時間戳記: {timestamp}")
            print("-" * 60)

# Script entry point: print the de-duplicated rows matching the keywords.
if __name__ == "__main__":
    query_advanced_results()



進階查詢（包含：味丹 多喝水、檸檬風味）結果：

標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶
價格: $287
每單位: ($2.14/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 24瓶 特價 46折 $624 $287 ($2.14/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 滿 $490 ( 909 )
時間戳記: 2025-05-12 00:53:31
------------------------------------------------------------
標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 48瓶
價格: $674
每單位: ($2.51/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 48瓶 特價 55折 $1,248 $674 ($2.51/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 ( 909 )
時間戳記: 2025-05-12 00:53:31
------------------------------------------------------------
標題: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 72瓶
價格: $1,061
每單位: ($2.63/100ml)
完整內容: 味丹 多喝水 MORE氣泡水 檸檬風味, 560ml, 72瓶 特價 57折 $1,872 $1,061 ($2.63/100ml) 7折 優惠券 明天 5/13 (二) 預計送達 免運 ( 909 )
時間戳記: 2025-05-12 00:53:31
------------------------------------------------------------


輸出資料庫全部內容

In [18]:
def query_all_products():
    """Print every row of ``coupang_products``, highest ``id`` first.

    Nothing is returned; each row is written to stdout followed by a
    separator line.
    """
    db = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4'
    )

    with db, db.cursor() as cur:
        cur.execute(
            "SELECT id, search_keyword, title, price, unit_price, full_text, timestamp FROM coupang_products ORDER BY id DESC;"
        )
        records = cur.fetchall()

        print("資料庫 coupang_products 全部內容：\n")
        for record in records:
            # Column order follows the SELECT list above.
            product_id, keyword, title, price, unit_price, full_text, timestamp = record
            print(f"ID: {product_id}")
            print(f"關鍵字: {keyword}")
            print(f"標題: {title}")
            print(f"價格: {price}")
            print(f"每單位: {unit_price}")
            print(f"完整內容: {full_text}")
            print(f"時間戳記: {timestamp}")
            print("-" * 80)

# Script entry point: dump the whole table to stdout.
if __name__ == "__main__":
    query_all_products()


資料庫 coupang_products 全部內容：

ID: 90
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 2包
價格: $298
每單位: ($2.13/100ml)
完整內容: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 2包 特價 6折 $500 $298 ($2.13/100ml) 7折 優惠券 缺貨 免運 滿 $490 ( 52 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 89
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 豆腐貓砂, 原味, 7L, 6袋
價格: $675
每單位: ($1.61/100ml)
完整內容: DORO CAT 豆乳貓 豆腐貓砂, 原味, 7L, 6袋 特價 45折 $1,500 $675 ($1.61/100ml) 7折 優惠券 缺貨 免運 ( 489 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 88
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 6包
價格: $675
每單位: ($1.61/100ml)
完整內容: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 6包 特價 45折 $1,500 $675 ($1.61/100ml) 7折 優惠券 缺貨 免運 ( 52 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 87
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 豆腐貓砂

資料查詢並匯出

In [19]:
import pandas as pd
from datetime import datetime

def query_products(
    start_date=None,
    end_date=None,
    limit=None,
    offset=None,
    # e.g. 'output.csv' or 'output.xlsx'
    export_path=None
):
    """Query ``coupang_products`` with optional filters, print and export.

    Args:
        start_date: Inclusive lower bound for ``timestamp``; ignored when
            falsy. May be given without ``end_date``.
        end_date: Inclusive upper bound for ``timestamp``; ignored when
            falsy. May be given without ``start_date``.
            NOTE(review): a date-only string such as "2025-12-31" is
            presumably compared as midnight of that day, which would exclude
            the rest of that day — confirm against the database.
        limit: Maximum number of rows; no LIMIT clause when falsy.
        offset: Rows to skip; only applied together with ``limit``.
        export_path: Destination file ending in ``.csv`` or ``.xlsx``;
            ``None`` disables the export. Other extensions only print a
            message.

    Returns:
        None. Rows are printed to stdout, newest ``id`` first.
    """
    sql = "SELECT id, search_keyword, title, price, unit_price, full_text, timestamp FROM coupang_products WHERE 1=1"
    params = []

    # Date filter: each bound is applied on its own, so passing only one of
    # them is honoured instead of being silently ignored.
    if start_date:
        sql += " AND timestamp >= %s"
        params.append(start_date)
    if end_date:
        sql += " AND timestamp <= %s"
        params.append(end_date)

    # Ordering
    sql += " ORDER BY id DESC"

    # Paging: OFFSET is only appended together with LIMIT.
    if limit:
        sql += " LIMIT %s"
        params.append(limit)
        if offset:
            sql += " OFFSET %s"
            params.append(offset)

    connection = pymysql.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        database=DB_NAME,
        charset='utf8mb4',
        # Rows come back as dicts keyed by column name.
        cursorclass=pymysql.cursors.DictCursor
    )

    # Hold the connection only for the query itself; printing and exporting
    # work on the rows already fetched.
    with connection:
        with connection.cursor() as cursor:
            cursor.execute(sql, params)
            rows = cursor.fetchall()

    # Show the rows
    print("查詢結果：")
    for row in rows:
        print(f"ID: {row['id']}")
        print(f"關鍵字: {row['search_keyword']}")
        print(f"標題: {row['title']}")
        print(f"價格: {row['price']}")
        print(f"每單位: {row['unit_price']}")
        print(f"完整內容: {row['full_text']}")
        print(f"時間戳記: {row['timestamp']}")
        print("-" * 80)

    # Export
    if export_path:
        df = pd.DataFrame(rows)
        if export_path.endswith(".csv"):
            df.to_csv(
                export_path,
                index=False,
                # "utf-8-sig" writes a BOM.
                encoding="utf-8-sig"
            )
            print(f"資料已匯出為 CSV：{export_path}")
        elif export_path.endswith(".xlsx"):
            df.to_excel(export_path, index=False)
            print(f"資料已匯出為 Excel：{export_path}")
        else:
            print("不支援的檔案格式，只支援 .csv 與 .xlsx")

if __name__ == "__main__":
    # Date filter (adjustable)
    start_date = "2024-12-01"
    end_date = "2025-12-31"

    # Paging parameters (adjustable)
    limit = 10
    offset = 0

    # Export path (None disables the export)
    # Alternatively: coupang_output.csv
    export_path = "coupang_output.xlsx"

    # Run the query
    query_products(
        start_date=start_date,
        end_date=end_date,
        limit=limit,
        offset=offset,
        export_path=export_path
    )


查詢結果：
ID: 90
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 2包
價格: $298
每單位: ($2.13/100ml)
完整內容: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 2包 特價 6折 $500 $298 ($2.13/100ml) 7折 優惠券 缺貨 免運 滿 $490 ( 52 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 89
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 豆腐貓砂, 原味, 7L, 6袋
價格: $675
每單位: ($1.61/100ml)
完整內容: DORO CAT 豆乳貓 豆腐貓砂, 原味, 7L, 6袋 特價 45折 $1,500 $675 ($1.61/100ml) 7折 優惠券 缺貨 免運 ( 489 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 88
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 6包
價格: $675
每單位: ($1.61/100ml)
完整內容: DORO CAT 豆乳貓 活性碳與小蘇打雙重除臭顆粒低過敏極細豆腐貓砂, 無香, 7L, 6包 特價 45折 $1,500 $675 ($1.61/100ml) 7折 優惠券 缺貨 免運 ( 52 )
時間戳記: 2025-05-12 00:45:26
--------------------------------------------------------------------------------
ID: 87
關鍵字: DORO CAT 豆乳貓
標題: DORO CAT 豆乳貓 豆腐貓砂, 無味, 7L, 1袋
價格: $115
