In [1]:
# Install the scraping libraries imported by this notebook (requests, BeautifulSoup, Selenium).
!pip install requests beautifulsoup4 selenium
# Install the system chromedriver package and copy the binary into /usr/bin.
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
# NOTE(review): no visible cell imports mysql.connector (only sqlite3 is imported) - confirm this package is still needed.
!pip install mysql-connector-python

Collecting selenium
  Downloading selenium-4.27.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.27.1-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.28.0-py3-none-any.whl (486 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.3/486.3 kB[0m [31m15.1 MB/s

In [2]:
import csv
import json
import logging
import sqlite3
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# 設置日誌

In [3]:
# Route every logging.* call in this notebook to debug.log via the root logger.
logger = logging.getLogger()
# Close handlers left over from an earlier run of this cell before dropping
# them; clearing the list alone would leave their file handles open.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
file_handler = logging.FileHandler("debug.log", mode="a", encoding="utf-8")
file_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
logger.addHandler(file_handler)
# INFO and above are recorded; DEBUG records are filtered out at this level.
logger.setLevel(logging.INFO)
# The handler is deliberately left open: it was just installed and must stay
# usable for the rest of the session (it flushes after every record).

# chromedriver

In [4]:
def get_chrome_driver():
    """Create a headless Chrome WebDriver.

    Returns:
        A new ``webdriver.Chrome`` instance configured with the flags below.
        The caller owns the driver and is responsible for calling ``quit()``.
    """
    options = Options()
    # Run without a display; the remaining flags are the ones commonly needed
    # when Chrome runs inside a container (presumably Colab here - confirm).
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu'):
        options.add_argument(flag)
    # No driver path is passed: webdriver.Chrome locates chromedriver itself.
    return webdriver.Chrome(options=options)

# PChome 爬蟲

In [5]:
def get_pchome_products(keyword):
    """Fetch the first page of PChome search results for ``keyword``.

    Calls PChome's JSON search endpoint and converts each entry of the
    response's ``prods`` list into a product dict.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string.

    Returns:
        list[dict]: One dict per product with the keys ``name``, ``price``,
        ``source`` (always ``"PChome"``) and ``link``. Entries missing a
        required field are skipped. An empty list is returned when the
        request fails, the response has no products, or any other error
        occurs; errors are logged, never raised.
    """
    logging.info(f"開始爬取 PChome 商品資料，搜尋關鍵字：{keyword}")
    try:
        # Encode the keyword so characters such as "&", "#" or spaces cannot
        # break or truncate the query string.
        encoded_keyword = quote(keyword, safe="")
        url = f"https://ecshweb.pchome.com.tw/search/v3.3/all/results?q={encoded_keyword}&page=1&sort=rnk/dc"
        headers = {"User-Agent": "Mozilla/5.0"}
        logging.info(f"發送請求到 PChome，URL: {url}")
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        logging.info(f"成功接收到 PChome 回應，狀態碼: {response.status_code}")
        data = response.json()
        products = []
        # A response without "prods" is treated as "no results" rather than
        # as an unknown error.
        for item in data.get("prods") or []:
            try:
                product = {
                    "name": item["name"],
                    "price": item["price"],
                    "source": "PChome",
                    "link": f"https://24h.pchome.com.tw/prod/{item['Id']}"
                }
                products.append(product)
                logging.debug(f"提取商品: {product['name']}，價格: {product['price']}，連結: {product['link']}")
            except KeyError as e:
                logging.warning(f"PChome 商品資料缺少欄位: {e}")
        if not products:
            logging.warning("PChome 沒有抓取到任何商品資料")
        else:
            logging.info(f"成功獲取 {len(products)} 筆 PChome 資料")
        return products
    except requests.RequestException as e:
        logging.error(f"PChome 爬取失敗: {e}")
        return []
    except Exception as e:
        logging.critical(f"PChome 發生未知錯誤: {e}")
        return []

# MOMO 爬蟲

In [6]:
def get_momo_products(keyword):
    """Scrape MOMO search results for ``keyword`` using headless Chrome.

    Loads the search page, waits up to 60 seconds for the ``.listAreaUl``
    element, then parses every ``.listAreaLi`` item from the page source.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string.

    Returns:
        list[dict]: One dict per product with the keys ``name``, ``price``
        (float, ``0.0`` when no price element is found), ``source`` (always
        ``"MOMO"``) and ``link``. Items that fail to parse are skipped. An
        empty list is returned on any error; errors are logged, never raised.
    """
    logging.info(f"開始爬取 MOMO 商品資料，搜尋關鍵字：{keyword}")
    try:
        # Encode the keyword so characters such as "&", "#" or spaces cannot
        # break or truncate the query string.
        url = f"https://www.momoshop.com.tw/search/searchShop.jsp?keyword={quote(keyword, safe='')}&_isFuzzy=0&searchType=1"
        driver = get_chrome_driver()
        try:
            logging.info(f"成功啟動 Chrome 驅動，正在載入 URL: {url}")
            driver.get(url)
            WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.listAreaUl')))
            logging.info("網頁已成功載入，等待商品列表顯示...")
            page_html = driver.page_source
        finally:
            # Always shut the browser down, including when loading or waiting
            # fails; otherwise the Chrome process is leaked.
            driver.quit()
        soup = BeautifulSoup(page_html, 'html.parser')
        logging.info("成功提取頁面內容，開始解析商品資料...")
        products = []
        for product in soup.select('.listAreaLi'):
            try:
                title = product.select_one('.prdImgWrap img')['alt']
                price_text = product.select_one('.price .value') or product.select_one('.price')
                price = float(price_text.text.strip().replace(",", "").replace("NT$", "")) if price_text else 0.0
                product_url = product.select_one('.goodsUrl a')['href']
                products.append({"name": title, "price": price, "source": "MOMO", "link": product_url})
                logging.debug(f"已成功提取商品：{title}，價格：{price}，連結：{product_url}")
            except Exception as e:
                # One malformed item must not abort the whole page.
                logging.warning(f"處理商品時出現錯誤：{e}")
        if not products:
            logging.warning("MOMO 爬取到的商品數量為 0，請檢查關鍵字或網頁結構")
        else:
            logging.info(f"成功爬取到 {len(products)} 件商品資料")
        return products
    except Exception as e:
        logging.error(f"爬取 MOMO 商品資料過程中發生錯誤：{e}")
        return []


# 酷澎爬蟲


In [7]:
def get_kubon_products(keyword):
    """Scrape Coupang Taiwan search results for ``keyword`` using headless Chrome.

    Loads the search page, waits up to 60 seconds for product cards to
    appear, then parses each card from the page source.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the query string.

    Returns:
        list[dict]: One dict per product with the keys ``name``, ``price``,
        ``origin_price`` (both floats, ``0.0`` when the element is absent),
        ``source`` (always ``"Kubon"``) and ``link``. Cards that fail to
        parse are skipped. An empty list is returned on any error; errors
        are logged, never raised.
    """
    logging.info(f"開始爬取 Kubon 商品資料，搜尋關鍵字：{keyword}")
    try:
        # Encode the keyword so characters such as "&", "#" or spaces cannot
        # break or truncate the query string.
        url = f"https://www.tw.coupang.com/search?q={quote(keyword, safe='')}"
        driver = get_chrome_driver()
        try:
            logging.info(f"成功啟動 Chrome 驅動，正在載入 URL: {url}")
            driver.get(url)
            WebDriverWait(driver, 60).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'SearchResult_searchResultProduct___h6E9')))
            logging.info("網頁已成功載入，等待商品列表顯示...")
            page_html = driver.page_source
        finally:
            # Always shut the browser down, including when loading or waiting
            # fails; otherwise the Chrome process is leaked.
            driver.quit()
        soup = BeautifulSoup(page_html, 'html.parser')
        logging.info("成功提取頁面內容，開始解析商品資料...")
        products = []
        for product in soup.select('.SearchResult_searchResultProduct___h6E9'):
            try:
                # Look each element up once, then fall back to a placeholder
                # value when it is missing.
                title_tag = product.select_one('.Product_title__8K0xk')
                title = title_tag.text.strip() if title_tag else '未知商品'
                price = product.select_one('.Product_salePrice__PDmKK .Product_salePricePrice__2FbsL')
                origin_price = product.select_one('.Product_originPrice__BsZCM span')
                anchor = product.select_one('a')
                link = anchor['href'] if anchor else '無連結'
                price_value = float(price.text.strip().replace(",", "").replace("$", "")) if price else 0.0
                origin_price_value = float(origin_price.text.strip().replace(",", "").replace("$", "")) if origin_price else 0.0
                products.append({
                    "name": title,
                    "price": price_value,
                    "origin_price": origin_price_value,
                    "source": "Kubon",
                    "link": f"{link}"
                })
                logging.debug(f"已成功提取商品：{title}，價格：{price_value}，原價：{origin_price_value}，連結：{link}")
            except Exception as e:
                # One malformed card must not abort the whole page.
                logging.warning(f"處理商品時出現錯誤：{e}")
        if not products:
            logging.warning("Kubon 爬取到的商品數量為 0，請檢查關鍵字或網頁結構")
        else:
            logging.info(f"成功爬取到 {len(products)} 件商品資料")
        return products
    except Exception as e:
        logging.error(f"爬取 Kubon 商品資料過程中發生錯誤：{e}")
        return []

# 比價功能

In [8]:
def compare_prices(keyword):
    """Collect products for ``keyword`` from all three shops, cheapest first.

    Queries PChome, MOMO and Kubon in that order, merges the results and
    returns them sorted by ascending ``price``. Products with equal prices
    keep the order in which they were collected.
    """
    logging.info(f"開始比價搜尋：{keyword}")
    merged = []
    for fetch in (get_pchome_products, get_momo_products, get_kubon_products):
        merged.extend(fetch(keyword))
    # list.sort is stable, so ties stay in shop order.
    merged.sort(key=lambda item: item["price"])
    logging.info(f"成功整理 {len(merged)} 筆商品")
    return merged


# 儲存至 CSV 檔案

In [9]:
def save_to_csv(products, filename="products.csv"):
    """Write ``products`` to a UTF-8 CSV file, overwriting any existing file.

    Args:
        products: Sequence of product dicts. Keys missing from a product are
            written as empty cells. The dicts themselves are not modified.
        filename: Destination path; defaults to ``"products.csv"``.

    Returns:
        None. When ``products`` is empty nothing is written. Failures are
        logged and printed rather than raised.
    """
    try:
        if not products:
            logging.warning("沒有商品資料可供儲存至 CSV")
            print("無商品資料可存入 CSV 文件。")
            return

        fieldnames = ["name", "price", "source", "link", "origin_price"]

        with open(filename, mode="w", newline="", encoding="utf-8") as file:
            # DictWriter fills absent keys with restval, so the caller's
            # dicts do not need to be padded (and mutated) beforehand.
            writer = csv.DictWriter(file, fieldnames=fieldnames, restval="")

            writer.writeheader()
            writer.writerows(products)

        logging.info(f"資料已成功儲存至 {filename}")
        print(f"資料已成功儲存至 {filename}")
    except Exception as e:
        logging.error(f"儲存到 CSV 時發生錯誤: {e}")
        print(f"儲存到 CSV 時發生錯誤: {e}")


In [10]:
def filter_products(products, min_price=None, max_price=None, platforms=None, sort_by="price"):
    """Filter and sort products without modifying the input list.

    Args:
        products: Iterable of product dicts with ``price``, ``source`` and
            (for name sorting) ``name`` keys.
        min_price: Keep products with ``price >= min_price``; ``None`` disables.
        max_price: Keep products with ``price <= max_price``; ``None`` disables.
        platforms: Collection of accepted ``source`` values; empty or ``None``
            disables the filter.
        sort_by: ``"price"``, ``"name"`` or ``"source"``. Any other value
            leaves the filtered order unchanged.

    Returns:
        list: A new list of the matching products. It is always a new list,
        so the caller's list is never reordered, even when no filter applies.
    """
    selected = [
        p for p in products
        if (min_price is None or p["price"] >= min_price)
        and (max_price is None or p["price"] <= max_price)
        and (not platforms or p["source"] in platforms)
    ]
    if sort_by in ("price", "name", "source"):
        selected.sort(key=lambda p: p[sort_by])
    return selected

# 主程式流程

In [11]:
def main():
    """Run the interactive price comparison.

    Prompts for a keyword, collects products via ``compare_prices``, prompts
    for optional price bounds and platforms, then prints the filtered
    products and saves them with ``save_to_csv``. Returns ``None``.
    """
    # Surrounding whitespace is never part of the intended search term.
    keyword = input("請輸入商品關鍵字：").strip()
    products = compare_prices(keyword)

    print(f"比價結果商品數: {len(products)}")
    logging.info(f"比價結果商品數: {len(products)}")

    if not products:
        print("沒有找到相關商品，請稍後再試或檢查關鍵字。")
        logging.warning("沒有比價結果，請檢查輸入或目標網站")
        return

    try:
        # An empty answer falls back to "no bound" (0 / infinity).
        min_price = float(input("請輸入最低價格 (按 Enter 跳過)：") or 0)
        max_price = float(input("請輸入最高價格 (按 Enter 跳過)：") or float('inf'))
    except ValueError:
        print("價格輸入無效，將跳過價格篩選")
        min_price, max_price = 0, float('inf')

    platform_input = input("請輸入要篩選的平台 (以逗號分隔，如 PChome,MOMO,Kubon 按 Enter 跳過)：")
    # Drop empty entries (blank input, stray or trailing commas). Otherwise a
    # whitespace-only answer becomes [""] and filters out every product.
    platforms = [name.strip() for name in platform_input.split(",") if name.strip()] or None

    filtered_products = filter_products(products, min_price, max_price, platforms)

    print(f"篩選後商品數: {len(filtered_products)}")
    logging.info(f"篩選後商品數: {len(filtered_products)}")

    if filtered_products:
        for p in filtered_products:
            print("-" * 67)
            print(f"品名: {p['name']}")
            print(f"價格: {p['price']}")
            print(f"平台: {p['source']}")
            print(f"連結: {p['link']}")
        save_to_csv(filtered_products)
    else:
        print("沒有符合條件的商品")
        logging.info("篩選後沒有結果")


In [12]:
# Entry point: start the interactive price comparison when this code runs as
# the main module.
if __name__ == "__main__":
    main()

請輸入商品關鍵字：水壺
比價結果商品數: 80
請輸入最低價格 (按 Enter 跳過)：
請輸入最高價格 (按 Enter 跳過)：
請輸入要篩選的平台 (以逗號分隔，如 PChome,MOMO,Kubon 按 Enter 跳過)：
篩選後商品數: 80
-------------------------------------------------------------------
品名: 霧面運動水壺 25 x 7cm, 天空灰, 800ml, 1個
價格: 45.0
平台: Kubon
連結: https://www.tw.coupang.com/products/%E9%9C%A7%E9%9D%A2%E9%81%8B%E5%8B%95%E6%B0%B4%E5%A3%BA-25-x-7cm-479125861007362?itemId=479125860990978&vendorItemId=479125861007361&sourceType=search&rank=5&searchId=79e1aa7f2457925&q=%E6%B0%B4%E5%A3%BA
-------------------------------------------------------------------
品名: 時間刻度大水壺 附吸管, 商務黑色, 2000ml, 1個
價格: 79.0
平台: Kubon
連結: https://www.tw.coupang.com/products/%E6%99%82%E9%96%93%E5%88%BB%E5%BA%A6%E5%A4%A7%E6%B0%B4%E5%A3%BA-%E9%99%84%E5%90%B8%E7%AE%A1-471810822864905?itemId=471810822848520&vendorItemId=471810822832133&sourceType=search&rank=2&searchId=79e1aa7f2457925&q=%E6%B0%B4%E5%A3%BA
-------------------------------------------------------------------
品名: 時間刻度大水壺 附吸管, 天空藍, 2L, 1個
價格: 79.0
平台: Kub