In [22]:
import time
from urllib.parse import quote_plus, urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Configure Chrome: headless mode, a desktop user-agent string, and the
# notifications content-setting pref set to 2 (presumably "block" -- confirm
# against Chrome's preference documentation).
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36")
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)

# Product keyword to search for.
thing = "可口可樂"

# Percent-encode the keyword so spaces, '&' and non-ASCII text cannot corrupt
# the query string.
url = f"https://www.amazon.com/s?k={quote_plus(thing)}"

# Start the Chrome driver.
driver = webdriver.Chrome(options=chrome_options)

try:
    print("正在使用 Selenium 載入網頁...")
    driver.get(url)

    # Wait (up to 15 s) until at least one search-result item is in the DOM.
    wait = WebDriverWait(driver, 15)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[data-component-type="s-search-result"]')))
    print("網頁載入完成，正在獲取頁面原始碼...")

    # Grab the fully rendered page source while the browser is still open.
    page_source = driver.page_source
except TimeoutException:
    # Only a genuine wait timeout is reported as one; any other error propagates
    # unchanged. Re-raising (instead of exit()) stops "Run All" without killing
    # the notebook kernel.
    print("等待超時，無法載入頁面。")
    raise
finally:
    # Always close the browser, even when loading or waiting fails.
    driver.quit()

print("瀏覽器已關閉。開始使用 BeautifulSoup 解析資料...")

# Parse the captured HTML with BeautifulSoup.
soup = BeautifulSoup(page_source, 'html.parser')

正在使用 Selenium 載入網頁...
網頁載入完成，正在獲取頁面原始碼...
瀏覽器已關閉。開始使用 BeautifulSoup 解析資料...


In [23]:
# Echo the parsed document bound to `soup` so the notebook renders its HTML.
# NOTE(review): this dumps the whole page; consider showing only a slice.
soup

<html class="a-js a-audio a-video a-canvas a-svg a-drag-drop a-geolocation a-history a-webworker a-autofocus a-input-placeholder a-textarea-placeholder a-local-storage a-gradients a-transform3d a-touch-scrolling a-text-shadow a-text-stroke a-box-shadow a-border-radius a-border-image a-opacity a-transform a-transition null" data-19ax5a9jf="dingo" data-aui-build-date="3.25.5-2025-08-12" lang="zh-tw"><!-- sp:feature:head-start --><head><script async="" crossorigin="anonymous" src="https://images-na.ssl-images-amazon.com/images/I/216YVwoRFDL.js"></script><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>
<!-- sp:end-feature:head-start -->
<!-- sp:feature:csm:head-open-part1 -->
<script type="text/javascript">var ue_t0=ue_t0||+new Date();</script>
<!-- sp:end-feature:csm:head-open-part1 -->
<!-- sp:feature:cs-optimization -->
<meta content="on" http-equiv="x-dns-prefetch-control"/>
<link href="https://images-na.ssl-images-amazon.com" rel="dns-prefetch"/>
<link 

In [24]:
# Gather every search-result container <div> from the parsed page.
products = soup.find_all('div', attrs={'data-component-type': 's-search-result'})

In [25]:
# Echo the list of search-result <div> elements collected by soup.find_all(...).
products

[<div class="sg-col-4-of-24 sg-col-4-of-12 s-result-item s-asin sg-col-4-of-16 sg-col s-widget-spacing-small sg-col-4-of-20" data-asin="B0012QO8AY" data-cel-widget="search_result_1" data-component-id="8" data-component-type="s-search-result" data-index="2" data-uuid="08c74d8b-12f8-4081-8491-265380256513" id="08c74d8b-12f8-4081-8491-265380256513" role="listitem"><div class="sg-col-inner"><div cel_widget_id="MAIN-SEARCH_RESULTS-2" class="s-widget-container s-spacing-small s-widget-container-height-small celwidget slot=MAIN template=SEARCH_RESULTS widgetId=search-results_1" data-cel-widget="MAIN-SEARCH_RESULTS-2" data-csa-c-id="p23gl9-k9bu5h-qacqxv-yudaz7" data-csa-c-item-id="amzn1.asin.1.B0012QO8AY" data-csa-c-pos="1" data-csa-c-type="item" data-csa-op-log-render=""><span class="a-declarative" data-action="puis-card-container-declarative" data-csa-c-func-deps="aui-da-puis-card-container-declarative" data-csa-c-id="1cpe8b-ys45td-u6xkgk-ks6i0m" data-csa-c-item-id="amzn1.asin.B0012QO8AY" da

In [13]:

# Walk the search-result <div> elements in `products` (the list returned by
# soup.find_all(...)) and collect each item's detail-page URL.
product_urls = []
for i, product_div in enumerate(products, 1):
    print(f"處理第 {i} 個商品項目...")

    # Find the link <a> inside this result. A CSS class selector matches the
    # classes as a set, so it does not depend on the exact order of the class
    # attribute the way an exact class-string match does.
    link_element = product_div.select_one('a.a-link-normal.s-line-clamp-4.s-link-style.a-text-normal')
    href = link_element.get('href') if link_element else None

    if href:
        # urljoin resolves a relative href against the site root and leaves an
        # already-absolute href untouched.
        full_url = urljoin("https://www.amazon.com", href)
        product_urls.append(full_url)
        print(f"成功找到連結: {full_url}")
    else:
        # No usable link: the item may be an ad or use a different layout.
        print(f"無法在商品項目 {i} 中找到連結，可能它是一個廣告。")

# After the loop `full_url` still holds the last link found (later cells read
# it); `product_urls` holds all of them.
print("所有連結處理完畢。")

處理第 1 個商品項目...
成功找到連結: https://www.amazon.com/-/zh_TW/sspa/click?ie=UTF8&spc=MToxODkyOTYyMzQyNzc5NjU3OjE3NTUxNTE1NjA6c3BfYXRmOjMwMDgwNDMzMjUxOTkwMjo6MDo6&url=%2FKWC4-BT-AZ-Coca-Cola-KWC4-KWC-44%25E5%2585%25AC%25E5%258D%2587%25E5%25AE%25B9%25E9%2587%258F%25EF%25BC%258F6%25E7%25BD%2590%25E9%25A3%25B2%25E6%2596%2599%25E8%25A3%259D%25E4%25BE%25BF%25E6%2594%259C%25E5%25BC%258F%25E5%2586%25B0%25E7%25AE%25B1%25EF%25BC%258F%25E8%25BF%25B7%25E4%25BD%25A0%25E5%2586%25B0%25E7%25AE%25B1%25EF%25BC%258C%25E9%2581%25A9%25E7%2594%25A8%25E6%2596%25BC%25E9%25A3%259F%25E5%2593%2581%25E3%2580%2581%25E9%25A3%25B2%25E6%2596%2599%25E3%2580%2581%25E5%25AE%25B6%25E5%25BA%25AD%25E3%2580%2581%25E8%25BE%25A6%25E5%2585%25AC%25E5%25AE%25A4%25E3%2580%2581%25E5%25AE%25BF%25E8%2588%258D%25E3%2580%2581%25E6%25B1%25BD%25E8%25BB%258A%25E3%2580%2581%25E8%2588%25B9%25E8%2588%25B6-AC%25EF%25BC%258FDC%25E6%258F%2592%25E9%25A0%25AD%25EF%25BC%258C%25E5%25AE%25B9%25E9%2587%258F%25E7%25B4%25844-2%25E5%25A4%25B8%25E8%2584%25AB%25

In [14]:
# Echo `full_url`, i.e. the last link assigned by the link-extraction loop.
full_url

'https://www.amazon.com/-/zh_TW/Nostalgia-%E9%9B%99%E9%80%9F%E9%9B%BB%E5%8B%95%E5%8F%AF%E5%8F%A3%E5%8F%AF%E6%A8%82%E9%99%90%E9%87%8F%E7%89%88%E5%A5%B6%E6%98%94%E6%A9%9F%E5%92%8C%E9%A3%B2%E6%96%99%E6%94%AA%E6%8B%8C%E6%A9%9F-%E5%8C%85%E6%8B%AC-473-6-%E4%B8%8D%E9%8F%BD%E9%8B%BC%E6%94%AA%E6%8B%8C%E6%9D%AF%E5%92%8C%E6%A3%92/dp/B01H1UXEPO/ref=sr_1_60?dib=eyJ2IjoiMSJ9._HNiT2-YezDWLnt-MlZgGWZ3RaUSSEcg3mOflbE43fYfIy-CdyaAfgfUqDm2DqQZoDvdoaQ0Mv9_jW933hqlS2ZQ5rPMPHMqgDKHJgfGzUYltEpWbntSTr2bBbhI56VEuXuqU7YRmNKDZBb8ISOtqk_zgw96KiRhw5JTLCAo3q05tbcrwjeRdBeqJivVSCk2p4MA0TR6S36oD_pE8P4UAQfU7Df_Yl_EdVR6H3Zm-VXG7UNdi220F5mvFkty8OwjLZAcZqtlvrIFyGbygFddgWY9z0rCvz0WMR5BkspPGT0.lCd5H5dSJgHZNetIKLvEZgT_vQXApGskeLal1RU28pM&dib_tag=se&keywords=%E5%8F%AF%E5%8F%A3%E5%8F%AF%E6%A8%82&qid=1755151560&sr=8-60'

In [15]:
import requests
def _text_or_na(element):
    """Return the element's stripped text, or "N/A" when the element is missing."""
    return element.text.strip() if element else "N/A"


def scrape_product_details(url, timeout=10):
    """
    Fetch a single Amazon product page with requests and extract basic details.

    Args:
        url: URL of the product detail page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely on a stalled connection.

    Returns:
        A dict with the keys 'URL', '標題' (title), '價格' (price),
        '評分數量' (text of the review-count element), '描述' (description)
        and '圖片' (image URL). Any field whose element is not found is "N/A".
        Returns None when the HTTP request fails (the error is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
        'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'  # request Chinese content
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # raise on HTTP error status codes
    except requests.exceptions.RequestException as e:
        print(f"抓取商品頁面失敗 {url}: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Title
    title = _text_or_na(soup.find(id='productTitle'))

    # Price: takes the first <span class="a-offscreen"> on the page.
    # NOTE(review): that may not always be the main price -- confirm on real pages.
    price = _text_or_na(soup.find('span', class_='a-offscreen'))

    # Review-count text
    rating_count = _text_or_na(soup.find('span', id='acrCustomerReviewText'))

    # Description
    description = _text_or_na(soup.find(id='productDescription'))

    # Image URL; fall back to "N/A" when the tag or its src attribute is missing,
    # so this field uses the same sentinel as the others.
    image_element = soup.find('img', id='landingImage')
    image_url = (image_element.get('src') if image_element else None) or "N/A"

    return {
        'URL': url,
        '標題': title,
        '價格': price,
        '評分數量': rating_count,
        '描述': description,
        '圖片': image_url
    }

In [18]:
# Fetch the product page for `full_url` (the last link found by the
# link-extraction loop) with browser-like headers.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'  # request Chinese content
}
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error response instead of letting it be
# parsed as if it were a product page.
response = requests.get(full_url, headers=headers, timeout=10)
response.raise_for_status()

In [19]:
# Parse the fetched product-page HTML with Python's built-in HTML parser.
# Note: this rebinds `soup`, which the Selenium cell also assigns.
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [20]:
# Echo the document parsed from response.text so the notebook renders its HTML.
# NOTE(review): this dumps the whole page; consider showing only a slice.
soup

<!DOCTYPE html>
<html class="a-no-js" data-19ax5a9jf="dingo" lang="zh-tw"><!-- sp:feature:head-start -->
<head><script>var aPageStart = (new Date()).getTime();</script><meta charset="utf-8"/>
<!-- sp:end-feature:head-start -->
<!-- sp:feature:csm:head-open-part1 -->
<script type="text/javascript">var ue_t0=ue_t0||+new Date();</script>
<!-- sp:end-feature:csm:head-open-part1 -->
<!-- sp:feature:cs-optimization -->
<meta content="on" http-equiv="x-dns-prefetch-control"/>
<link href="https://images-na.ssl-images-amazon.com" rel="dns-prefetch"/>
<link href="https://m.media-amazon.com" rel="dns-prefetch"/>
<link href="https://completion.amazon.com" rel="dns-prefetch"/>
<!-- sp:end-feature:cs-optimization -->
<!-- sp:feature:csm:head-open-part2 -->
<script type="text/javascript">
window.ue_ihb = (window.ue_ihb || window.ueinit || 0) + 1;
if (window.ue_ihb === 1) {

var ue_csm = window,
    ue_hob = +new Date();
(function(d){var e=d.ue=d.ue||{},f=Date.now||function(){return+new Date};e.d=func