In [None]:
from bs4 import BeautifulSoup
import requests
import time
import json
import re
from pprint import pprint

def clean_title(title):
    """Strip the site's boilerplate wrapper from a product link title.

    Removes a leading "觀看 " prefix and a trailing "的新機價格" suffix, then
    collapses every run of whitespace into a single space (which also trims
    both ends). All other characters are kept as they are.
    """
    # Drop the leading "觀看" marker together with the whitespace after it.
    without_prefix = re.sub(r'^觀看\s+', '', title)
    # Drop the trailing "的新機價格" marker and any whitespace before it.
    without_suffix = re.sub(r'\s*的新機價格$', '', without_prefix)
    # split() with no argument discards leading/trailing/repeated whitespace.
    words = without_suffix.split()
    return ' '.join(words)

def get_product_urls(base_url, timeout=10):
    """Collect the product-page URLs listed on a brand page.

    Args:
        base_url: URL of the brand listing page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list of absolute product URLs in page order. An empty list is
        returned when the request fails or no matching link is found.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    site_root = "https://www.eprice.com.tw"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(base_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"獲取產品列表時發生錯誤：{e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Product tiles are the <div> elements carrying exactly this class string.
    product_containers = soup.find_all('div', class_='col-lg-3 col-md-3 prod-detail normal prize-image prize-image-small')

    product_urls = []
    for container in product_containers:
        link = container.find('a')
        if link and link.get('href'):
            # urljoin gives the same result as plain concatenation for
            # root-relative hrefs ("/path"), and additionally copes with
            # hrefs that are already absolute or lack the leading slash.
            full_url = urljoin(site_root, link['href'])
            product_urls.append(full_url)
            print(f"找到產品連結：{full_url}")

    return product_urls

def get_product_info(url, timeout=10):
    """Extract product ID, model ID and cleaned title for each model on a product page.

    The IDs are taken from the last two path segments of every price link
    found in the page's market-price list.

    Args:
        url: URL of the product page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list of dicts with the keys 'prod_id', 'model_id' and 'title'.
        An empty list is returned when the request fails or no usable link
        is found.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"獲取產品資訊時發生錯誤：{e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    targets = soup.select('body > div.align-center > div > div.basic-info > ul.list-group.market-price-list.pull-right > li > div > div:nth-child(3) > a')

    product_info = []
    for target in targets:
        href = target.get('href')
        if not href:
            continue

        # Strip a trailing slash first; otherwise the final segment would be
        # empty and the two IDs would be shifted by one position.
        parts = href.rstrip('/').split('/')
        if len(parts) < 2 or not (parts[-2] and parts[-1]):
            # Not enough path segments to identify a product/model pair.
            continue

        info = {
            'prod_id': parts[-2],
            'model_id': parts[-1],
            'title': clean_title(target.get('title', ''))
        }
        product_info.append(info)
        print(f"找到 - 產品ID: {info['prod_id']}, 型號ID: {info['model_id']}")
        print(f"標題: {info['title']}")

    return product_info

def get_price_data(prod_id, model_id, timeout=10):
    """Fetch the price data for one product/model pair from the site's AJAX endpoint.

    Args:
        prod_id: Product identifier sent as the ``prod_id`` query parameter.
        model_id: Model identifier sent as the ``model_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON response, or None when the request fails or the
        response body is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': 'https://www.eprice.com.tw/'
    }

    price_url = 'https://www.eprice.com.tw/ajax/intro/get-product-price-data.php'
    # Let requests build the query string so the values are URL-encoded.
    params = {'lib': 'mobile', 'prod_id': prod_id, 'model_id': model_id}

    try:
        response = requests.get(price_url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body: depending on the requests
        # version, the decode error is not always a RequestException.
        print(f"獲取價格數據時發生錯誤 (prod_id: {prod_id}, model_id: {model_id}): {e}")
        return None

def process_single_product(url):
    """Fetch every model of one product page and save each model's price data as JSON.

    For each model returned by get_product_info, the price data is fetched
    with get_price_data and, when non-empty, written to a file named
    "<title>_<timestamp>.json" in the current working directory.

    Args:
        url: URL of the product page to process.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    print(f"\n處理產品頁面：{url}")
    print("-" * 50)

    product_info = get_product_info(url)
    if not product_info:
        print(f"無法獲取產品資訊：{url}")
        return

    for info in product_info:
        price_data = get_price_data(info['prod_id'], info['model_id'])

        if price_data:
            full_data = {
                'product_url': url,
                'prod_id': info['prod_id'],
                'model_id': info['model_id'],
                'title': info['title'],
                'price_data': price_data
            }

            # The title comes from scraped HTML and may contain characters
            # that are path separators or invalid in filenames; replace them
            # so open() cannot fail or write outside the working directory.
            safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', info['title']).strip(' .')
            if not safe_title:
                # Fall back to the IDs when the title is empty.
                safe_title = f"{info['prod_id']}_{info['model_id']}"

            # The timestamp keeps filenames unique across runs.
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"{safe_title}_{timestamp}.json"

            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(full_data, f, ensure_ascii=False, indent=2)

            print(f"數據已保存至：{filename}")

        time.sleep(1)  # Pause between models to avoid hammering the server.

def main():
    """Scrape every product listed on the Apple brand page, one product at a time."""
    # Brand listing page that serves as the crawl's starting point.
    brand_url = "https://www.eprice.com.tw/product/brands/Apple/"

    product_urls = get_product_urls(brand_url)
    total = len(product_urls)

    # Nothing to do when the listing could not be read or was empty.
    if total == 0:
        print("未找到任何產品連結，程式結束")
        return

    print(f"\n總共找到 {total} 個產品連結")

    position = 0
    for product_url in product_urls:
        position += 1
        print(f"\n處理第 {position}/{total} 個產品")
        process_single_product(product_url)
        # Pause between products to keep the request rate low.
        time.sleep(2)

    separator = "=" * 50
    print("\n" + separator)
    print(f"所有產品處理完成！總共處理了 {total} 個產品")
    print(separator)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
