# In [None]:
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root; used as the prefix when building absolute URLs.
base_url = "https://www.gutenberg.org"
# Listing page that is scraped for book links (the "zh" language index).
language_page = f"{base_url}/browse/languages/zh"

# Create the output folder for downloaded books (no error if it already exists).
output_dir = "gutenberg_books"
os.makedirs(output_dir, exist_ok=True)

# Matches any character in the range U+4E00-U+9FFF; used to decide whether a
# book title contains Chinese characters.
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')

def fetch_book_list(language_page, timeout=30):
    """Return the books on a listing page whose titles contain Chinese text.

    Args:
        language_page: URL of the listing page to scrape.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        A list of dicts, in page order, each with the keys "title" (the
        stripped link text) and "url" (the link target as an absolute URL).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(language_page, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    books = []
    for link in soup.select("li > a[href]"):
        title = link.text.strip()
        # Keep only links whose visible text contains a Chinese character.
        if chinese_char_pattern.search(title):
            books.append({
                "title": title,
                # urljoin resolves root-relative, page-relative and already
                # absolute hrefs correctly; plain concatenation only handled
                # the root-relative case.
                "url": urljoin(language_page, link["href"]),
            })
    return books

def sanitize_filename(name):
    r"""Make a title usable as a file name.

    Each occurrence of one of the characters ``\ / * ? : " < > |`` is
    replaced by an underscore; every other character is kept as-is.
    """
    forbidden = '\\/*?:"<>|'
    replacements = str.maketrans(dict.fromkeys(forbidden, "_"))
    return name.translate(replacements)

def download_book(book, timeout=30):
    """Download the "Plain Text UTF-8" edition of one book into ``output_dir``.

    The book's page is fetched, the link labelled "Plain Text UTF-8" is
    located, and the linked content is written unmodified to
    ``<output_dir>/<sanitized title>.txt``. Problems are reported on stdout
    instead of being raised, so one bad book does not stop a batch run.

    Args:
        book: Dict with the keys "title" and "url".
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.
    """
    try:
        response = requests.get(book["url"], timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # ``string=`` is the current name of the deprecated ``text=`` filter.
        download_link = soup.find("a", href=True, string="Plain Text UTF-8")
        if not download_link:
            # Say so explicitly; otherwise a skipped book is indistinguishable
            # from one that was never attempted.
            print(f"略過 (找不到純文字版本): {book['title']}")
            return

        # Resolve the href against the book page so relative and absolute
        # links both yield a valid URL.
        book_url = urljoin(book["url"], download_link["href"])
        book_response = requests.get(book_url, timeout=timeout)
        book_response.raise_for_status()

        # Save the book as raw bytes so the server's encoding is preserved.
        sanitized_title = sanitize_filename(book["title"])
        file_path = os.path.join(output_dir, f"{sanitized_title}.txt")
        with open(file_path, "wb") as file:
            file.write(book_response.content)
        print(f"已下載: {book['title']}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the next book.
        print(f"下載失敗: {book['title']} - {e}")

def main():
    """Fetch the book list from ``language_page`` and download each book in turn.

    Progress messages are printed before the first download and after the
    last one. An exception raised while fetching the list propagates to the
    caller.
    """
    books = fetch_book_list(language_page)
    # Plain literal: the message has no placeholders, so no f-prefix is needed.
    print("開始下載內容")

    for book in books:
        download_book(book)

    print("下載完成")

# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

