# 自動上稿
- 自動上傳圖片至指定路徑
- 自動生成 HTML，並儲存至本機，以便後續上稿

In [None]:
try:
    import requests, bs4, paramiko
except ImportError:
    !pip install requests beautifulsoup4 paramiko


In [None]:
#!/usr/bin/env python3
import os
import re
import io
import json
import shlex
import requests
import paramiko
import urllib3
from bs4 import BeautifulSoup
from getpass import getpass

# Silence urllib3's InsecureRequestWarning (the target site's certificate is
# incomplete, and this script issues its HTTP requests with verify=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ========= Basic settings (local directories) =========
# Every local output lives under this macOS user's home directory.
MAC_USER = "peter"
LOCAL_HTML_DIR = f"/Users/{MAC_USER}/Desktop/html"
LOCAL_IMG_DIR  = f"/Users/{MAC_USER}/Desktop/news_images"
LOCAL_JSON_DIR = f"/Users/{MAC_USER}/tmp"

# Create the output directories up front; already-existing ones are fine.
for _local_dir in (LOCAL_HTML_DIR, LOCAL_IMG_DIR, LOCAL_JSON_DIR):
    os.makedirs(_local_dir, exist_ok=True)

# ========= User input =========
def _ask(prompt, pattern=None):
    """Prompt until the stripped answer fully matches *pattern*.

    Args:
        prompt: Text shown to the user.
        pattern: Optional regular expression the whole answer must match.
            When ``None`` any answer (including an empty one) is accepted.

    Returns:
        The stripped answer.
    """
    while True:
        answer = input(prompt).strip()
        if pattern is None or re.fullmatch(pattern, answer):
            return answer
        print("    ⚠️ 輸入格式不正確，請重新輸入。")


# postid and pic_id are embedded in URLs and local file names, so restrict
# them to characters that cannot alter the path.
postid = _ask("請輸入 postid (例如 41993): ", r"[A-Za-z0-9_-]+")
html_url = f"https://hlhome.tzuchi.com.tw/NewsReader.aspx?post_id={postid}"

pic_id  = _ask("請輸入 pic_id (例如 145CFADA00D8118C48258D230011446B): ", r"[A-Za-z0-9_-]+")
pic_num = int(_ask("請輸入照片數量 pic_num (例如 5): ", r"\d+"))

# The date parts become components of a remote path that is later passed to
# `sudo chmod -R` / `chown -R`; validating them keeps empty strings or ".."
# from redirecting those recursive commands to an unintended directory.
year  = _ask("請輸入年份 YYYY (例如 2025): ", r"\d{4}")
month = _ask("請輸入月份 MM  (例如 02): ", r"(0?[1-9]|1[0-2])").zfill(2)
day   = _ask("請輸入日期 DD  (例如 03): ", r"(0?[1-9]|[12]\d|3[01])").zfill(2)

category_in = input("分類 (01=新聞 / 02=日誌 / 也可輸入『新聞/日誌』): ").strip().lower()
def resolve_category_folder(t: str) -> str:
    """Map a user-supplied category token to its image folder name.

    The token is matched case-insensitively and with surrounding whitespace
    ignored, so callers do not have to normalise it first.

    Args:
        t: Category token, e.g. ``"01"``, ``"news"``, ``"新聞"``, ``"02"``,
            ``"diary"`` or ``"日誌"``.

    Returns:
        ``"02diary"`` for a diary token; ``"01news"`` for a news token and
        for anything unrecognised (news is the default category).
    """
    token = t.strip().lower()
    diary_aliases = {"02", "2", "日誌", "diary", "02diary"}
    if token in diary_aliases:
        return "02diary"
    # News aliases ("01", "1", "新聞", "news", "01news") and every unknown
    # token resolve to the news folder.
    return "01news"
# Folder name ("01news" / "02diary") derived from the category answer.
category_folder = resolve_category_folder(category_in)

# ========= Remote SSH settings =========
ssh_host, ssh_port = "10.2.116.138", 22
_account_prompt = f"SSH 使用者帳號（{ssh_host}）: "
ssh_user = input(_account_prompt).strip()
# getpass keeps the password off the screen while it is typed.
ssh_pass = getpass("SSH 密碼（輸入不顯示）: ")

# ========= Step 1: fetch the HTML and clean it =========
print(f"[1/5] 下載 HTML：{html_url}")
# verify=False: certificate verification is skipped for the target site.
resp = requests.get(html_url, timeout=20, verify=False)
# Fail fast on 4xx/5xx instead of parsing an error page and saving an
# empty article.
resp.raise_for_status()
# Decode using the encoding detected from the body rather than the header.
resp.encoding = resp.apparent_encoding
html = resp.text

soup = BeautifulSoup(html, "html.parser")

# Announcement time: text node starting with "【公告時間】", label removed.
time_text = ""
node_time = soup.find(string=re.compile(r'^【公告時間】'))
if node_time:
    time_text = re.sub(r'^【公告時間】\s*', '', node_time.strip())

# Author: <span class='post_AncAuthorNR'>, with the "【公告人】" label removed.
author_text = ""
node_author = soup.find("span", class_="post_AncAuthorNR")
if node_author:
    author_text = re.sub(r'^【公告人】\s*', '', node_author.get_text(strip=True))

# Title: prefer the "【公告主旨】..." text node, otherwise fall back to <title>.
title_text = ""
node_title = soup.find(string=re.compile(r'^【公告主旨】'))
if node_title:
    title_text = re.sub(r'^【公告主旨】\s*', '', node_title.strip())
elif soup.title and soup.title.string:
    title_text = soup.title.string.strip()

# Body: keep only <p>/<br>/<img>; every other tag is unwrapped (its children
# stay), and attributes are dropped except an <img>'s non-empty src.
content_html = ""
content_div = soup.find("div", class_="post_Contain")
if content_div:
    for t in content_div(["script", "style"]):
        t.decompose()
    allowed = {"p", "br", "img"}
    # Iterate over a snapshot because unwrap() mutates the tree.
    for tag in list(content_div.find_all(True)):
        if tag.name not in allowed:
            tag.unwrap()
            continue
        src = tag.get("src", "") if tag.name == "img" else ""
        tag.attrs = {"src": src} if src else {}
    content_html = "".join(str(x) for x in content_div.contents).strip()

# Point image paths in the body at their final published location.
final_img_base = f"images/{category_folder}/{year}/{month}/{month}{day}/"
content_html = re.sub(
    r'Attachment/News/[^/]+/(img_\d+\.jpg)',
    final_img_base + r'\1',
    content_html
)

# Write the local HTML file.
# Title/time/author are plain text taken from the parsed page (entities are
# already decoded), so they must be escaped before being embedded in markup;
# otherwise a "&" or "<" in the title corrupts the output. content_html is
# already markup and is written unchanged.
# The function is imported directly because this script uses a variable named
# `html`, which would clash with the module name.
from html import escape as _escape_html

local_html_path = os.path.join(LOCAL_HTML_DIR, f"{postid}.html")
with open(local_html_path, "w", encoding="utf-8") as f:
    f.write(f"<h1>{_escape_html(title_text, quote=False)}</h1>\n")
    f.write(f"<p><strong>時間:</strong> {_escape_html(time_text, quote=False)}</p>\n")
    f.write(f"<p><strong>作者:</strong> {_escape_html(author_text, quote=False)}</p>\n")
    f.write("<hr>\n")
    f.write(content_html)
print(f"    → HTML 存檔：{local_html_path}")

# Also emit the same data as JSON (remove if not needed). JSON carries the
# raw, unescaped text; json.dump handles its own quoting.
local_json_path = os.path.join(LOCAL_JSON_DIR, f"{postid}.json")
with open(local_json_path, "w", encoding="utf-8") as jf:
    json.dump({
        "postid": postid,
        "url": html_url,
        "time": time_text,
        "author": author_text,
        "title": title_text,
        "content": content_html
    }, jf, ensure_ascii=False, indent=2)
print(f"    → JSON 存檔：{local_json_path}")

# ========= Step 2: SSH login (sudo elevation happens per command) =========
print(f"[2/5] SSH 連線並 sudo 提權：{ssh_user}@{ssh_host}")
ssh = paramiko.SSHClient()
# Load the user's known_hosts so a host that is already known is verified
# against its recorded key before the password is sent. A missing or
# unreadable known_hosts file is ignored by paramiko.
ssh.load_system_host_keys()
# NOTE(security): AutoAddPolicy still accepts hosts that are not yet in
# known_hosts without confirmation; switch to RejectPolicy once the server's
# key has been recorded.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(ssh_host, port=ssh_port, username=ssh_user, password=ssh_pass, timeout=20)

def run_sudo(cmd: str):
    """Run *cmd* on the remote host via ``sudo -S -s bash -lc``.

    The SSH password is written to the command's stdin so ``sudo -S`` can read
    it. Nothing is printed on success; on a non-zero exit status a warning
    with the captured output is printed.

    Args:
        cmd: Shell command line to execute with elevated privileges.

    Returns:
        A ``(exit_code, stdout_text, stderr_text)`` tuple.
    """
    sudo_cmd = f"sudo -S -s bash -lc {shlex.quote(cmd)}"
    stdin, stdout, stderr = ssh.exec_command(sudo_cmd, get_pty=True)
    stdin.write(ssh_pass + "\n")
    stdin.flush()
    out = stdout.read().decode("utf-8", "ignore")
    err = stderr.read().decode("utf-8", "ignore").strip()
    code = stdout.channel.recv_exit_status()
    if code != 0:
        # With a PTY the server typically delivers stderr on the stdout
        # stream, so `err` is usually empty; fall back to `out` so failures
        # are never silent.
        detail = err or out.strip()
        if ssh_pass:
            # The PTY may echo the password back; never print it.
            detail = detail.replace(ssh_pass, "***")
        print(f"  ⚠️ sudo 執行失敗：{cmd}\n     {detail}")
    return code, out, err

# ========= Step 3: create the remote directory =========
# Write permission is opened here so the SFTP upload can succeed; it is
# tightened again in the final step.
remote_base = "/var/www/html/hualien/home/images"
remote_dir  = f"{remote_base}/{category_folder}/{year}/{month}/{month}{day}/"
print(f"[3/5] 準備遠端目錄：{remote_dir}")

# shlex.quote is safe for any character in the path, unlike hand-written
# '...' quoting, which breaks on an embedded single quote.
_quoted_dir = shlex.quote(remote_dir)

# NOTE(security): 777 makes the directory temporarily world-writable.
for _cmd in (f"mkdir -p {_quoted_dir}", f"chmod -R 777 {_quoted_dir}"):
    code, out, err = run_sudo(_cmd)
    if code != 0:
        # Without a writable directory every upload would fail; stop now.
        ssh.close()
        raise RuntimeError(f"遠端目錄準備失敗（{_cmd}）")

# Confirm the directory exists.
code, out, err = run_sudo(f"ls -ld {_quoted_dir}")
if code != 0:
    ssh.close()
    raise RuntimeError(f"遠端目錄不存在或無法存取：{remote_dir}")
if out:
    print("    → 遠端目錄狀態：", out.strip())

# ========= Step 4: download images (skip non-200) and upload via SFTP =========
print(f"[4/5] 下載並上傳圖片（總數 {pic_num}）...")
sftp = ssh.open_sftp()
session = requests.Session()
# Certificate verification is disabled for every request in this session.
session.verify = False

uploaded = 0
try:
    for i in range(1, pic_num + 1):
        img_url = f"https://hlhome.tzuchi.com.tw/Attachment/News/{pic_id}/img_{i}.jpg"
        local_img = os.path.join(LOCAL_IMG_DIR, f"img_{i}.jpg")
        remote_img = f"{remote_dir}img_{i}.jpg"

        # Best effort per image: any failure is reported and the loop moves on.
        try:
            # No stream=True: the body is read in full here, so the connection
            # is released even when the image is skipped.
            r = session.get(img_url, timeout=25)
            if r.status_code != 200:
                print(f"    ⚠️ 圖片不存在或下載失敗：{img_url}（HTTP {r.status_code}）→ 跳過")
                continue

            # Keep a local copy (useful for debugging); open() raises if the
            # file cannot be written, so no separate existence check is needed.
            with open(local_img, "wb") as lf:
                lf.write(r.content)
            print(f"    ✓ 已下載：{local_img}")

            # Upload to the remote host.
            sftp.put(local_img, remote_img)
            uploaded += 1
            print(f"    ✓ 已上傳：{remote_img}")

        except Exception as e:
            print(f"    ❌ 上傳失敗：{img_url} → {e}")
finally:
    # Release both the HTTP session and the SFTP channel even if the loop is
    # interrupted.
    session.close()
    sftp.close()

# ========= Step 5: restore ownership and permissions (chown/chmod) =========
print("[5/5] 回收權限（chown/chmod）...")
# shlex.quote is safe for any character in the path.
_quoted_dir = shlex.quote(remote_dir)
_restore_failed = False
try:
    for _cmd in (
        f"chown -R www-data:www-data {_quoted_dir}",
        f"chmod -R 755 {_quoted_dir}",
    ):
        _code, _out, _err = run_sudo(_cmd)
        if _code != 0:
            _restore_failed = True
finally:
    # Always drop the SSH connection, even if a command raised.
    ssh.close()

if _restore_failed:
    # The directory was opened up for the upload; a failure here can leave it
    # with the wrong owner or still world-writable, so say so explicitly.
    print(f"  ⚠️ 權限回收未完全成功，請手動檢查遠端目錄的擁有者與權限：{remote_dir}")

print("\n🎉 完成")
print(f"HTML：{local_html_path}")
print(f"JSON：{local_json_path}")
print(f"Images：{uploaded}/{pic_num} 已上傳 → {remote_dir}")
