In [4]:
# === Backfill one episode: create a GitHub Release for its MP3 and update the post ===
# Usage (after running this cell):
#   backfill_one_by_chengyu("画蛇添足")
#   backfill_one_by_folder("2025-08-21-yi-shi-e-r-nia-o")

import os, re, io, json, glob, subprocess, datetime
from pathlib import Path
from typing import Tuple, Optional

import yaml
import requests

# ---- project settings ----
from chengyu.config import settings

# All paths hang off the current working directory at the time this cell runs,
# so the notebook must be executed from the repository root.
ROOT   = Path.cwd()
POSTS  = ROOT.joinpath("_posts")     # Markdown posts, one *.md file per episode
EP_DIR = ROOT.joinpath("episodes")   # one sub-folder per episode, holding its MP3

# ---- Git helpers ----
def git(*args):
    """Echo a git command and run it inside the repository root.

    Args:
        *args: Arguments handed to ``git`` verbatim, e.g. ``"add", "file.md"``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    # Echo the invocation first so the cell output doubles as a command log.
    print("+ git", " ".join(args))
    command = ["git", *args]
    subprocess.check_call(command, cwd=ROOT)

# ---- Front matter helpers ----
def parse_front_matter(p: Path):
    """Split a post file into its YAML front matter and its body.

    Args:
        p: Path of a UTF-8 text file that may start with a ``---`` delimited
            YAML block.

    Returns:
        A ``(front_matter, body)`` tuple. When the file does not start with
        ``---`` or the block is never closed, the result is
        ``({}, <entire text>)``. Otherwise ``front_matter`` is the parsed
        mapping (``{}`` for an empty block) and ``body`` is the text after the
        closing delimiter with its leading newlines removed.

    Raises:
        ValueError: If the block parses to something that is not a mapping
            (for example a bare list or scalar).
    """
    t = p.read_text(encoding="utf-8")
    if not t.startswith("---"):
        return {}, t
    # Search from offset 3 so the opening delimiter itself can never match.
    end = t.find("\n---", 3)
    if end == -1:
        return {}, t

    fm_text = t[4:end]
    # Remove *all* leading newlines, not just one: a body that keeps the blank
    # separator line would gain another blank line every time it is written
    # back behind a "---" + blank-line separator.
    body = t[end + 4:].lstrip("\n")

    # A blank block is simply an empty mapping; no need to invoke the parser.
    fm = (yaml.safe_load(fm_text) if fm_text.strip() else None) or {}
    if not isinstance(fm, dict):
        # Callers use fm.get(...); fail here with a readable message instead
        # of an AttributeError far away from the offending file.
        raise ValueError(f"Front matter of {p} is not a YAML mapping: {type(fm).__name__}")
    return fm, body

def write_front_matter(p: Path, fm: dict, body: str):
    """Serialise front matter and body back into a post file.

    The file is written as ``---``, the YAML dump of ``fm`` (key order
    preserved, non-ASCII characters kept as-is), ``---``, one blank line and
    then the body.

    Args:
        p: Destination file; it is overwritten, encoded as UTF-8.
        fm: Front-matter mapping to dump as YAML.
        body: Text placed after the front matter. Leading newlines are dropped
            because exactly one blank separator line is always emitted.
    """
    header = yaml.safe_dump(fm, allow_unicode=True, sort_keys=False)
    # The body may already begin with the blank separator line(s) of an earlier
    # write; without normalising it, every parse/write cycle would insert one
    # more blank line between the front matter and the text.
    text = "---\n" + header + "---\n\n" + body.lstrip("\n")
    p.write_text(text, encoding="utf-8")

def extract_chengyu_pinyin(title: str) -> Tuple[str, str]:
    """Split a title of the form ``<chengyu> (<pinyin>)`` into its two parts.

    ASCII and full-width parentheses are both accepted, and the parenthesised
    part must end the title (trailing whitespace aside).

    Args:
        title: Post title; ``None`` or an empty string is treated as ``""``.

    Returns:
        ``(chengyu, pinyin)`` with surrounding whitespace stripped. When the
        title has no trailing parenthesised part, the whole stripped title is
        returned as the chengyu and the pinyin is ``""``.
    """
    text = title or ""
    parsed = re.match(r"\s*([^()（）]+?)\s*[\(（]([^)）]+)[\)）]\s*$", text)
    if parsed is None:
        return text.strip(), ""
    chengyu, pinyin = parsed.groups()
    return chengyu.strip(), pinyin.strip()

# ---- GitHub Release helpers ----
# Base URL of the GitHub REST API; the release endpoints are built from it.
GITHUB_API = "https://api.github.com"

def _gh_token():
    tok = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN")
    if not tok:
        raise RuntimeError("GITHUB_TOKEN (or GH_TOKEN) not set")
    return tok

def _gh_headers():
    """Build the HTTP headers sent with every GitHub API request.

    Returns:
        A new dict holding the token ``Authorization`` header and the GitHub
        JSON ``Accept`` header.

    Raises:
        RuntimeError: Propagated from ``_gh_token()`` when no token is set.
    """
    headers = {}
    headers["Authorization"] = f"token {_gh_token()}"
    headers["Accept"] = "application/vnd.github+json"
    return headers

def _gh_create_or_get_release(repo: str, tag: str, name: str, body: str = "") -> dict:
    """Create a published release for ``tag``, or fetch it if it already exists.

    Args:
        repo: Repository in ``owner/name`` form.
        tag: Tag name of the release.
        name: Release title.
        body: Release description.

    Returns:
        The release object decoded from the API's JSON response.

    Raises:
        RuntimeError: If creation fails for any reason other than the release
            already existing.
        requests.HTTPError: If the release exists but cannot be fetched.
    """
    payload = {"tag_name": tag, "name": name, "body": body, "draft": False, "prerelease": False}
    created = requests.post(f"{GITHUB_API}/repos/{repo}/releases",
                            headers=_gh_headers(), json=payload, timeout=60)
    if created.status_code in (200,201):
        return created.json()

    # A 422 whose payload mentions "already_exists" means the release is
    # already there; everything else is a genuine failure.
    tag_taken = created.status_code == 422 and "already_exists" in created.text
    if not tag_taken:
        raise RuntimeError(f"Create release failed: {created.status_code} {created.text}")

    existing = requests.get(f"{GITHUB_API}/repos/{repo}/releases/tags/{tag}",
                            headers=_gh_headers(), timeout=60)
    existing.raise_for_status()
    return existing.json()

def _gh_upload_asset(upload_url_template: str, filename: str, blob: bytes, content_type: str) -> dict:
    """Upload a binary asset to an existing release.

    Args:
        upload_url_template: The release's ``upload_url``; everything from the
            first ``{`` onwards (the URI-template suffix) is discarded.
        filename: Name the asset gets on the release.
        blob: Raw bytes of the asset.
        content_type: MIME type sent as the ``Content-Type`` header.

    Returns:
        The asset object decoded from the API's JSON response.

    Raises:
        RuntimeError: If the API answers with anything but 200/201.
    """
    upload_url = upload_url_template.split("{")[0]
    headers = _gh_headers()
    headers["Content-Type"] = content_type
    # Hand the name to requests as a query parameter so it is percent-encoded;
    # splicing it into the URL by hand breaks on names containing characters
    # such as "&", "#", "+" or spaces.
    r = requests.post(upload_url, headers=headers, params={"name": filename},
                      data=blob, timeout=300)
    if r.status_code not in (200,201):
        raise RuntimeError(f"Upload asset failed: {r.status_code} {r.text}")
    return r.json()

# ---- Finders ----
def _find_post_by_chengyu(substr: str) -> Optional[Path]:
    """Return the last post (in sorted filename order) whose title contains ``substr``.

    Args:
        substr: Text to look for inside each post's front-matter ``title``.

    Returns:
        The matching post path that sorts last among ``_posts/*.md``, or
        ``None`` when no title contains ``substr``.
    """
    latest = None
    for md in sorted(POSTS.glob("*.md")):
        fm, _ = parse_front_matter(md)
        # An empty `title:` parses to None and a numeric title to int/float;
        # normalise to str so one odd post cannot abort the whole search with
        # a TypeError from the `in` test.
        title = str(fm.get("title") or "")
        if substr in title:
            latest = md
    return latest

def _find_post_by_folder(folder: str) -> Optional[Path]:
    """Map an episode folder name to its post file, if that file exists.

    Args:
        folder: Episode folder name of the form ``YYYY-MM-DD-slug``.

    Returns:
        ``_posts/<date>-<slug>.md`` when it exists, otherwise ``None``.
    """
    # 10 date characters, one separator character, then the slug.
    date, slug = folder[:10], folder[11:]
    post = POSTS / f"{date}-{slug}.md"
    if post.exists():
        return post
    return None

def _derive_folder_from_md(md: Path) -> str:
    return f"{md.name[:10]}-{md.stem[11:]}"

# ---- Core backfill ----
def _backfill_release_for_post(md: Path, *, upload_only_if_missing=True, keep_repo_audio=True):
    """Publish one episode's MP3 as a GitHub Release asset and update its post.

    The MP3 is looked up in ``episodes/<folder>/`` (``audio.mp3`` preferred,
    otherwise the first ``*.mp3`` in sorted order), uploaded to the release
    tagged ``v<YYYYMMDD>-<slug>``, and the post's front matter receives
    ``audio_url`` and ``audio_bytes``. The change is then committed and pushed
    to ``origin main``.

    Args:
        md: Post file named ``YYYY-MM-DD-<slug>.md``.
        upload_only_if_missing: When true, return without doing anything if the
            front matter's ``audio_url`` already starts with
            ``https://github.com/``.
        keep_repo_audio: When true the MP3 is staged together with the post;
            when false ``episodes/<folder>/audio.mp3`` is removed from git and
            from disk on a best-effort basis.

    Raises:
        RuntimeError: If the episode folder or an MP3 is missing, the release
            or upload request fails, or the upload response carries no
            download URL.
        subprocess.CalledProcessError: If ``git add``/``commit``/``push`` fails.
    """
    fm, body = parse_front_matter(md)

    # Decide about skipping before touching the episode folder: a post that
    # already points at a release must not fail merely because its local audio
    # is gone, and there is no reason to read the MP3 into memory either.
    if upload_only_if_missing and str(fm.get("audio_url","")).startswith("https://github.com/"):
        print("Already has a Release URL in front-matter; skipping upload.")
        return

    folder = _derive_folder_from_md(md)
    epdir  = EP_DIR / folder
    if not epdir.exists():
        raise RuntimeError(f"Episode dir missing: {epdir}")

    ch, py = extract_chengyu_pinyin(fm.get("title",""))
    date_str = md.name[:10]
    slug     = md.stem[11:]

    # find MP3 (prefer episodes/<folder>/audio.mp3, else any .mp3)
    repo_audio = epdir / "audio.mp3"
    mp3_path = repo_audio
    if not mp3_path.exists():
        cand = sorted(epdir.glob("*.mp3"))
        if cand:
            mp3_path = cand[0]
    if not mp3_path.exists():
        raise RuntimeError(f"No MP3 found in {epdir}")

    audio_bytes = mp3_path.read_bytes()
    audio_name  = f"{date_str}-{slug}.mp3"  # stable name for asset
    tag         = f"v{date_str.replace('-','')}-{slug}"
    rel_name    = f"{ch} ({py})" if py else ch
    rel_body    = f"Episode: {ch}"

    rel   = _gh_create_or_get_release(settings.REPO, tag=tag, name=rel_name, body=rel_body)
    asset = _gh_upload_asset(rel["upload_url"], filename=audio_name, blob=audio_bytes, content_type="audio/mpeg")
    asset_url = asset.get("browser_download_url")
    if not asset_url:
        # Never write `audio_url: null` into the post.
        raise RuntimeError(f"Upload response has no browser_download_url: {asset}")
    print("Release asset:", asset_url)

    # update front matter
    fm["audio_url"]   = asset_url
    fm["audio_bytes"] = len(audio_bytes)
    write_front_matter(md, fm, body)

    # optional: remove repo audio to slim repo (you can keep it if you prefer)
    # NOTE(review): only audio.mp3 is removed; a fallback *.mp3 picked above
    # stays in the repository.
    if not keep_repo_audio and repo_audio.exists():
        # Cleanup stays best-effort, but failures are reported, not hidden.
        try:
            subprocess.call(["git", "rm", "-f", str(repo_audio.relative_to(ROOT))], cwd=ROOT)
        except Exception as exc:
            print("! git rm failed (continuing):", exc)
        try:
            # `git rm` normally deletes the file already; only unlink leftovers
            # (e.g. when the file was never tracked).
            if repo_audio.exists():
                repo_audio.unlink()
        except Exception as exc:
            print("! could not delete local audio (continuing):", exc)

    # commit & push
    git("add", str(md.relative_to(ROOT)))
    if keep_repo_audio:
        git("add", str(mp3_path.relative_to(ROOT)))
    git("commit", "-m", f"Backfill release for {folder}")
    git("push", "origin", "main")
    print("✔ Updated post & pushed.")

# ---- Public helpers you call ----
def backfill_one_by_chengyu(chengyu_substr: str, *, keep_repo_audio=True):
    """Backfill the release for the post whose title contains ``chengyu_substr``.

    Args:
        chengyu_substr: Text to search for in the post titles.
        keep_repo_audio: Forwarded to ``_backfill_release_for_post``.

    Raises:
        RuntimeError: If no post title contains ``chengyu_substr``.
    """
    post = _find_post_by_chengyu(chengyu_substr)
    if post:
        print("Post:", post)
        _backfill_release_for_post(post, keep_repo_audio=keep_repo_audio)
        return
    raise RuntimeError(f"No post title contains: {chengyu_substr}")

def backfill_one_by_folder(folder: str, *, keep_repo_audio=True):
    """Backfill the release for the post that belongs to an episode folder.

    Args:
        folder: Episode folder name of the form ``YYYY-MM-DD-slug``.
        keep_repo_audio: Forwarded to ``_backfill_release_for_post``.

    Raises:
        RuntimeError: If no post file exists for ``folder``.
    """
    post = _find_post_by_folder(folder)
    if post:
        print("Post:", post)
        _backfill_release_for_post(post, keep_repo_audio=keep_repo_audio)
        return
    raise RuntimeError(f"Post not found for: {folder}")

# Status banner: usage examples, the target repository, and whether a GitHub
# token is present in the environment (the token itself is never printed).
print(
    "READY. Examples:",
    '  backfill_one_by_chengyu("画蛇添足")',
    '  backfill_one_by_folder("2025-08-21-yi-shi-e-r-nia-o")',
    sep="\n",
)
print("Repo:", settings.REPO)
print("GITHUB_TOKEN set:", any(os.environ.get(name) for name in ("GITHUB_TOKEN", "GH_TOKEN")))


READY. Examples:
  backfill_one_by_chengyu("画蛇添足")
  backfill_one_by_folder("2025-08-21-yi-shi-e-r-nia-o")
Repo: kohlenberg/chengyudaily
GITHUB_TOKEN set: True


In [5]:
# Backfill a single episode, addressed by its folder name: upload its MP3 as a
# release asset, update the post's front matter, then commit and push.
backfill_one_by_folder("2025-08-28-wen-ji-qi-wu")

Post: /Users/tilman/github3/chengyudaily/_posts/2025-08-28-wen-ji-qi-wu.md
Release asset: https://github.com/kohlenberg/chengyudaily/releases/download/v20250828-wen-ji-qi-wu/2025-08-28-wen-ji-qi-wu.mp3
+ git add _posts/2025-08-28-wen-ji-qi-wu.md
+ git add episodes/2025-08-28-wen-ji-qi-wu/audio.mp3
+ git commit -m Backfill release for 2025-08-28-wen-ji-qi-wu
[main 6268485] Backfill release for 2025-08-28-wen-ji-qi-wu
 1 file changed, 2 insertions(+), 1 deletion(-)
+ git push origin main
✔ Updated post & pushed.


To https://github.com/kohlenberg/chengyudaily.git
   54004c1..6268485  main -> main
