In [49]:
from chengyu.publisher import publish_episode
from chengyu.cover_flow import make_cover_bytes
from chengyu.config import settings
from chengyu.dedupe import list_existing_chengyu
from chengyu.gen import gen_unique_episode_strict, script_to_markdown
from chengyu.tts import tts_mp3

# 0) Unique gen
forbidden = list_existing_chengyu(settings.REPO, settings.GITHUB_BRANCH)
data = gen_unique_episode_strict(settings.SHOW_NAME, settings.GEN_MODEL, forbidden, batch_size=20, max_rounds=20)

# 1) Cover (hybrid + dark-top safety)
cover_bytes, cover_ext = make_cover_bytes(data, attempts=4, out_format="JPEG")

# 2) Audio
audio_mp3 = tts_mp3(data["script"], settings.TTS_MODEL, settings.TTS_VOICE)

# 3) Markdown
body_md = script_to_markdown(data["chengyu"], data["pinyin"], data["gloss"], data["teaser"], data["script"], settings.GEN_MODEL)

# 4) Publish (Release upload ON; repo audio OFF)
publish_episode(
    show_name=settings.SHOW_NAME,
    repo=settings.REPO,
    branch=settings.GITHUB_BRANCH,
    site_url=settings.SITE_URL,
    baseurl=settings.BASEURL,
    publish_time_utc=settings.PUBLISH_TIME_UTC,
    data=data,
    body_md=body_md,
    cover_bytes=cover_bytes,
    cover_ext=cover_ext,                 # "jpg" or "png"
    audio_mp3=audio_mp3,
    upload_audio_to_release=True,        # << create GitHub Release
    write_audio_to_repo=True,           # << don't store MP3 in repo (optional)
    dry_run=settings.DRY_RUN,
)


+ git clone --depth 1 --branch main *** /var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyu_seen_s76vy5hb


Cloning into '/var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyu_seen_s76vy5hb'...


+ git -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 clone --filter=blob:none --depth 1 --branch main *** /var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyu_pub_d_dlac1j


Cloning into '/var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyu_pub_d_dlac1j'...
[KUpdating files: 100% (176/176), done.


+ git config user.name Chengyu Publisher Bot
+ git config user.email actions@users.noreply.github.com
+ git add .
+ git commit -m Add episode 2025-08-29-wang-yang-bu-lao
[main 08ccd07] Add episode 2025-08-29-wang-yang-bu-lao
 5 files changed, 44 insertions(+)
 create mode 100644 _posts/2025-08-29-wang-yang-bu-lao.md
 create mode 100644 episodes/2025-08-29-wang-yang-bu-lao/audio.mp3
 create mode 100644 episodes/2025-08-29-wang-yang-bu-lao/cover.jpeg
 create mode 100644 episodes/2025-08-29-wang-yang-bu-lao/metadata.json
 create mode 100644 episodes/2025-08-29-wang-yang-bu-lao/transcript.txt
+ git push origin main


KeyboardInterrupt: 

In [47]:
# Convert "Characters" tables to simple lines across all posts
import re, subprocess
from pathlib import Path
import yaml

ROOT = Path.cwd()
POSTS = ROOT / "_posts"

def git(*args):
    print("+ git", " ".join(args))
    subprocess.check_call(["git", *args], cwd=ROOT)

def parse_fm(p: Path):
    t = p.read_text(encoding="utf-8")
    if not t.startswith("---"): return {}, t
    end = t.find("\n---", 3)
    fm_text = t[4:end]
    body = t[end+4:]
    if body.startswith("\n"): body = body[1:]
    try:
        fm = yaml.safe_load(fm_text) or {}
    except Exception:
        fm = {}
    return fm, body

def write_fm(p: Path, fm: dict, body: str):
    p.write_text(
        "---\n" + yaml.safe_dump(fm, allow_unicode=True, sort_keys=False) + "---\n\n" + body,
        encoding="utf-8"
    )

def extract_table_rows(block: str):
    """Return list of [char, pinyin, meaning] from a Characters block that has a table."""
    # split any glued rows: '| ... | | ... |' -> separate lines
    block = re.sub(r"\|\s+\|", "|\n|", block)
    lines = [ln for ln in block.splitlines() if ln.strip().startswith("|")]
    if not lines:
        return []
    rows = []
    for i, ln in enumerate(lines):
        raw = ln.strip().strip("|")
        cells = [c.strip() for c in raw.split("|")]
        # skip header/separator-ish lines
        if i == 0:
            continue
        if all(re.fullmatch(r"[-—–\s]+", c or "") for c in cells):
            continue
        if len(cells) >= 3:
            rows.append(cells[:3])
    return rows

def already_simple_lines(block: str) -> bool:
    # Heuristic: looks like "字 (pinyin) — meaning" lines already
    return bool(re.search(r"^\s*[\u3400-\u9fff]+\s*\([^)]+\)\s*[—-]\s+.+$", block, flags=re.M))

def replace_characters_with_lines(body: str) -> tuple[str, bool]:
    """
    Find 'Characters' section; if it contains a table (or broken table),
    replace that block with simple lines. Keep other content intact.
    """
    m = re.search(r"(?mis)(^|\n)#{1,6}\s*Characters\s*\n(?P<block>.*?)(?=\n#{1,6}\s|\Z)", body)
    if not m:
        return body, False

    block = m.group("block")
    if already_simple_lines(block):
        return body, False

    rows = extract_table_rows(block)
    if not rows:
        # maybe the block has the table collapsed to one line; try to parse it as one chunk
        one_line = block.replace("\n", " ")
        # split by double pipes as pseudo row boundary
        chunks = [g for g in re.split(r"\|\s*\|", one_line) if "|" in g]
        rows = []
        for ch in chunks:
            cells = [c.strip() for c in re.findall(r"\|\s*([^|]+?)\s*(?=\|)", ch)]
            if len(cells) >= 3:
                rows.append(cells[:3])

    if not rows:
        return body, False

    # Build simple lines
    lines = []
    for char, pinyin, meaning in rows:
        # normalize unicode dash to an en dash
        pinyin = pinyin.strip()
        meaning = meaning.strip()
        lines.append(f"{char} ({pinyin}) — {meaning}  ")

    simple = "\n".join(lines).rstrip() + "\n"

    # Rebuild the Characters block: keep the heading + our lines; drop old table
    # Find the heading line so we can reconstruct neatly
    head_match = re.search(r"(?mi)^#{1,6}\s*Characters\s*$", body)
    heading = body[head_match.start():head_match.end()] if head_match else "### Characters"
    fixed_block = f"{heading}\n\n{simple}\n"

    new_body = body[:m.start()] + fixed_block + body[m.end():]
    return new_body, (new_body != body)

changed = []
for md in sorted(POSTS.glob("*.md")):
    fm, body = parse_fm(md)
    new_body, did = replace_characters_with_lines(body)
    if did:
        write_fm(md, fm, new_body)
        changed.append(md)

print(f"Updated {len(changed)} post(s):", [p.name for p in changed])

if changed:
    git("add", *[str(p.relative_to(ROOT)) for p in changed])
    git("commit", "-m", "Convert Characters tables to simple lines")
    try:
        git("push", "origin", "main")
        print("✔ Pushed. Pages will rebuild.")
    except Exception as e:
        print("Push failed:", e)
else:
    print("No changes needed.")


Updated 0 post(s): []
No changes needed.
