In [None]:
# === Backfill covers (hybrid) with dark-top safety ===
# - AI paints bg + Chinese characters (no Latin)
# - We overlay pinyin + English (brushy) with adaptive backplates
# - Retries if the top band is too dark
# Writes: episodes/<date>-<slug>/cover.(jpg|png)
# Updates: _posts/YYYY-MM-DD-<slug>.md  cover_image

%load_ext autoreload
%autoreload 2

import os, re, io, json, time, subprocess, sys
from pathlib import Path

try:
    import yaml
    from PIL import Image
except Exception as e:
    raise SystemExit("Missing deps. Install with: pip install pyyaml pillow") from e

# project imports
from chengyu.config import settings
from chengyu.cover_hybrid import generate_cover_hybrid

# ------------- Config -------------
# Knobs for this backfill run. Everything below is read by the helpers and the
# main loop further down; edit here rather than in the loop.
GLOB_PATTERN       = "*.md"        # e.g. "2025-08-*.md" to limit scope (globbed inside _posts/)
OUT_EXT            = "jpg"         # "jpg" or "png"; becomes the cover file extension
OUT_SIZE           = 1500          # final square pixels (passed as out_size to the generator)
QUALITY_PARAM      = "medium"      # image API: "low" | "medium" | "high"
# PINYIN_Y / ENGLISH_Y are forwarded as pinyin_y / english_y to generate_cover_hybrid.
# NOTE(review): presumably fractions of the image height (0 = top, 1 = bottom) — confirm
# against chengyu.cover_hybrid.
PINYIN_Y           = 0.50          # placement under characters
ENGLISH_Y          = 0.78
ATTEMPTS           = 4             # retries if top too dark
SLEEP_SECONDS      = 1.0           # polite pause (slept after each generated cover)
FORCE_REGENERATE   = True          # True: rewrite even if cover exists
DELETE_OLD_PNG     = False         # True: remove old cover.png when switching to jpg
UPDATE_FRONTMATTER = True          # True: point each post's cover_image at the new file
DO_GIT_COMMIT      = True          # commit & push changes at the end

# ------------- Paths -------------
# The current working directory is treated as the repository root: posts and
# episode folders are resolved under it, and git commands run with it as cwd.
ROOT     = Path.cwd()
POSTS    = ROOT / "_posts"
EP_DIR   = ROOT / "episodes"

# ------------- Helpers -------------
def _norm_size_local(sz) -> str:
    """Coerce settings.IMAGE_SIZE to a size string the image API accepts.

    Accepted inputs:
      * a 2-item tuple/list ``(w, h)``, rendered as ``"WxH"``;
      * a string such as ``"1024x1536"`` or ``"auto"`` (case and surrounding
        whitespace are ignored);
      * anything else (bare int, digit-only string, float, None, a pair that
        cannot be converted to ints) falls back to the default.

    Returns:
        One of "1024x1024", "1024x1536", "1536x1024" or "auto". Unsupported or
        unrecognised values yield "1024x1024" instead of raising.
    """
    default = "1024x1024"
    supported = {"1024x1024", "1024x1536", "1536x1024", "auto"}

    if isinstance(sz, (tuple, list)) and len(sz) == 2:
        try:
            sz = f"{int(sz[0])}x{int(sz[1])}"
        except (TypeError, ValueError):
            # e.g. ("wide", None): not a usable pair, so use the default.
            return default

    # Ints, floats, None, dicts... carry no "WxH" information; a single number
    # was always mapped to the default square size.
    if not isinstance(sz, str):
        return default

    sz = sz.strip().lower()
    return sz if sz in supported else default

def parse_front_matter(p: Path):
    """Split a post file into ``(front_matter, body)``.

    Front matter is recognised only when the first line is exactly ``---``
    (trailing whitespace allowed) and a later line is exactly ``---`` as well.
    Lines such as ``----`` (a Markdown rule) or ``---foo`` are not delimiters.

    Args:
        p: Path of the UTF-8 encoded post.

    Returns:
        A ``(dict, str)`` tuple. The dict is empty when the file has no front
        matter, when the YAML cannot be parsed, or when it does not describe a
        mapping. When no front matter is found the body is the whole file text;
        otherwise it is everything after the closing delimiter line.
    """
    text = p.read_text(encoding="utf-8")
    lines = text.split("\n")

    if lines[0].rstrip() != "---":
        return {}, text

    # Locate the closing delimiter: the first later line that is exactly '---'.
    closing = next(
        (idx for idx in range(1, len(lines)) if lines[idx].rstrip() == "---"),
        None,
    )
    if closing is None:
        return {}, text

    fm_text = "\n".join(lines[1:closing])
    body = "\n".join(lines[closing + 1:])

    try:
        fm = yaml.safe_load(fm_text)
    except (yaml.YAMLError, ValueError) as e:
        # ValueError covers scalars YAML recognises but cannot build (e.g. an
        # impossible date). Stay best-effort, but say so instead of hiding it.
        print(f"  ! {p.name}: unreadable front matter ({e}); treating as empty")
        fm = {}

    # Callers use fm.get(...); a scalar or list document is not usable as one.
    if not isinstance(fm, dict):
        fm = {}
    return fm, body

def write_front_matter(p: Path, fm: dict, body: str):
    """Rewrite a post as YAML front matter, one blank line, then the body.

    Args:
        p: Post file to overwrite (written as UTF-8).
        fm: Front-matter mapping; key order is preserved and non-ASCII text is
            written as-is rather than escaped.
        body: Markdown body. Leading newlines are dropped so that exactly one
            blank line separates the closing ``---`` from the content.
    """
    header = yaml.safe_dump(fm, allow_unicode=True, sort_keys=False)
    # A body obtained from parse_front_matter still starts with the blank
    # separator line; without stripping it every rewrite would add one more
    # blank line after the front matter.
    content = body.lstrip("\n")
    p.write_text("---\n" + header + "---\n\n" + content, encoding="utf-8")

def extract_chengyu_pinyin(title: str):
    """Split a title of the form ``<chengyu> (<pinyin>)`` into its two parts.

    Both ASCII ``(...)`` and full-width ``（...）`` parentheses are accepted.
    Returns ``(chengyu, pinyin)`` with surrounding whitespace removed. When the
    title does not end with a parenthesised part, the whole stripped title is
    returned with an empty pinyin. A falsy title yields ``("", "")``.
    """
    text = title or ""
    # Supports (...) and （...）
    pattern = r"\s*([^()（）]+?)\s*[\(（]([^)）]+)[\)）]\s*$"
    found = re.match(pattern, text)
    if found is None:
        return text.strip(), ""
    chengyu, pinyin = (part.strip() for part in found.groups())
    return chengyu, pinyin

def read_story(folder_dir: Path) -> str:
    """Return the story text for an episode folder, or "" when none is found.

    Lookup order:
      1. the "script" value of ``metadata.json``, if it is a non-blank string;
      2. the full contents of ``transcript.txt``.
    Read or parse failures on either file are ignored (best effort).
    """
    meta_path = folder_dir / "metadata.json"
    if meta_path.exists():
        try:
            script = json.loads(meta_path.read_text(encoding="utf-8")).get("script", "")
        except Exception:
            # Unreadable or malformed metadata: fall through to the transcript.
            script = None
        if isinstance(script, str) and script.strip():
            return script

    transcript_path = folder_dir / "transcript.txt"
    if transcript_path.exists():
        try:
            return transcript_path.read_text(encoding="utf-8")
        except Exception:
            return ""
    return ""

def git(*args):
    """Echo and run ``git <args>`` with ROOT as the working directory.

    Raises subprocess.CalledProcessError when git exits with a non-zero status.
    """
    command = ["git"]
    command.extend(args)
    print("+ git", " ".join(args))
    subprocess.check_call(command, cwd=ROOT)

def top_too_dark(img_bytes: bytes, frac: float = 0.18, lum_thresh: int = 35, max_ratio: float = 0.16) -> bool:
    """Heuristic to detect a very dark top band.

    The image is converted to 8-bit grayscale and the top ``frac`` of its
    height (at least one row) is inspected. Returns True when the share of
    pixels with a gray level strictly below ``lum_thresh`` exceeds ``max_ratio``.
    """
    gray = Image.open(io.BytesIO(img_bytes)).convert("L")

    band_height = int(gray.height * frac)
    if band_height < 1:
        band_height = 1
    band = gray.crop((0, 0, gray.width, band_height))

    # histogram() is indexed by gray level, so the first `cutoff` buckets are
    # exactly the pixels darker than the threshold.
    counts = band.histogram()
    cutoff = lum_thresh if lum_thresh > 0 else 0
    dark_pixels = sum(counts[:cutoff])

    pixel_count = band.width * band.height
    dark_share = dark_pixels / (pixel_count if pixel_count > 1 else 1)
    return dark_share > max_ratio

def generate_cover_safe(*, chengyu: str, pinyin: str, english: str, story: str,
                        out_format: str, attempts: int = 4) -> bytes:
    """Generate a hybrid cover, retrying while its top region is too dark.

    Args:
        chengyu, pinyin, english, story: Forwarded unchanged to
            generate_cover_hybrid.
        out_format: Output format name for the generator ("JPEG" or "PNG").
        attempts: Maximum number of generations. Values below 1 are treated as
            1 so that an image is always produced.

    Returns:
        The encoded image bytes of the first attempt whose top band passes
        top_too_dark, or of the last attempt if none does.

    Raises:
        Whatever generate_cover_hybrid raises; errors are not retried here.
    """
    size_str = _norm_size_local(getattr(settings, "IMAGE_SIZE", "1024x1024"))
    model = getattr(settings, "IMAGE_MODEL", "gpt-image-1")
    # Guard against attempts <= 0, which would otherwise skip the loop entirely
    # and leave nothing to return.
    total = max(1, attempts)

    cover = b""
    for i in range(1, total + 1):
        cover = generate_cover_hybrid(
            chengyu=chengyu,
            pinyin=pinyin,
            english=english,
            story=story,
            model=model,
            size=size_str,
            quality=QUALITY_PARAM,
            out_size=OUT_SIZE,
            out_format=out_format,   # "JPEG" or "PNG"
            pinyin_y=PINYIN_Y,
            english_y=ENGLISH_Y
        )
        if not top_too_dark(cover):
            if i > 1:
                print(f"  Accepted attempt {i} (top ok).")
            return cover
        # Only announce a retry when one will actually happen.
        if i < total:
            print("  Top looks too dark; retrying…")
    print("  Warning: top remained dark after retries; using last image.")
    return cover

# ------------- Sanity -------------
# Echo the settings this run depends on before any image is requested.
print(f"OPENAI_API_KEY set?  {bool(os.environ.get('OPENAI_API_KEY'))}")
print(f"IMAGE_MODEL: {getattr(settings, 'IMAGE_MODEL', None)}")
print(f"IMAGE_SIZE : {getattr(settings, 'IMAGE_SIZE', None)}")
print()

# map ext -> PIL format string for our generator
OUT_EXT = OUT_EXT.lower()
if OUT_EXT not in {"jpg", "jpeg", "png"}:
    raise SystemExit("OUT_EXT must be 'jpg' or 'png'")
# Anything that is not PNG ("jpg" / "jpeg") is encoded as JPEG.
OUT_FORMAT = "JPEG" if OUT_EXT != "png" else "PNG"

# ------------- Main -------------
# For every post matching GLOB_PATTERN: find its episode folder, (re)generate the
# cover, point the post's cover_image at it, and record what changed so the
# commit step below knows which files to stage.
changed_files = []   # paths written during this run (covers and posts)
generated = 0        # number of cover images written
updated_posts = 0    # number of posts whose front matter was rewritten


def _sync_cover_image(post_path, fm, body, desired_rel) -> bool:
    """Point the post's cover_image at desired_rel; return True if it was rewritten."""
    if not UPDATE_FRONTMATTER or fm.get("cover_image") == desired_rel:
        return False
    fm["cover_image"] = desired_rel
    write_front_matter(post_path, fm, body)
    return True


for md in sorted(POSTS.glob(GLOB_PATTERN)):
    # Post files are expected to be named YYYY-MM-DD-<slug>.md.
    date = md.name[:10]
    slug = md.stem[11:]
    epdir = EP_DIR / f"{date}-{slug}"
    if not epdir.exists():
        print(f"[skip] {md.name}: missing episodes/{date}-{slug}/")
        continue

    fm, body = parse_front_matter(md)
    if not isinstance(fm, dict):
        # Front matter that parses to a scalar or list has no usable keys.
        fm = {}
    # YAML can yield None or non-string scalars (numbers, dates); the helpers
    # below expect plain strings.
    title = fm.get("title")
    title = "" if title is None else str(title)
    desc = fm.get("description")
    desc = "" if desc is None else str(desc)
    ch, py = extract_chengyu_pinyin(title)
    if not ch:
        print(f"[skip] {md.name}: cannot parse chengyu from title")
        continue

    story = read_story(epdir)

    # paths
    out_name = f"cover.{OUT_EXT}"
    out_path = epdir / out_name
    desired_rel = f"/episodes/{date}-{slug}/{out_name}"

    # skip if exists and not forcing
    if out_path.exists() and not FORCE_REGENERATE:
        if _sync_cover_image(md, fm, body, desired_rel):
            changed_files.append(md)
            updated_posts += 1
        print(f"[keep] {md.name}: {out_name} exists")
        continue

    print(f"* {date} | {ch} ({py}) → generating {out_name} …")

    try:
        cover_bytes = generate_cover_safe(
            chengyu=ch,
            pinyin=py,
            english=desc,
            story=story,
            out_format=OUT_FORMAT,
            attempts=ATTEMPTS
        )
    except Exception as e:
        # One failed post must not abort the whole backfill.
        print("  ! image generation failed:", e)
        continue

    out_path.write_bytes(cover_bytes)
    generated += 1
    changed_files.append(out_path)

    if _sync_cover_image(md, fm, body, desired_rel):
        changed_files.append(md)
        updated_posts += 1

    # Both "jpg" and "jpeg" are accepted as JPEG output, so clean up the old
    # PNG for either spelling, not just for "jpg".
    if DELETE_OLD_PNG and OUT_EXT != "png":
        old_png = epdir / "cover.png"
        if old_png.exists():
            try:
                # Stages the deletion when the file is tracked; a non-zero exit
                # (e.g. untracked file) is fine, the unlink below handles it.
                subprocess.call(["git", "rm", "-f", str(old_png.relative_to(ROOT))], cwd=ROOT)
            except OSError as e:
                print(f"  ! could not run git rm for {old_png.name}: {e}")
            if old_png.exists():
                try:
                    old_png.unlink()
                except OSError as e:
                    print(f"  ! could not delete {old_png.name}: {e}")

    # Pause between generations to go easy on the image API.
    time.sleep(SLEEP_SECONDS)

print(f"\nGenerated covers: {generated}")
print(f"Updated posts   : {updated_posts}")

# ------------- Commit & push -------------
# Stage the files written above, commit them, and push to origin/main. Failures
# are reported rather than raised so the summary above is never lost, and a
# failed add/commit is no longer mislabelled as a failed push.
if DO_GIT_COMMIT and changed_files:
    # Only stage paths that still exist; deletions done via `git rm` are
    # already staged.
    rels = [str(Path(p).relative_to(ROOT)) for p in changed_files if Path(p).exists()]
    msg = f"Backfill covers (hybrid): {generated} images, {updated_posts} posts updated"
    try:
        if rels:
            git("add", *rels)
        git("commit", "-m", msg)
    except subprocess.CalledProcessError as e:
        print("Commit failed (nothing was pushed):", e)
    else:
        try:
            git("push", "origin", "main")
            print("✔ Pushed cover updates.")
        except subprocess.CalledProcessError as e:
            print("Push failed (you can push manually):", e)
else:
    print("No git commit." if not DO_GIT_COMMIT else "No changes to commit.")
