
# 🎙️ Chengyu Bites — Unique Episode Generator & GitHub Publisher

This notebook:
1. Picks a **new (unused)** 成语 (skips any already published in your repo).
2. Generates the **podcast script** (JSON mode, robust).
3. Creates **cover.png** and **transcript.txt**.
4. (Optional) Generates **audio.mp3** with OpenAI TTS.
5. Publishes with **one commit** (avoids multiple Pages builds):
   - Uploads the MP3 as a **GitHub Release** asset (archival copy).
   - Commits `_posts/YYYY-MM-DD-slug.md` plus `episodes/<date>-<slug>/` containing `cover.png`, `transcript.txt`, `metadata.json`, and (when TTS ran) `audio.mp3`.

Repo assumed: **`kohlenberg/chengyudaily`**.


## 1) Setup & Secrets

In [48]:

import sys, os, getpass, subprocess

# Show which interpreter backs this kernel so it is clear where packages get installed.
print("Kernel python:", sys.executable)
# Install needed packages into THIS kernel (pip is run through sys.executable,
# so it targets the kernel's environment rather than whatever `pip` is on PATH).
# NOTE(review): "-U" with unpinned names upgrades on every run — pin versions if
# reproducibility matters.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U",
                       "requests", "pillow", "python-dotenv", "pyyaml", "openai>=1.40"])

# Load .env if present; otherwise prompt (values remain only in-memory for this session)
# The import must come after the pip install above, which provides python-dotenv.
from dotenv import load_dotenv, find_dotenv
load_dotenv(dotenv_path=find_dotenv(usecwd=True))

def ensure_env(var):
    """Make sure environment variable ``var`` has a non-empty value.

    When the variable is missing or empty, the user is asked for it through a
    hidden ``getpass`` prompt and the answer is stored in ``os.environ`` for
    the current process only. An already-set variable is left untouched.
    """
    if os.environ.get(var):
        return
    os.environ[var] = getpass.getpass(f"Enter {var}: ")

# Both secrets are required further down: OPENAI_API_KEY for generation/TTS,
# GITHUB_TOKEN for the GitHub API helpers and the git clone/push.
for key in ("OPENAI_API_KEY", "GITHUB_TOKEN"):
    ensure_env(key)

# Report only whether each secret is present — never print the values themselves,
# because notebook outputs are saved with the file.
print("OPENAI_API_KEY set? ", bool(os.environ.get("OPENAI_API_KEY")))
print("GITHUB_TOKEN set?   ", bool(os.environ.get("GITHUB_TOKEN")))


Kernel python: /Users/tilman/opt/anaconda3/envs/chengyudaily/bin/python
OPENAI_API_KEY set?  True
GITHUB_TOKEN set?    True


## 2) Config

In [49]:

# ---- You can tweak these ----
# These module-level constants are read by the helper functions and cells further down.
SHOW_NAME   = "Chengyu Bites"                              # inserted into the script prompt and drawn on the cover
REPO        = "kohlenberg/chengyudaily"                    # owner/repo
SITE_URL    = "https://kohlenberg.github.io/chengyudaily"  # public site base
GEN_MODEL   = "gpt-4o-mini"          # text generation model
TTS_MODEL   = "gpt-4o-mini-tts"      # tts model
TTS_VOICE   = "alloy"                # tts voice
PUBLISH_TIME_UTC = "10:00:00 +0000"  # front matter time (appended to the local run date)
USE_PINYIN_SLUG  = True              # safer ASCII slugs for URLs; False slugs the characters instead

# DRY_RUN only guards the publish cell (release + clone/commit/push). The
# duplicate check in the generation step still performs read-only GitHub API calls.
DRY_RUN = False     # True: don't touch GitHub (no release, no commit)
DO_TTS  = True      # False: skip audio generation


## 3) Helpers (slugify, Markdown formatting, cover image)

In [50]:

import io, re, json, unicodedata, textwrap
from PIL import Image, ImageDraw, ImageFont

def slugify(text: str) -> str:
    """Turn ``text`` into a lowercase, hyphen-separated slug.

    Accented letters are reduced to their base letter (``"hǎo"`` -> ``"hao"``),
    every run of non-word characters or underscores becomes a single ``-``,
    and leading/trailing hyphens are removed. Word characters of other scripts
    (e.g. Chinese characters) are kept as they are.

    Args:
        text: Arbitrary input such as pinyin with tone marks.

    Returns:
        The slug, or ``"episode"`` when nothing usable remains.
    """
    decomposed = unicodedata.normalize("NFKD", text)
    # NFKD splits "ǎ" into "a" + a combining mark. The mark is not a word
    # character, so without removing it the regex below would treat it as a
    # separator and break syllables apart ("hǎo" -> "ha-o").
    base_chars = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    slug = re.sub(r"[\W_]+", "-", base_chars, flags=re.U).strip("-").lower()
    return slug or "episode"

def script_to_markdown(chengyu: str, pinyin: str, gloss: str, teaser: str, script: str) -> str:
    """
    Ask the chat model (``GEN_MODEL``) to reformat a raw episode script as Markdown.

    The prompt requests these parts, in order: title line, teaser blockquote,
    Overview (gloss), Phrase, Characters table, Origin, Examples, Closing.

    Args:
        chengyu: The idiom in Chinese characters.
        pinyin:  Its pinyin transcription.
        gloss:   Short meaning, placed under "Overview".
        teaser:  One-sentence teaser, placed as a blockquote.
        script:  The full spoken script, possibly containing ``[break Ns]`` tags.

    Returns:
        The model's reply with surrounding whitespace removed and, if the model
        wrapped it in a code fence, with that fence removed.
    """
    import re
    from openai import OpenAI

    api = OpenAI()

    # Blank out the [break Ns] pause markers before the model sees the script.
    script_without_breaks = re.sub(r"\[break\s*[0-9.]+s\]", " ", script)

    system_prompt = (
        "You are a precise formatter. Turn an input Chinese 成语 podcast script "
        "into clean, concise Markdown with clear sections. Do not add extra commentary. "
        "Keep Chinese characters intact; use English where appropriate."
    )

    user_instructions = f"""
Goal: Reformat the provided script for the idiom "{chengyu} ({pinyin})" into Markdown.

Rules:
- Do NOT use code fences.
- Keep it compact and readable.
- Headings: use ## (not #) inside the page (the post layout already has an H1 with the title).
- Use a Markdown table for the character breakdown with columns: 字 | Pinyin | Meaning.
- Render examples as bilingual blocks: Chinese on top, English on the next line (use <br> between them).
- Do not include SSML or [break] tags.

Must output sections in this order:

# TITLE (first line): {chengyu} ({pinyin})
> {teaser}

## Overview
{gloss}

## Phrase
**{chengyu}** — {pinyin}

## Characters
(Table with rows for each character: 字 | pinyin | meaning)

## Origin
(4–5 sentences, concise)

## Examples
- Chinese<br>English
- Chinese<br>English
- Chinese<br>English

## Closing
(Restates {chengyu} and a one-line English meaning/sign-off)
"""

    # Instructions and script travel as two separate user messages.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_instructions},
        {"role": "user", "content": script_without_breaks},
    ]
    completion = api.chat.completions.create(
        model=GEN_MODEL,
        temperature=0.3,
        messages=conversation,
    )
    markdown = completion.choices[0].message.content.strip()

    # Models often wrap the answer in a code fence despite the rules; peel it off.
    fence_pattern = r"^```(markdown|md)?\s*|\s*```$"
    return re.sub(fence_pattern, "", markdown, flags=re.S | re.I)

def ensure_font(size: int):
    """Return a font at ``size``, preferring faces that can draw Chinese text.

    Candidate font files are tried in order and the first one that loads wins.
    CJK-capable fonts come first because the cover's main element is the idiom
    in Chinese characters; DejaVu Sans has no CJK glyphs, so it is kept only as
    a last resort before Pillow's built-in default font.

    Args:
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The first candidate that loads, otherwise ``ImageFont.load_default()``.
    """
    candidates = (
        # macOS
        "/System/Library/Fonts/PingFang.ttc",
        "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
        "/Library/Fonts/Arial Unicode.ttf",
        # Common Linux CJK font locations (Debian/Ubuntu packages)
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        # Latin-only fallback: Chinese characters will not render with it
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    )
    for cand in candidates:
        try:
            return ImageFont.truetype(cand, size)
        except Exception:
            # Missing or unreadable font file: best-effort, try the next one.
            continue
    return ImageFont.load_default()

def draw_cover_png(chengyu: str, pinyin: str, gloss: str) -> bytes:
    """Render the square 3000x3000 episode cover and return it as PNG bytes.

    Layout, top to bottom: show name (top-left), the idiom centred and shifted
    slightly upward, the pinyin centred just beneath it, and the gloss wrapped
    at 30 characters near the bottom-left.
    """
    side = 3000
    canvas = Image.new("RGB", (side, side), "#0e1116")
    pen = ImageDraw.Draw(canvas)

    show_font = ensure_font(120)
    idiom_font = ensure_font(440)
    pinyin_font = ensure_font(150)
    gloss_font = ensure_font(90)

    # Show name
    pen.text((150, 180), SHOW_NAME, font=show_font, fill=(180, 200, 255))

    # Idiom: centred horizontally, 140 px above the vertical centre
    left, top, right, bottom = pen.textbbox((0, 0), chengyu, font=idiom_font)
    idiom_w = right - left
    idiom_h = bottom - top
    idiom_x = (side - idiom_w) // 2
    idiom_y = (side - idiom_h) // 2 - 140
    pen.text((idiom_x, idiom_y), chengyu, font=idiom_font, fill=(255, 255, 255))

    # Pinyin: centred, 60 px below the idiom's measured height
    py_box = pen.textbbox((0, 0), pinyin, font=pinyin_font)
    pinyin_x = (side - (py_box[2] - py_box[0])) // 2
    pinyin_y = idiom_y + idiom_h + 60
    pen.text((pinyin_x, pinyin_y), pinyin, font=pinyin_font, fill=(200, 220, 255))

    # Gloss
    pen.multiline_text(
        (150, side - 520),
        textwrap.fill(gloss, width=30),
        font=gloss_font,
        fill=(160, 180, 220),
        spacing=12,
    )

    out = io.BytesIO()
    canvas.save(out, "PNG", optimize=True)
    return out.getvalue()


## 4) GitHub API helpers + fetch already-used 成语

In [51]:

import base64, requests, re, json

# Base URL of the GitHub REST API; the gh_* helpers and fetch_used_chengyu build their endpoints from it.
GITHUB_API = "https://api.github.com"

def gh_headers():
    """Build the HTTP headers for authenticated GitHub REST API calls.

    Returns:
        A new dict with ``Authorization`` (token scheme) and ``Accept`` headers.

    Raises:
        RuntimeError: if ``GITHUB_TOKEN`` is missing or empty in the environment.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        return {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github+json",
        }
    raise RuntimeError("GITHUB_TOKEN not set.")

def gh_create_release(repo: str, tag: str, name: str, body: str = "", draft=False, prerelease=False):
    """Create a GitHub release for ``tag``, or return the existing one.

    Args:
        repo: ``owner/repo``.
        tag: Tag name the release is attached to.
        name: Release title.
        body: Release notes text.
        draft, prerelease: Passed through to the API payload.

    Returns:
        The release JSON. When creation is rejected with HTTP 422 and the
        response text mentions ``already_exists``, the release is looked up by
        tag and that JSON is returned instead.

    Raises:
        RuntimeError: for any other non-200/201 response to the create call.
    """
    releases_url = f"{GITHUB_API}/repos/{repo}/releases"
    created = requests.post(
        releases_url,
        headers=gh_headers(),
        json={"tag_name": tag, "name": name, "body": body, "draft": draft, "prerelease": prerelease},
        timeout=60,
    )
    if created.status_code in (200, 201):
        return created.json()

    # Re-running for the same day/slug: reuse the release that is already there.
    if created.status_code == 422 and "already_exists" in created.text:
        existing = requests.get(f"{releases_url}/tags/{tag}", headers=gh_headers(), timeout=60)
        existing.raise_for_status()
        return existing.json()

    raise RuntimeError(f"Create release failed: {created.status_code} {created.text}")

def gh_upload_asset(upload_url_template: str, filename: str, data: bytes, content_type: str = "application/octet-stream"):
    """Upload ``data`` as an asset of an existing GitHub release.

    Args:
        upload_url_template: The release's ``upload_url`` as returned by the
            API, i.e. a URL followed by a ``{...}`` template suffix.
        filename: Asset name shown on the release page.
        data: Raw file content.
        content_type: MIME type sent in the ``Content-Type`` header.

    Returns:
        The asset JSON returned by the API.

    Raises:
        RuntimeError: if the response status is not 200/201.
    """
    # Drop the "{?name,label}" template suffix; only the base URL is usable.
    upload_url = upload_url_template.split("{")[0]
    headers = gh_headers()
    headers["Content-Type"] = content_type
    # Pass the name through `params` so requests URL-encodes it. Building the
    # query string by hand corrupts names containing spaces, "&", "#" and the like.
    r = requests.post(upload_url, headers=headers, params={"name": filename}, data=data, timeout=300)
    if r.status_code not in (200, 201):
        raise RuntimeError(f"Upload asset failed: {r.status_code} {r.text}")
    return r.json()

def fetch_used_chengyu(repo: str, branch: str = "main") -> set[str]:
    """Return the set of idioms already published in ``repo`` on ``branch``.

    Two sources are combined through the GitHub contents API:
    the ``chengyu`` field of every ``episodes/<dir>/metadata.json``, and the
    part before ``" ("`` of the double-quoted ``title:`` line of every
    ``_posts/*.md`` file.

    Listings or files that do not answer with HTTP 200 are skipped, and files
    that cannot be decoded or parsed are ignored, so the result is best-effort.
    """
    auth = gh_headers()
    contents_url = f"{GITHUB_API}/repos/{repo}/contents"
    seen = set()

    # A) episodes/*/metadata.json
    listing = requests.get(f"{contents_url}/episodes?ref={branch}", headers=auth, timeout=60)
    if listing.status_code == 200:
        for entry in listing.json():
            if entry.get("type") != "dir":
                continue
            meta_resp = requests.get(
                f"{contents_url}/{entry['path']}/metadata.json?ref={branch}",
                headers=auth,
                timeout=60,
            )
            if meta_resp.status_code != 200:
                continue
            try:
                raw = base64.b64decode(meta_resp.json()["content"])
                idiom = (json.loads(raw.decode("utf-8")).get("chengyu") or "").strip()
                if idiom:
                    seen.add(idiom)
            except Exception:
                # Unreadable metadata: skip this episode.
                pass

    # B) fallback: the title line in each post's front matter
    listing = requests.get(f"{contents_url}/_posts?ref={branch}", headers=auth, timeout=60)
    if listing.status_code == 200:
        for entry in listing.json():
            if entry.get("type") != "file" or not entry["name"].endswith(".md"):
                continue
            post_resp = requests.get(
                f"{contents_url}/{entry['path']}?ref={branch}",
                headers=auth,
                timeout=60,
            )
            if post_resp.status_code != 200:
                continue
            try:
                post_text = base64.b64decode(post_resp.json()["content"]).decode("utf-8", errors="ignore")
                title_match = re.search(r'^title:\s*"(.*?)"', post_text, re.M)
                if title_match:
                    # Titles look like '成语 (pinyin)'; keep the part before " (".
                    idiom = title_match.group(1).split(" (")[0].strip()
                    if idiom:
                        seen.add(idiom)
            except Exception:
                # Unreadable post: skip it.
                pass

    return seen


## 5) OpenAI generation (JSON mode) + optional TTS

In [52]:

from openai import OpenAI
# Shared client used by gen_episode_with_exclusions and tts_mp3 in this cell.
# Constructed without arguments — presumably it picks up OPENAI_API_KEY from the
# environment, which the setup cell makes sure is set.
client = OpenAI()

def gen_episode_with_exclusions(show_name: str, exclude: list[str]):
    """Ask the chat model for one complete episode, steering it away from ``exclude``.

    Args:
        show_name: Podcast name inserted into the intro and sign-off lines.
        exclude: Idioms the model is told not to pick. Only the first 60 are
            put into the prompt to keep it compact.

    Returns:
        The parsed JSON object from the model. The prompt asks for the keys
        ``chengyu``, ``pinyin``, ``gloss``, ``teaser`` and ``script``; nothing
        here verifies that they are all present.
    """
    system_prompt = (
        "You create short, conversational podcast episodes about Chinese 成语. "
        "Return ONLY a JSON object. Do not include code fences or extra text."
    )

    excludes_txt = ""
    if exclude:
        shortlist = list(exclude)[:60]  # keep prompt compact
        excludes_txt = "".join((
            "\nDO NOT choose any of these idioms: ",
            ", ".join(shortlist),
            ". If you pick one, pick another.\n",
        ))

    episode_prompt = f"""
Pick a well-known Chinese 成语 at random and create a short, conversational episode.
{excludes_txt}
Follow this structure EXACTLY in the "script" field:
1) Intro: Start with: "Welcome to {show_name} — your quick summary on Chinese 成语." Add a one-sentence teaser about the theme. Add [break 1s].
2) Reveal: Say "The phrase is:" then the idiom in CHINESE CHARACTERS, followed by the pinyin.
3) Character breakdown: Each character with pinyin and meaning, each line ending with [break 0.5s].
4) Full idiom again: characters + literal & figurative meaning. Add [break 1s].
5) Origin story: 4–5 sentences. Start with "Here’s the story behind it:" then [break 1.5s], then the story, then [break 1.5s].
6) Three examples: For each, give Mandarin on one line and English on the next. Put [break 1s] after each pair.
7) Closing: Repeat the idiom in Chinese and the short English meaning; thank the listener and sign off with: "Thanks for listening to {show_name}! See you next time for another idiom." End with [break 1s].

Important:
- Keep the idiom in CHINESE CHARACTERS in the script (use pinyin only where asked).
- Use [break 0.5s], [break 1s], [break 1.5s]. No SSML.
- Slightly slower tone via wording and breaks (≈90%).

Return JSON with keys:
{{
  "chengyu": "<characters>",
  "pinyin": "<pinyin with tone marks>",
  "gloss": "<literal + figurative meaning in one short line>",
  "teaser": "<one-sentence teaser>",
  "script": "<full episode script with [break] tags>"
}}
"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": episode_prompt},
    ]
    # JSON mode: the reply body is expected to be a single JSON object.
    completion = client.chat.completions.create(
        model=GEN_MODEL,
        temperature=0.7,
        response_format={"type": "json_object"},
        messages=conversation,
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

def gen_episode_unique(show_name: str, repo: str, branch: str = "main", max_tries: int = 6):
    """Generate an episode whose idiom has not been published in ``repo`` yet.

    The already-used idioms are fetched once, then the model is asked up to
    ``max_tries`` times. A pick that is empty or already used is rejected and
    the request is repeated.

    Args:
        show_name: Podcast name forwarded to the generator.
        repo: ``owner/repo`` to check for published idioms.
        branch: Branch to inspect.
        max_tries: Maximum number of generation attempts.

    Returns:
        The episode dict of the first attempt that yields an unused idiom.

    Raises:
        RuntimeError: if every attempt produced an empty or already-used idiom.
    """
    used = fetch_used_chengyu(repo, branch=branch)
    # Normalise and drop blanks so an empty string never ends up in the prompt.
    used_norm = {u.strip() for u in used if isinstance(u, str) and u.strip()}

    # Duplicates the model has already proposed in this run, most recent first.
    rejected = []
    for _ in range(max_tries):
        # The generator only forwards the head of this list to the model, so put
        # the picks it just repeated first; the rest is sorted so the prompt is
        # deterministic instead of depending on set iteration order.
        exclude = rejected + sorted(used_norm.difference(rejected))
        data = gen_episode_with_exclusions(show_name, exclude=exclude)
        ch = (data.get("chengyu") or "").strip()
        if ch and ch not in used_norm:
            return data
        if ch and ch not in rejected:
            rejected.insert(0, ch)
    raise RuntimeError("Couldn't get a new (unused) 成语 after several attempts. Try again.")

def tts_mp3(script_text: str) -> bytes:
    """Synthesize ``script_text`` with the configured TTS model and voice.

    Each ``[break Ns]`` tag is replaced by a blank line before the text is sent.
    The streamed response is collected and returned as one ``bytes`` object.
    """
    import io
    import re

    # Swap [break Ns] tags for paragraph breaks before synthesis.
    spoken_text = re.sub(r"\[break\s*[0-9.]+s\]", "\n\n", script_text)

    stream = client.audio.speech.with_streaming_response.create(
        model=TTS_MODEL,
        voice=TTS_VOICE,
        input=spoken_text,
    )
    audio = io.BytesIO()
    with stream as response:
        for piece in response.iter_bytes():
            audio.write(piece)
    return audio.getvalue()


## 6) Generate a **unique** 成语 episode

In [53]:

# Generate one episode whose idiom is not yet published on the repo's main branch.
ep = gen_episode_unique(SHOW_NAME, repo=REPO, branch="main")

# Summary of the short fields.
for _label, _field in (
    ("Chengyu :", "chengyu"),
    ("Pinyin  :", "pinyin"),
    ("Gloss   :", "gloss"),
    ("Teaser  :", "teaser"),
):
    print(_label, ep[_field])

# Script preview, truncated so the cell output stays readable.
print("\n--- SCRIPT (first 1200 chars) ---\n")
print(ep["script"][:1200], "..." if len(ep["script"]) > 1200 else "", sep="")


Chengyu : 不遗余力
Pinyin  : bù yí yú lì
Gloss   : spare no effort; do one's utmost
Teaser  : Today, we explore the theme of dedication and effort.

--- SCRIPT (first 1200 chars) ---

Welcome to Chengyu Bites — your quick summary on Chinese 成语. Today, we explore the theme of dedication and effort. [break 1s] The phrase is: 不遗余力, bù yí yú lì. [break 1s] 不 (bù) - not [break 0.5s] 遗 (yí) - leave behind [break 0.5s] 余 (yú) - surplus [break 0.5s] 力 (lì) - strength [break 1s] Full idiom again: 不遗余力 literally means 'not leaving behind any surplus of strength', figuratively it means to spare no effort or to do one’s utmost. [break 1s] Here’s the story behind it: [break 1.5s] This idiom originates from a historical account of a general who, during a crucial battle, made sure that every soldier was fully equipped and motivated. He believed in giving everything he had to ensure victory. His determination inspired his troops, and they fought valiantly, ultimately winning the battle. This saying has si

## 7) (Optional) Edit before publishing

In [54]:

# Copy the generated fields into plain variables; the build and publish cells
# read these names, so any manual edit made here is what gets published.
chengyu, pinyin, gloss, teaser, script = (
    ep[field] for field in ("chengyu", "pinyin", "gloss", "teaser", "script")
)

# Example manual tweak:
# teaser = "A quick bite about perspective."

print(chengyu, "|", pinyin)
print(gloss)
print("\nPreview script start:\n", script[:600])


不遗余力 | bù yí yú lì
spare no effort; do one's utmost

Preview script start:
 Welcome to Chengyu Bites — your quick summary on Chinese 成语. Today, we explore the theme of dedication and effort. [break 1s] The phrase is: 不遗余力, bù yí yú lì. [break 1s] 不 (bù) - not [break 0.5s] 遗 (yí) - leave behind [break 0.5s] 余 (yú) - surplus [break 0.5s] 力 (lì) - strength [break 1s] Full idiom again: 不遗余力 literally means 'not leaving behind any surplus of strength', figuratively it means to spare no effort or to do one’s utmost. [break 1s] Here’s the story behind it: [break 1.5s] This idiom originates from a historical account of a general who, during a crucial battle, made sure that ev


## 8) Build local assets (cover, transcript, optional TTS)

In [55]:

import datetime
from pathlib import Path
import json

# Names for this run: local date + slug (from pinyin or from the characters).
today = datetime.date.today()
date_str = f"{today:%Y-%m-%d}"
slug_base = pinyin if USE_PINYIN_SLUG else chengyu
slug = slugify(slug_base)
folder = "-".join((date_str, slug))

# Local staging directory: build/<date>-<slug>/
ep_dir = Path("build", folder)
ep_dir.mkdir(parents=True, exist_ok=True)

# cover
cover_png = draw_cover_png(chengyu, pinyin, gloss)
ep_dir.joinpath("cover.png").write_bytes(cover_png)

# transcript (the raw script, [break] tags included)
ep_dir.joinpath("transcript.txt").write_text(script, encoding="utf-8")

# metadata
metadata = dict(
    show=SHOW_NAME,
    chengyu=chengyu,
    pinyin=pinyin,
    gloss=gloss,
    teaser=teaser,
    pubDate=today.isoformat(),
)
ep_dir.joinpath("metadata.json").write_text(
    json.dumps(metadata, ensure_ascii=False, indent=2),
    encoding="utf-8",
)

# audio: best-effort — a failure is reported and leaves the episode without audio
audio_mp3 = b""
if not DO_TTS:
    print("Skipping TTS (DO_TTS=False)")
else:
    try:
        audio_mp3 = tts_mp3(script)
        ep_dir.joinpath("audio.mp3").write_bytes(audio_mp3)
        print("Audio generated:", ep_dir.joinpath("audio.mp3").resolve())
    except Exception as exc:
        print("TTS failed:", exc)

print("Built assets in:", ep_dir.resolve())


Audio generated: /Users/tilman/github3/chengyudaily/build/2025-08-24-bu-yi-yu-li/audio.mp3
Built assets in: /Users/tilman/github3/chengyudaily/build/2025-08-24-bu-yi-yu-li


## 9) Publish: GitHub Release (MP3) + one commit push

In [56]:
# --- Publish: GitHub Release (optional) + one commit push with site-hosted audio ---
import tempfile, shutil, subprocess, os, json
from pathlib import Path

def run(cmd, cwd=None, hide_token=False):
    """Echo a command, run it, and raise if it exits non-zero.

    Args:
        cmd: Argument list passed to ``subprocess.check_call``.
        cwd: Working directory for the command.
        hide_token: When true, every argument containing ``"@"`` (such as a
            ``https://<token>@host/...`` URL) is shown as ``***`` — both in the
            echoed line and in the exception raised on failure.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status. With ``hide_token`` the exception carries the masked command.
    """
    shown = ["***" if hide_token and "@" in str(part) else str(part) for part in cmd]
    print("+", " ".join(shown))
    try:
        subprocess.check_call(cmd, cwd=cwd)
    except subprocess.CalledProcessError as err:
        if not hide_token:
            raise
        # The default exception message embeds the full command, which would put
        # the token into the saved notebook output. Re-raise with the masked
        # command and drop the original exception from the traceback chain.
        raise subprocess.CalledProcessError(err.returncode, shown) from None

# Publish flow: (0) optional release + MP3 asset, (1) shallow clone into a temp
# dir, (2-5) write episode files and the Jekyll post, (6) one commit + push.
# The temp clone is always removed in the `finally` block.
if DRY_RUN:
    print("DRY_RUN=True — skipping GitHub release & push.")
else:
    # 0) Optional: create Release + upload MP3 for archival (site will play the local copy)
    # NOTE(review): this happens before the clone/push, so a later failure leaves
    # the release behind; re-running reuses the release, but uploading an asset
    # with the same name again may be rejected — confirm against the GitHub API.
    audio_feed_url = ""
    audio_bytes = 0
    if DO_TTS and audio_mp3:
        tag  = f"v{today.strftime('%Y%m%d')}-{slug}"
        name = f"{chengyu} ({pinyin})"
        rel  = gh_create_release(REPO, tag=tag, name=name, body=f"Episode: {chengyu}")
        asset = gh_upload_asset(rel["upload_url"], filename=f"{folder}.mp3", data=audio_mp3, content_type="audio/mpeg")
        audio_feed_url = asset["browser_download_url"]
        # Prefer the size reported by GitHub; fall back to the local byte count.
        audio_bytes = asset.get("size", len(audio_mp3))
        print("Release asset uploaded:", audio_feed_url)
    else:
        print("No release asset uploaded (TTS disabled or failed).")

    # 1) Clone repo
    tmp = tempfile.mkdtemp(prefix="chengyudaily_")
    try:
        token = os.environ.get("GITHUB_TOKEN")
        if not token:
            raise RuntimeError("GITHUB_TOKEN not set")
        # The token is embedded in the remote URL; run(..., hide_token=True)
        # masks it in the echoed command line.
        repo_url = f"https://{token}@github.com/{REPO}.git"

        run(["git", "clone", "--depth", "1", repo_url, tmp], hide_token=True)
        run(["git", "config", "user.name", "Chengyu Publisher Bot"], cwd=tmp)
        run(["git", "config", "user.email", "actions@users.noreply.github.com"], cwd=tmp)

        # 2) Define destination folder (NOW dest_ep exists)
        dest_ep = Path(tmp) / "episodes" / folder
        dest_ep.mkdir(parents=True, exist_ok=True)

        # 3) Write episode assets
        # metadata.json is what the duplicate check reads back (its "chengyu" field).
        (dest_ep / "cover.png").write_bytes(cover_png)
        (dest_ep / "transcript.txt").write_text(script, encoding="utf-8")
        (dest_ep / "metadata.json").write_text(json.dumps({
            "show": SHOW_NAME,
            "chengyu": chengyu,
            "pinyin": pinyin,
            "gloss": gloss,
            "teaser": teaser,
            "pubDate": today.isoformat()
        }, ensure_ascii=False, indent=2), encoding="utf-8")

        # 4) Write audio to SITE so mobile playback works (same-origin)
        audio_url = ""
        if DO_TTS and audio_mp3:
            (dest_ep / "audio.mp3").write_bytes(audio_mp3)
            audio_url = f"/episodes/{folder}/audio.mp3"
            if not audio_bytes:
                audio_bytes = len(audio_mp3)

        # 5) Build post front matter (cover + local audio)
        cover_path_for_web = f"/episodes/{folder}/cover.png"
        fm = {
            "layout": "post",
            "title": f"{chengyu} ({pinyin})",
            "date": f"{date_str} {PUBLISH_TIME_UTC}",
            "description": gloss,
            "cover_image": cover_path_for_web
        }
        if audio_url:
            fm["audio_url"] = audio_url                # used by site layout
            fm["audio_bytes"] = audio_bytes
        if audio_feed_url:
            fm["audio_feed_url"] = audio_feed_url      # optional: used by build_feed.py for RSS

        # One extra model call: reformat the spoken script into the post body.
        body_md = script_to_markdown(chengyu, pinyin, gloss, teaser, script)

        # Front matter is emitted by hand: numbers are written bare, everything
        # else as a JSON string literal (double-quoted, non-ASCII kept as-is).
        # The duplicate check's title regex relies on the double quotes.
        front = (
            "---\n"
            + "\n".join(
                f"{k}: {json.dumps(v, ensure_ascii=False) if not isinstance(v, (int,float)) else v}"
                for k, v in fm.items()
            )
            + "\n---\n\n"
        )
        post_md = front + body_md + "\n"

        posts_dir = Path(tmp) / "_posts"
        posts_dir.mkdir(exist_ok=True)
        (posts_dir / f"{date_str}-{slug}.md").write_text(post_md, encoding="utf-8")

        # 6) Commit once and push
        # NOTE(review): the push target is hard-coded to "main".
        run(["git", "add", "."], cwd=tmp)
        run(["git", "commit", "-m", f"Add episode {folder}"], cwd=tmp)
        run(["git", "push", "origin", "main"], cwd=tmp, hide_token=True)

        print("\n✔ Pushed one commit. Pages workflow will build & deploy.")
        print("Episode page (after deploy):")
        # Assumes the site's permalink pattern is /YYYY/MM/DD/<slug>.html — TODO confirm.
        print(f"{SITE_URL}/" + f"{today.strftime('%Y/%m/%d')}/{slug}.html")
        if audio_url:
            print("Site audio URL:", audio_url)
        if audio_feed_url:
            print("Release audio URL:", audio_feed_url)

    finally:
        # Always remove the temp clone (its git config contains the token URL).
        shutil.rmtree(tmp, ignore_errors=True)


Release asset uploaded: https://github.com/kohlenberg/chengyudaily/releases/download/v20250824-bu-yi-yu-li/2025-08-24-bu-yi-yu-li.mp3
+ git clone --depth 1 *** /var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyudaily_1mlxrix4


Cloning into '/var/folders/wn/z035vl9x1_q0_8dhkvn9n24h0000gn/T/chengyudaily_1mlxrix4'...


+ git config user.name Chengyu Publisher Bot
+ git config user.email actions@users.noreply.github.com
+ git add .
+ git commit -m Add episode 2025-08-24-bu-yi-yu-li
[main a3193aa] Add episode 2025-08-24-bu-yi-yu-li
 5 files changed, 48 insertions(+)
 create mode 100644 _posts/2025-08-24-bu-yi-yu-li.md
 create mode 100644 episodes/2025-08-24-bu-yi-yu-li/audio.mp3
 create mode 100644 episodes/2025-08-24-bu-yi-yu-li/cover.png
 create mode 100644 episodes/2025-08-24-bu-yi-yu-li/metadata.json
 create mode 100644 episodes/2025-08-24-bu-yi-yu-li/transcript.txt
+ git push origin main

✔ Pushed one commit. Pages workflow will build & deploy.
Episode page (after deploy):
https://kohlenberg.github.io/chengyudaily/2025/08/24/bu-yi-yu-li.html
Site audio URL: /episodes/2025-08-24-bu-yi-yu-li/audio.mp3
Release audio URL: https://github.com/kohlenberg/chengyudaily/releases/download/v20250824-bu-yi-yu-li/2025-08-24-bu-yi-yu-li.mp3


To https://github.com/kohlenberg/chengyudaily.git
   7e5f9b7..a3193aa  main -> main


## 10) Done


**Notes**
- Ensure your repo has a **Pages workflow** (for example `.github/workflows/pages.yml`) and that Pages is configured with **Source = GitHub Actions**.
- The feed (`podcast.xml`) can be built by your workflow (via `build_feed.py`) so no extra commit is needed.
- To avoid duplicate base URLs in the feed image, posts use `cover_image: "/episodes/.../cover.png"`.
- Slugs default to **pinyin** (`USE_PINYIN_SLUG=True`) for ASCII-safe URLs.

Re-run this notebook anytime for a new, unique episode. 🎉


In [60]:
from chengyu.config import settings
from chengyu.dedupe import list_existing_chengyu
from chengyu.gen import gen_unique_episode_strict, script_to_markdown
from chengyu.cover import draw_cover_png
from chengyu.tts import tts_mp3
from chengyu.publisher import publish_episode

# Package-based version of the pipeline above, built on the `chengyu` package.
# NOTE(review): in the recorded run this cell stopped at the import of `settings`
# (ImportError), so nothing below was executed.
# NOTE(review): the imports at the top of this cell rebind draw_cover_png,
# tts_mp3 and script_to_markdown, replacing the notebook-level helpers defined
# earlier. The package versions are called with extra arguments here (show
# name, model, voice) — confirm their signatures in the package.

# 0) Already-used idioms (full set)
forbidden = list_existing_chengyu(settings.REPO, settings.GITHUB_BRANCH)

# 1) Strict-unique generation
data = gen_unique_episode_strict(settings.SHOW_NAME, settings.GEN_MODEL, forbidden,
                                 batch_size=20, max_rounds=20)

# 2) Cover image
cover_png = draw_cover_png(settings.SHOW_NAME, data["chengyu"], data["pinyin"], data["gloss"])

# 3) TTS (disable by setting SKIP_TTS=True)
SKIP_TTS = False
# audio_mp3 is None (not b"") when TTS is skipped.
audio_mp3 = None if SKIP_TTS else tts_mp3(data["script"], settings.TTS_MODEL, settings.TTS_VOICE)

# 4) Structured Markdown body
body_md = script_to_markdown(data["chengyu"], data["pinyin"], data["gloss"], data["teaser"], data["script"], settings.GEN_MODEL)

# 5) Publish
publish_episode(
    show_name=settings.SHOW_NAME,
    repo=settings.REPO,
    branch=settings.GITHUB_BRANCH,
    site_url=settings.SITE_URL,
    baseurl=settings.BASEURL,
    publish_time_utc=settings.PUBLISH_TIME_UTC,
    data=data,
    body_md=body_md,
    cover_png=cover_png,
    audio_mp3=audio_mp3,
    dry_run=settings.DRY_RUN,
)


ImportError: cannot import name 'settings' from 'chengyu.config' (/Users/tilman/github3/chengyudaily/chengyu/config.py)

In [59]:
# Debugging aid for the `settings` ImportError: show which files back the
# `chengyu` package and its config module, and which names the module defines.
import chengyu, importlib, inspect
print("chengyu package:", chengyu.__file__)
import chengyu.config
print("has settings?", hasattr(chengyu.config, "settings"))
# Only dunder names in this listing means the module defined nothing of its own.
print("dir(config):", dir(chengyu.config))
print("config file:", inspect.getsourcefile(chengyu.config))


chengyu package: /Users/tilman/github3/chengyudaily/chengyu/__init__.py
has settings? False
dir(config): ['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__']
config file: /Users/tilman/github3/chengyudaily/chengyu/config.py


In [65]:
# Debugging aid: put the parent directory first on sys.path and reload the
# package, in case a stale or different copy of `chengyu` had been imported.
import sys, os, importlib
sys.path.insert(0, os.path.abspath(".."))  # from notebooks/ to repo root

import chengyu
import chengyu.config
# Reload the package before its submodule so both are re-executed from disk.
importlib.reload(chengyu)
importlib.reload(chengyu.config)

# Still fails in the recorded run: config.py itself does not define `settings`.
from chengyu.config import settings
print("OK:", settings)


ImportError: cannot import name 'settings' from 'chengyu.config' (/Users/tilman/github3/chengyudaily/chengyu/config.py)

In [64]:
import inspect, chengyu.config
# Debugging aid: print the source of the config module that was actually imported.
# The `with` block closes the file handle, which a bare open(...).read() leaves
# open; the explicit encoding avoids depending on the platform default.
with open(inspect.getsourcefile(chengyu.config), encoding="utf-8") as config_src:
    print(config_src.read())


