1  Setup & helpers

In [None]:
# %% [markdown]
# Cell 1 – imports & shared helpers
# --------------------------------
from pathlib import Path
import zipfile, os, re, html
from lxml import etree
from tqdm.auto import tqdm
import textile, dotenv
import nest_asyncio, asyncio
# NOTE(review): this notebook only uses Playwright's *async* API (see the
# `playwright.async_api` import), so the patch is presumably here for helpers
# that call `run_until_complete` inside Jupyter's running loop — confirm it is still needed.
nest_asyncio.apply()   # allow nested event loops inside the notebook's already-running loop

from bs4 import BeautifulSoup
from playwright.async_api import async_playwright, TimeoutError as PWTimeout

# Pull variables from a local .env file (if any) into the process environment,
# then read the three World Anvil settings. A missing variable raises KeyError.
dotenv.load_dotenv()
WA_EMAIL, WA_PASSWORD, WA_WORLD = (
  os.environ[var_name] for var_name in ("WA_EMAIL", "WA_PASSWORD", "WA_WORLD")
)


2  Extract your Obsidian Portal backup

In [None]:
# %% Cell 2 – choose the backup
OP_BACKUP = Path("extracted/rocko_2025-05-05.zip")        # change me
WORK_DIR  = Path("extracted/op")
# parents=True so the cell still works when WORK_DIR's parent folder does not exist yet
WORK_DIR.mkdir(parents=True, exist_ok=True)

with zipfile.ZipFile(OP_BACKUP) as zf:
  zf.extractall(WORK_DIR)

# Locate the export XML. Search recursively in case it is nested inside a
# sub-folder, and sort so the choice is deterministic if several files match
# (e.g. leftovers from an earlier extraction into the same WORK_DIR).
xml_candidates = sorted(WORK_DIR.rglob("*.xml"))
if not xml_candidates:
  raise FileNotFoundError(
    "No .xml file found in the extracted backup. "
    "Double‑check OP_BACKUP or inspect WORK_DIR to confirm the archive structure."
  )
if len(xml_candidates) > 1:
  print(f"Warning: {len(xml_candidates)} XML files found under {WORK_DIR}; using the first one.")

export_xml = xml_candidates[0]
print(f"Using OP export file: {export_xml}")
tree       = etree.parse(str(export_xml))
root       = tree.getroot()

Structure discovery (run once to inspect)

In [None]:
# %% Cell 3 – peek at tag names
from collections import Counter

# Tally how often each tag occurs anywhere in the export tree; the bare
# expression on the last line makes the notebook display the counts.
tags = Counter(node.tag for node in root.iter())
tags

3  Parse into plain Python dicts

In [None]:
# %% Cell 4 – generic helpers (Element → dict)
def get(el, tag):
  """Return the stripped text of the first `tag` child of `el`.

  Yields an empty string when the child is missing or has no text.
  """
  child = el.find(tag)
  if child is None:
    return ""
  text = child.text
  return text.strip() if text else ""

def record(el, wanted):
  """Build a dict mapping each tag name in `wanted` to its text under `el` (via `get`)."""
  fields = {}
  for key in wanted:
    fields[key] = get(el, key)
  return fields

def _collect(xpath, fields):
  """Turn every element matching `xpath` under `root` into a dict of `fields`."""
  return [record(node, fields) for node in root.findall(xpath)]

characters = _collect(".//character", ["name", "tagline", "description", "game_statblock"])

items = _collect(".//item", ["name", "description", "tagline"])

# NOTE(review): the saved run of the upload cell shows "Wikis: 0it", i.e. this
# list was empty — confirm the tag name against the tag counts from the peek cell.
wikis = _collect(".//wiki_page", ["title", "body", "slug"])

logs = _collect(".//adventure_log_post", ["title", "body", "post_date"])

4 Convert Textile → BBCode for World Anvil

In [None]:

from bs4 import NavigableString, Tag

def html_to_bbcode(html_frag: str) -> str:
  """
  Convert an HTML fragment to BBCode.

  The fragment is parsed with BeautifulSoup and walked recursively: text nodes
  are emitted as-is, known tags are wrapped in their BBCode equivalent, and
  unknown tags are dropped while their children are kept.

  Args:
    html_frag: HTML markup; `None` or an empty string yields "".

  Returns:
    The BBCode text with leading/trailing whitespace removed.
  """
  # HTML tags that map one-to-one onto an (opening, closing) BBCode pair.
  # Built once per call instead of once per visited node.
  simple_tags = {
    "strong":     ("[b]", "[/b]"),
    "b":          ("[b]", "[/b]"),
    "em":         ("[i]", "[/i]"),
    "i":          ("[i]", "[/i]"),
    "u":          ("[u]", "[/u]"),
    "strike":     ("[s]", "[/s]"),
    "del":        ("[s]", "[/s]"),
    "s":          ("[s]", "[/s]"),
    "code":       ("[code]", "[/code]"),
    "blockquote": ("[quote]", "[/quote]"),
  }

  def walk(node):
    if isinstance(node, NavigableString):
      return str(node)

    if not isinstance(node, Tag):
      return ""

    name   = node.name.lower()
    inside = "".join(walk(c) for c in node.children)

    if name in simple_tags:
      open_, close_ = simple_tags[name]
      return f"{open_}{inside}{close_}"

    if name == "br":
      return "\n"
    if name == "p":
      return f"{inside}\n\n"
    if name in ("h1","h2","h3","h4","h5","h6"):
      level = name[1]
      return f"[h{level}]{inside}[/h{level}]\n"
    if name == "a":
      href = node.get("href","")
      return f"[url={href}]{inside}[/url]"
    if name == "img":
      src = node.get("src","")
      return f"[img]{src}[/img]"
    if name == "ul":
      return f"[ul]{inside}[/ul]\n"
    if name == "ol":
      return f"[ol]{inside}[/ol]\n"
    if name == "li":
      return f"[*]{inside}[/*]"

    # Unknown tag: keep the content, drop the wrapper.
    return inside

  soup = BeautifulSoup(html_frag or "", "html.parser")
  # The original never invoked walk(), so the function always returned None.
  return "".join(walk(child) for child in soup.children).strip()

def textile_to_bbcode(text):
  """
  Render a Textile fragment as BBCode in two hops:
  Textile → HTML through the `textile` package, then HTML → BBCode through
  `html_to_bbcode`. A falsy `text` (None, "") is treated as an empty fragment.
  """
  source = text if text else ""
  rendered_html = textile.textile(source)
  return html_to_bbcode(rendered_html)

# Rewrite the Textile-bearing fields of every record as BBCode.
# The values are overwritten in place, so running this cell a second time
# would feed already-converted text back through the converter.
for bank in (characters, items, wikis, logs):
  for rec in bank:
    for col in ("description", "body", "game_statblock"):
      if col not in rec:
        continue
      rec[col] = textile_to_bbcode(rec[col])

5 Headless upload via Playwright

Selector stability: WA rolls out UI tweaks frequently.
Inspect your own “Create Article” form (worldanvil.com/w/{WA_WORLD}/editor/new) with DevTools and adjust the CSS/XPath selectors below if they drift.

In [None]:
# %% Cell 6 – playwright session wrapper
class WA:
  async def __aenter__(self):
    self.pw = await async_playwright().start()
    # persistent context lets us stay logged‑in between actions
    self.ctx = await self.pw.chromium.launch_persistent_context(
      user_data_dir="pw_profile",
      headless=False          # flip to True once everything is stable
    )
    self.page = await self.ctx.new_page()
    self.page.on("console", lambda m: print("PAGE LOG:", m.text))
    self.page.on("requestfailed", lambda r: print("REQ FAIL:", r.url, r.failure))
    return self

  async def __aexit__(self, exc_type, exc, tb):
    await self.ctx.close()
    await self.pw.stop()

  # ---------- actions ----------
  async def login(self):
    # Navigate to login and wait for the DOM to be ready
    await self.page.goto("https://www.worldanvil.com/login", wait_until="domcontentloaded")

    # Wait until the username field is actually present & visible (handles Cloudflare / slow net)
    # await self.page.wait_for_selector("input#username", state="visible", timeout=30000)
    # await self.page.fill('input#username', WA_EMAIL)     # <input id="username" name="_username">
    # await self.page.fill('input#password', WA_PASSWORD)  # <input id="password" name="_password">
    # 
    # # Submit the form
    # await self.page.click('input#_submit')

  # Make sure every article gets a ≥3‑character title
  def safe_title(raw: str | None, fallback_prefix: str = "Untitled") -> str:
    if raw and len(raw.strip()) >= 3:
      return raw.strip()
    # fallback: generate a unique placeholder
    safe_title.counter += 1
    return f"{fallback_prefix} {safe_title.counter}"
    safe_title.counter = 0
    
  async def new_article(self, *, template="generic", title="", content="", summary=""):
    """
    Use the dashboard’s green **Create a new Article** button, then pick a template
    from the modal. This is the only reliable path as of May‑2025.

    Supported template keys:
      generic, character, item
    """
    # 1.  Land on the world Summary page (the button lives here)
    await self.page.goto(f"https://www.worldanvil.com/world/{WA_WORLD}/summary", wait_until="domcontentloaded")

    # 2.  Wait for the green button and ensure it’s in view, then click
    button = self.page.locator("a.article-quick-create-trigger").first
    await button.wait_for(state="visible", timeout=15000)
    await button.scroll_into_view_if_needed()
    await button.click()

    # 3.  Wait for the modal container to appear
    await self.page.wait_for_selector("section.chakra-modal__content", timeout=10000)
    # If the modal didn’t appear, abort early with a helpful error
    if not await self.page.locator("section.chakra-modal__content").is_visible():
        raise RuntimeError("Template‑picker modal did not open; the Create button may have failed.")

    # 4.  Pick the correct template card inside the modal
    card_selector_map = {
      "generic":   ".athena-template-generic-template-card",
      "character": ".athena-template-character-template-card",
      "item":      ".athena-template-item-template-card",
    }
    card_sel = card_selector_map.get(template, ".athena-template-generic-template-card")
    await self.page.click(card_sel)

    # 5.  The template card opens the editor in a *new* tab; switch context
    editor_page = self.page.context.pages[-1]   # last opened tab
    await editor_page.wait_for_load_state()

    # 6.  Fill in the editor fields
    title_input = editor_page.locator('input[placeholder="Title"]').first
    await title_input.fill(title[:255])

    frame = editor_page.frame_locator('iframe[id$="description_ifr"]')
    await frame.locator("body").fill(content)

    try:
      await editor_page.fill("textarea#article_excerpt", summary[:511])
    except PWTimeout:
      pass

    # 7.  Save (button text is “Create”)
    await editor_page.click('button:has-text("Create")')
    await editor_page.wait_for_timeout(2000)

    # 8.  Close the tab and return focus to the main dashboard page
    await editor_page.close()

  # Add similar helpers for characters/items if you want specialised templates

6 Batch‑create content

In [59]:
from tqdm.asyncio import tqdm_asyncio  # async progress bar
async def main():
  """Log in once, then create one World Anvil article per parsed record.

  Banks are processed in a fixed order — wikis, characters, items, adventure
  logs — each behind its own progress bar.
  """
  def character_body(c):
    # Description first, then the stat block, separated by a blank line when a stat block exists.
    description = c.get("description") or ""
    separator = "\n\n" if c.get("game_statblock") else ""
    statblock = c.get("game_statblock") or ""
    return description + separator + statblock

  async with WA() as wa:
    await wa.login()

    # (progress label, records, template key, record -> (title, content, summary))
    batches = (
      ("Wikis",          wikis,      "generic",   lambda w:  (w["title"],  w["body"],         w["body"][:200])),
      ("Characters",     characters, "character", lambda c:  (c["name"],   character_body(c), c["tagline"])),
      ("Items",          items,      "item",      lambda it: (it["name"],  it["description"], it["tagline"])),
      ("Adventure Logs", logs,       "generic",   lambda lg: (lg["title"], lg["body"],        lg["post_date"])),
    )

    for label, bank, template_key, to_fields in batches:
      for rec in tqdm_asyncio(bank, desc=label):
        article_title, article_body, article_summary = to_fields(rec)
        await wa.new_article(
          template=template_key,
          title=article_title,
          content=article_body,
          summary=article_summary
        )

# Run the whole migration. This line only works inside a notebook cell;
# a plain script would need asyncio.run(main()) instead.
await main()  # Jupyter supports top‑level await

REQ FAIL: https://www.google.com/ccm/collect?tid=AW-825669689&en=page_view&dl=https%3A%2F%2Fwww.worldanvil.com%2Flogin&scrsrc=www.googletagmanager.com&frm=0&rnd=711725009.1746445358&dt=Login%20%7C%20Welcome%20to%20World%20Anvil%20%7C%20World%20Anvil&auid=2021977298.1746443149&navt=n&npa=0&gtm=45be54u1v9121218595za200&gcd=13l3l3l3l1l1&dma=0&tag_exp=101509157~103101750~103101752~103116026~103130495~103130497~103200004~103233427~103251618~103251620&tft=1746445357650&tfd=550&apve=1&apvf=f net::ERR_ABORTED
REQ FAIL: https://analytics.google.com/g/collect?v=2&tid=G-ZW5R4SHNYY&gtm=45je54u1v888737777z8857243476za200zb857243476&_p=1746445357636&_gaz=1&gcd=13l3l3l3l1l1&npa=0&dma=0&tag_exp=101509157~103101750~103101752~103116025~103200001~103211513~103233427~103251618~103251620&ptag_exp=101509156~103101747~103101749~103116026~103200004~103233424~103251618~103251620&cid=1010083131.1746443149&ecid=2079364196&ul=en-us&sr=1280x720&uaa=arm&uab=64&uafvl=Not.A%252FBrand%3B99.0.0.0%7CChromium%3B136.0.710

Wikis: 0it [00:00, ?it/s]
Characters:   0%|          | 0/247 [00:00<?, ?it/s]

PAGE LOG: [PRODROMOS] Loading from API due to missing or stale data
PAGE LOG: [PRODROMOS] No metadata available, skipping world index load
PAGE LOG: delay: initialised
PAGE LOG: delay: initialised
PAGE LOG: binding Diceroller
PAGE LOG: trackable events bound
PAGE LOG: finished loading Javascript.
PAGE LOG: delay: initialised
PAGE LOG: delay: initialised
PAGE LOG: delay: initialised
PAGE LOG: explorer detected
PAGE LOG: initiating explorer
PAGE LOG: binding mention system
PAGE LOG: The Mention class is being loaded on Code
PAGE LOG: Tooltipster: one or more tooltips are already attached to the element below. Ignoring.
PAGE LOG: JSHandle@node
PAGE LOG: Tooltipster: one or more tooltips are already attached to the element below. Ignoring.
PAGE LOG: JSHandle@node
PAGE LOG: Tooltipster: one or more tooltips are already attached to the element below. Ignoring.
PAGE LOG: JSHandle@node
PAGE LOG: Tooltipster: one or more tooltips are already attached to the element below. Ignoring.
PAGE LOG: JS

Characters:   0%|          | 0/247 [00:06<?, ?it/s]


TargetClosedError: BrowserContext.close: Target page, context or browser has been closed