# Step 1 — start the server (uses HashingEmbedder; kills any prior server in this kernel)

In [None]:
# Step 1: start FastAPI server (background)
import os, sys, time, subprocess
  
HOST, PORT = "127.0.0.1", 8000
BASE = f"http://{HOST}:{PORT}"
  
# Stop previous server in this kernel if running
try:
    SERVER_PROC
except NameError:
      SERVER_PROC = None
  
if SERVER_PROC and SERVER_PROC.poll() is None:
      SERVER_PROC.terminate()
      try:
          SERVER_PROC.wait(timeout=5)
      except Exception:
          SERVER_PROC.kill()
  
env = os.environ.copy()
env["STYLGEN_LOG_LEVEL"] = "INFO"   # or "DEBUG"
env["STYLGEN_DEBUG"] = "0"
env["STYLGEN_EMBEDDER"] = "hash"    # we'll switch to ST later
env["OLLAMA_BASE"] = "http://127.0.0.1:11434"
env["OLLAMA_MODEL"] = "llama3:8b"
  
cmd = [
      sys.executable, "-m", "uvicorn", "stylgen_v0.main:app",
      "--host", HOST, "--port", str(PORT), "--log-level", "info"
  ]
SERVER_PROC = subprocess.Popen(cmd, env=env)
print("Server PID:", SERVER_PROC.pid, "BASE:", BASE)
time.sleep(1.5)


Server PID: 18355 BASE: http://127.0.0.1:8000


2025-09-06 16:20:28,089 INFO [stylgen] embedder.selected kind=hash dim=384
INFO:     Started server process [18355]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


In [None]:
# env["OLLAMA_BASE"] = "http://127.0.0.1:11434"
# env["OLLAMA_MODEL"] = "llama3:8b"

# Step 2 — health check (waits briefly for the server to come up)

In [2]:
# Step 2: health check
import time, httpx
  
for _ in range(40):  # ~10s total
      try:
          r = httpx.get(f"{BASE}/health", timeout=2.0)
          print("Health:", r.status_code, r.json())
          break
      except Exception:
          time.sleep(0.25)
else:
      raise RuntimeError("Server did not become healthy in time")

INFO:     127.0.0.1:48590 - "GET /health HTTP/1.1" 200 OK
Health: 200 {'status': 'ok'}


# Step 3 — create a persona (example samples)

In [4]:
# Step 3: create persona (example)
import httpx, textwrap, json
  
user_id = "u_nb1"  # change if you like
  
persona_payload = {
      "user_id": user_id,
      "samples": [
          "Shipped our onboarding revamp. Short, friendly checklists beat long docs.",
          "If your standup drags, it's a smell. Keep it under 10 minutes, tops.",
          "Docs are a product. If you don't version them, they'll version you."
      ],
      "preferences": {
          "tone_descriptors": ["forthright", "practical", "lightly humorous"],
          "taboo_phrases": ["In today's fast-paced world"],
          "formality": 2,
          "emoji_ok": True,
          "hashtags_niche": True,
          "structure_pref": "story-first"
      }
  }
  
r = httpx.post(f"{BASE}/persona", json=persona_payload, timeout=15.0)
r.raise_for_status()
resp = r.json()
card = resp["persona"]
  
print("✓ Persona created")
print("user_id:", resp["user_id"], "num_samples:", resp["num_samples"])
print("exemplar_ids:", card["exemplar_ids"])
print("centroid_dim:", None if card["centroid"] is None else len(card["centroid"]))
print("tone:", card["preferences"]["tone_descriptors"])


INFO:     127.0.0.1:59642 - "POST /persona HTTP/1.1" 200 OK
✓ Persona created
user_id: u_nb1 num_samples: 3
exemplar_ids: ['5794076f-f47a-4b51-bb89-c36adc0893d8', 'd1894100-8da2-4282-9778-755e434e95ac', '472ddc45-bc60-4faa-bb63-7e3fae80fa84']
centroid_dim: 384
tone: ['forthright', 'practical', 'lightly humorous']


2025-09-06 16:24:29,736 INFO [stylgen] persona.create user_id=u_nb1 samples=3
2025-09-06 16:24:29,737 INFO [stylgen] persona.created user_id=u_nb1 exemplars=3 centroid=yes


# Step 4 — generate 2 variants and preview scores

In [5]:
# Step 4: generate 2 variants (non-stream), quick preview
import httpx, textwrap
  
gen_payload = {
      "user_id": user_id,  # from Step 3
      "brief": {
          "keywords": ["onboarding", "dev teams"],
          "goal": "educate",
          "audience": "engineering managers",
          "cta": "Comment with your experience",
          "length_hint": 900,
          "emoji": True,
      },
      "num_variants": 2,
      "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 256},  # bump num_predict for longer drafts
  }
  
r = httpx.post(f"{BASE}/generate", json=gen_payload, timeout=180.0)
r.raise_for_status()
g = r.json()
  
gen_id = g["generation_id"]
chosen_text = g["chosen"]["text"].strip()
preview = textwrap.shorten(chosen_text.replace("\n", " / "), width=160)
  
print(f"✓ Generated {len(g['variants'])} variants (generation_id={gen_id})")
print("Chosen preview:", preview)
print("Scores:")
for i, v in enumerate(g["variants"]):
    s = v["score"]
    print(f"  v{i}: style={s['style_similarity']:.3f} nov={s['novelty']:.3f} length_ok={s['length_ok']}")


2025-09-06 16:33:38,968 INFO [stylgen] generate.request user_id=u_nb1 goal=educate keywords=onboarding,dev teams variants=2 len_hint=900


INFO:     127.0.0.1:33284 - "POST /generate HTTP/1.1" 200 OK
✓ Generated 2 variants (generation_id=060bfecc-ea30-409f-94d5-e606430ed064)
Chosen preview: Here's a draft for a LinkedIn post: / / "📊 As an engineering manager, I've seen my fair share of onboarding disasters. But after our recent revamp, I'm [...]
Scores:
  v0: style=0.330 nov=0.763 length_ok=True
  v1: style=0.335 nov=0.707 length_ok=True


2025-09-06 16:33:44,299 INFO [stylgen.pipeline] variants.sorted count=2 top_sim=0.330 top_nov=0.763 length_ok=True
2025-09-06 16:33:44,299 INFO [stylgen] generate.done user_id=u_nb1 generation_id=060bfecc-ea30-409f-94d5-e606430ed064 chosen_sim=0.330 chosen_novelty=0.763


# Step 5 — show full variant texts (chosen + all)

In [6]:
# Step 5: display full texts and scores
  # (If NameError: g is not defined, re-run Step 4)
  
print("=== Chosen Variant ===")
print(g["chosen"]["text"])
print("\nChosen score:", g["chosen"]["score"])
  
print("\n=== All Variants ===")
for i, v in enumerate(g["variants"]):
      print("\n" + "="*80)
      print(f"Variant {i}  |  length={len(v['text'])} chars")
      print(v["text"])
      print("\nScore:", v["score"])


=== Chosen Variant ===
Here's a draft for a LinkedIn post:

"📊 As an engineering manager, I've seen my fair share of onboarding disasters. But after our recent revamp, I'm convinced: short, friendly checklists are the way to go! 🚀 Gone are the days of lengthy docs that leave new devs feeling lost.

In our previous state, it took an average dev team member 3-4 weeks to get up and running. Now? We're talking a swift 1-2 weeks. The difference? Clear, actionable steps in a simple checklist. No more sifting through unnecessary details! 👀

So, what's your experience with onboarding? Have you seen similar results or struggled with lengthy docs? Comment below with your story! 💬 #DevOps #EngineeringManagement #OnboardingBestPractices"

Let me know if this meets your requirements! 😊

Chosen score: {'style_similarity': 0.33007124066352844, 'novelty': 0.7634750455617905, 'structure_ok': True, 'length_ok': True}

=== All Variants ===

Variant 0  |  length=760 chars
Here's a draft for a LinkedIn pos

# Step 6 — stream a live draft (prints tokens as they arrive)

In [7]:
# Step 6: streaming generation (SSE)
import asyncio, httpx
  
async def stream_post():
      req = {
          "user_id": user_id,  # from Step 3
          "brief": {
              "keywords": ["onboarding", "dev teams"],
              "goal": "educate",
              "audience": "engineering managers",
              "cta": "Comment with your experience",
              "length_hint": 900,
              "emoji": True,
          },
          "num_variants": 1,
          "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 256},
      }
      async with httpx.AsyncClient(timeout=300.0) as client:
          async with client.stream("POST", f"{BASE}/generate/stream", json=req) as resp:
              resp.raise_for_status()
              print("SSE connected")
              printed = 0
              async for line in resp.aiter_lines():
                  if not line:
                      continue
                  if line.startswith("event: meta"):
                      print("\n[META]")
                  elif line.startswith("event: done"):
                      print("\n[done]")
                      break
                  elif line.startswith("data: "):
                      chunk = line[len("data: "):]
                      print(chunk, end="", flush=True)
                      printed += len(chunk)
                      if printed > 1200:
                          print("\n[truncated display after ~1200 chars]")
                          # keep draining until 'done' event
              # end for
  
await stream_post()


INFO:     127.0.0.1:40850 - "POST /generate/stream HTTP/1.1" 200 OK
SSE connected

[META]
{'exemplars': ['Shipped our onboarding revamp. Short, friendly checklists beat long docs.', "If your standup drags, it's a smell. Keep it under 10 minutes, tops.", "Docs are a product. If you don't version them, they'll version you."], 'goal': 'educate', 'keywords': ['onboarding', 'dev teams']}

2025-09-06 16:42:05,647 INFO [stylgen] generate.stream.request user_id=u_nb1 goal=educate keywords=onboarding,dev teams


Here's a draft:"Onboarding dev teams is like building a LEGO castle - it takes precision, patience, and a solid foundation 🏰. I've seen too many new hires get lost in sea of documentation or left dangling with unclear expectations.So, what works? For me, it's about setting clear goals and priorities up front. A concise onboarding plan beats a lengthy manual any day! And don't even get me started on the power of regular check-ins - it's like having a trusty LEGO instruction book 📚#EngineeringManagement #DevTeamOnboarding #AgileLessons"Feel free to comment with your experience!
[done]


# Step 7 — submit feedback for the chosen generation

In [8]:
# Step 7: submit feedback (positive case)
import httpx, json
  
assert "g" in globals(), "Please run Step 4 to create a generation first."
fb_payload = {
      "user_id": user_id,                 # same as used to generate
      "generation_id": g["generation_id"],# from Step 4
      "rating": 4,                        # 1..5
      "tags": ["good tone", "useful CTA"]
  }
r = httpx.post(f"{BASE}/feedback", json=fb_payload, timeout=10.0)
r.raise_for_status()
print("Feedback response:", r.json())


INFO:     127.0.0.1:51604 - "POST /feedback HTTP/1.1" 200 OK
Feedback response: {'status': 'received'}


# END of normal process with HashingEmbedder. next we try sentence-transformer

# Step 8a — ensure sentence-transformers is installed

In [11]:
# Step 8a: install optional extra (hf-embeddings) with uv
import os, shutil, subprocess
  
# Find repo root (where pyproject.toml lives)
root = os.getcwd()
if not os.path.exists(os.path.join(root, "pyproject.toml")):
      root = os.path.abspath(os.path.join(root, ".."))
  
assert shutil.which("uv"), "uv CLI not found on PATH. Install uv, then retry."
print("Running: uv sync --extra hf-embeddings (cwd:", root, ")")
subprocess.check_call(["uv", "sync", "--extra", "hf-embeddings"], cwd=root)
print("uv sync complete")


Running: uv sync --extra hf-embeddings (cwd: /home/gluttony47/WORK/personal/claude_pogo/stylgen_v0 )


[2mResolved [1m75 packages[0m [2min 0.85ms[0m[0m
[36m[1mDownloading[0m[39m networkx [2m(1.9MiB)[0m
[36m[1mDownloading[0m[39m pillow [2m(6.3MiB)[0m
[36m[1mDownloading[0m[39m setuptools [2m(1.1MiB)[0m
[36m[1mDownloading[0m[39m nvidia-nvjitlink-cu12 [2m(37.4MiB)[0m
[36m[1mDownloading[0m[39m scipy [2m(33.5MiB)[0m
[36m[1mDownloading[0m[39m scikit-learn [2m(9.0MiB)[0m
[36m[1mDownloading[0m[39m hf-xet [2m(3.0MiB)[0m
[36m[1mDownloading[0m[39m nvidia-cudnn-cu12 [2m(674.0MiB)[0m
[36m[1mDownloading[0m[39m triton [2m(148.4MiB)[0m
[36m[1mDownloading[0m[39m torch [2m(846.8MiB)[0m
[36m[1mDownloading[0m[39m nvidia-curand-cu12 [2m(60.7MiB)[0m
[36m[1mDownloading[0m[39m nvidia-cusolver-cu12 [2m(255.1MiB)[0m
[36m[1mDownloading[0m[39m nvidia-cufft-cu12 [2m(184.2MiB)[0m
[36m[1mDownloading[0m[39m nvidia-cusparselt-cu12 [2m(273.9MiB)[0m
[36m[1mDownloading[0m[39m nvidia-nccl-cu12 [2m(307.4MiB)[0m
[36m[1mDownloadin

uv sync complete


[2mInstalled [1m40 packages[0m [2min 177ms[0m[0m
 [31m-[39m [1masttokens[0m[2m==3.0.0[0m
 [32m+[39m [1mcharset-normalizer[0m[2m==3.4.3[0m
 [31m-[39m [1mcomm[0m[2m==0.2.3[0m
 [31m-[39m [1mdebugpy[0m[2m==1.8.16[0m
 [31m-[39m [1mdecorator[0m[2m==5.2.1[0m
 [31m-[39m [1mexecuting[0m[2m==2.2.1[0m
 [32m+[39m [1mfilelock[0m[2m==3.19.1[0m
 [32m+[39m [1mfsspec[0m[2m==2025.9.0[0m
 [32m+[39m [1mhf-xet[0m[2m==1.1.9[0m
 [32m+[39m [1mhuggingface-hub[0m[2m==0.34.4[0m
 [31m-[39m [1mipykernel[0m[2m==6.30.1[0m
 [31m-[39m [1mipython[0m[2m==9.5.0[0m
 [31m-[39m [1mipython-pygments-lexers[0m[2m==1.1.1[0m
 [31m-[39m [1mjedi[0m[2m==0.19.2[0m
 [32m+[39m [1mjinja2[0m[2m==3.1.6[0m
 [32m+[39m [1mjoblib[0m[2m==1.5.2[0m
 [31m-[39m [1mjupyter-client[0m[2m==8.6.3[0m
 [31m-[39m [1mjupyter-core[0m[2m==5.8.1[0m
 [32m+[39m [1mmarkupsafe[0m[2m==3.0.2[0m
 [31m-[39m [1mmatplotlib-inline[0m[2m==0.1.7[0

# Step 8b — restart server with ST embedder (e5-large-v2)

In [12]:
# Step 8b: restart server with ST embedder (e5-large-v2)
import os, sys, time, subprocess
  
# Stop previous server
try:
      SERVER_PROC
except NameError:
      SERVER_PROC = None
  
if SERVER_PROC and SERVER_PROC.poll() is None:
      SERVER_PROC.terminate()
      try:
          SERVER_PROC.wait(timeout=5)
      except Exception:
          SERVER_PROC.kill()
  
HOST, PORT = "127.0.0.1", 8000
BASE = f"http://{HOST}:{PORT}"
  
env = os.environ.copy()
env["STYLGEN_LOG_LEVEL"] = "INFO"
env["STYLGEN_DEBUG"] = "0"
env["STYLGEN_EMBEDDER"] = "st"
env["STYLGEN_ST_MODEL"] = "intfloat/e5-large-v2"  # bigger, higher quality
  
cmd = [sys.executable, "-m", "uvicorn", "stylgen_v0.main:app",
         "--host", HOST, "--port", str(PORT), "--log-level", "info"]
SERVER_PROC = subprocess.Popen(cmd, env=env)
print("Server PID:", SERVER_PROC.pid, "BASE:", BASE)
time.sleep(1.5)

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [18355]


Server PID: 19129 BASE: http://127.0.0.1:8000


2025-09-06 17:00:54,317 INFO [stylgen] embedder.selected kind=st model=intfloat/e5-large-v2
INFO:     Started server process [19129]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


# Step 8c — health check

In [13]:
# Step 8c: health check
import time, httpx
for _ in range(40):
      try:
          r = httpx.get(f"{BASE}/health", timeout=2.0)
          print("Health:", r.status_code, r.json())
          break
      except Exception:
          time.sleep(0.25)
else:
      raise RuntimeError("Server did not become healthy in time")


INFO:     127.0.0.1:52452 - "GET /health HTTP/1.1" 200 OK
Health: 200 {'status': 'ok'}


# Step 9 — re-create persona (fresh in-memory store after restart)

In [14]:
# Step 9: create persona again (first ST call may take a minute to load model)
import httpx
  
user_id = "u_nb1_st"  # new ID to keep runs separate
persona_payload = {
      "user_id": user_id,
      "samples": [
          "Shipped our onboarding revamp. Short, friendly checklists beat long docs.",
          "If your standup drags, it's a smell. Keep it under 10 minutes, tops.",
          "Docs are a product. If you don't version them, they'll version you."
      ],
      "preferences": {
          "tone_descriptors": ["forthright", "practical", "lightly humorous"],
          "taboo_phrases": ["In today's fast-paced world"],
          "formality": 2,
          "emoji_ok": True,
          "hashtags_niche": True,
          "structure_pref": "story-first"
      }
  }
r = httpx.post(f"{BASE}/persona", json=persona_payload, timeout=180.0)
r.raise_for_status()
resp = r.json()
card = resp["persona"]
print("✓ Persona created (ST)")
print("user_id:", resp["user_id"], "num_samples:", resp["num_samples"])
print("centroid_dim:", None if card["centroid"] is None else len(card["centroid"]))


2025-09-06 17:02:20,532 INFO [stylgen] persona.create user_id=u_nb1_st samples=3


INFO:     127.0.0.1:56748 - "POST /persona HTTP/1.1" 200 OK
✓ Persona created (ST)
user_id: u_nb1_st num_samples: 3
centroid_dim: 1024


2025-09-06 17:02:20,795 INFO [stylgen] persona.created user_id=u_nb1_st exemplars=3 centroid=yes


# Step 10 — generate 2 variants (with ST embedder)

In [15]:
# Step 10: generate (ST)
import httpx, textwrap
  
gen_payload = {
      "user_id": user_id,  # "u_nb1_st" from Step 9
      "brief": {
          "keywords": ["onboarding", "dev teams"],
          "goal": "educate",
          "audience": "engineering managers",
          "cta": "Comment with your experience",
          "length_hint": 900,
          "emoji": True,
      },
      "num_variants": 3,
      "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 256},
  }
  
r = httpx.post(f"{BASE}/generate", json=gen_payload, timeout=180.0)
r.raise_for_status()
g_st = r.json()
  
print(f"✓ Generated {len(g_st['variants'])} variants (generation_id={g_st['generation_id']})")
for i, v in enumerate(g_st["variants"]):
      s = v["score"]
      print(f"  v{i}: style={s['style_similarity']:.3f} nov={s['novelty']:.3f} length_ok={s['length_ok']}")


2025-09-06 17:04:30,067 INFO [stylgen] generate.request user_id=u_nb1_st goal=educate keywords=onboarding,dev teams variants=3 len_hint=900


INFO:     127.0.0.1:46678 - "POST /generate HTTP/1.1" 200 OK
✓ Generated 3 variants (generation_id=62c6a40f-1cf1-4d2b-bbb4-53016c3c5c31)
  v0: style=0.886 nov=0.134 length_ok=False
  v1: style=0.877 nov=0.124 length_ok=True
  v2: style=0.865 nov=0.138 length_ok=True


2025-09-06 17:04:36,464 INFO [stylgen.pipeline] variants.sorted count=3 top_sim=0.886 top_nov=0.134 length_ok=False
2025-09-06 17:04:36,464 INFO [stylgen] generate.done user_id=u_nb1_st generation_id=62c6a40f-1cf1-4d2b-bbb4-53016c3c5c31 chosen_sim=0.886 chosen_novelty=0.134


# Step 11 — show full texts (chosen + all)

In [16]:
# Step 11: display full texts (ST)
print("=== Chosen Variant (ST) ===")
print(g_st["chosen"]["text"])
print("\nChosen score:", g_st["chosen"]["score"])
  
print("\n=== All Variants (ST) ===")
for i, v in enumerate(g_st["variants"]):
      print("\n" + "="*80)
      print(f"Variant {i}  |  length={len(v['text'])} chars")
      print(v["text"])
      print("\nScore:", v["score"])


=== Chosen Variant (ST) ===
Here's my attempt:

"Just finished onboarding our new dev team members 🤩! I've learned that short, snappy checklists > lengthy docs. Our new crew is up to speed in no time. Want to know my secret? 🤔 It starts with a clear plan, not a 50-page manual. Try it out and let me know your experience! #devops #onboardinghacks"

Let me know if this meets your expectations or if I need to make any changes! 😊

Chosen score: {'style_similarity': 0.885976254940033, 'novelty': 0.1336246132850647, 'structure_ok': True, 'length_ok': False}

=== All Variants (ST) ===

Variant 0  |  length=400 chars
Here's my attempt:

"Just finished onboarding our new dev team members 🤩! I've learned that short, snappy checklists > lengthy docs. Our new crew is up to speed in no time. Want to know my secret? 🤔 It starts with a clear plan, not a 50-page manual. Try it out and let me know your experience! #devops #onboardinghacks"

Let me know if this meets your expectations or if I need to mak

# Step 12 — stream a live draft (with ST embedder)

In [17]:
# Step 12: streaming generation (SSE) with sentence-transformers persona
  # Note: streamed text is raw (no critique step applied while streaming).
import asyncio, httpx
  
async def stream_post_st():
      req = {
          "user_id": user_id,  # "u_nb1_st" from Step 9
          "brief": {
              "keywords": ["onboarding", "dev teams"],
              "goal": "educate",
              "audience": "engineering managers",
              "cta": "Comment with your experience",
              "length_hint": 900,
              "emoji": True,
          },
          "num_variants": 1,
          "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 256},
      }
      async with httpx.AsyncClient(timeout=300.0) as client:
          async with client.stream("POST", f"{BASE}/generate/stream", json=req) as resp:
              resp.raise_for_status()
              print("SSE connected (ST)")
              last_event = None
              printed = 0
              async for line in resp.aiter_lines():
                  if not line:
                      continue
                  if line.startswith("event: "):
                      last_event = line.split("event: ", 1)[1].strip()
                      if last_event == "meta":
                          print("\n[META]", end=" ")
                      elif last_event == "done":
                          print("\n[done]")
                          break
                      continue
                  if line.startswith("data: "):
                      data = line[len("data: "):]
                      if last_event == "meta":
                          print(data)  # prints exemplar previews and brief meta
                          last_event = None
                      else:
                          print(data, end="", flush=True)
                          printed += len(data)
                          if printed > 1200:
                              print("\n[truncated display after ~1200 chars]")
                              # keep draining until 'done' event
              # end for
  
await stream_post_st()

INFO:     127.0.0.1:56420 - "POST /generate/stream HTTP/1.1" 200 OK
SSE connected (ST)

[META] {'exemplars': ['Shipped our onboarding revamp. Short, friendly checklists beat long docs.', "If your standup drags, it's a smell. Keep it under 10 minutes, tops.", "Docs are a product. If you don't version them, they'll version you."], 'goal': 'educate', 'keywords': ['onboarding', 'dev teams']}
Here's a

2025-09-06 17:08:32,468 INFO [stylgen] generate.stream.request user_id=u_nb1_st goal=educate keywords=onboarding,dev teams


 draft:Shipped our onboarding revamp 🚀! I've learned that dev teams can't afford to waste time on unnecessary docs. Instead, I use short, friendly checklists that get new team members up and running in no time.I recall one engineer who took 3 weeks to onboard because of lengthy documentation. That's 15 working days of productivity lost 🕰️! Now, we've cut the onboarding process by 75% using our new approach.Want to learn how you can streamline your dev team onboarding? Comment with your experience and let's discuss best practices 👇 #EngineeringManagementTips #DevTeamOnboarding #SoftwareDevelopmentBestPractices
[done]


# Step 13 — create your own persona (edit samples, then run)

In [18]:
# Step 13: create your own persona (edit samples below)
  
import httpx
  
user_id_custom = "u_custom1"  # change if you like
custom_samples = [
      "Your sample post 1 (paste real text here).",
      "Your sample post 2 (paste real text here).",
      "Your sample post 3 (paste real text here).",
  ]
preferences_custom = {
      "tone_descriptors": ["direct", "witty", "contrarian"],
      "taboo_phrases": ["As an AI", "In today's fast-paced world"],
      "formality": 2,
      "emoji_ok": True,
      "hashtags_niche": True,
      "structure_pref": "story-first",
  }
  
r = httpx.post(f"{BASE}/persona", json={
      "user_id": user_id_custom,
      "samples": custom_samples,
      "preferences": preferences_custom
  }, timeout=180.0)
r.raise_for_status()
resp_c = r.json()
print("✓ Persona created:", resp_c["user_id"], "samples:", resp_c["num_samples"])
print("exemplar_ids:", resp_c["persona"]["exemplar_ids"])
print("centroid_dim:", None if resp_c["persona"]["centroid"] is None else len(resp_c["persona"]
  ["centroid"]))

INFO:     127.0.0.1:55664 - "POST /persona HTTP/1.1" 200 OK
✓ Persona created: u_custom1 samples: 3
exemplar_ids: ['524b5bb3-6211-4fd4-bf54-f2d032485aca', 'ed5aa321-2ba9-4460-acae-ef6a9ffd7ca2', '7afe07d9-39b3-4111-a60f-f284861e3465']
centroid_dim: 1024


2025-09-06 17:18:11,540 INFO [stylgen] persona.create user_id=u_custom1 samples=3
2025-09-06 17:18:11,632 INFO [stylgen] persona.created user_id=u_custom1 exemplars=3 centroid=yes


# Step 14 — generate for your custom persona (non-stream) and preview scores

In [19]:
# Step 14: generate for your custom persona
import httpx, textwrap
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
gen_payload = {
      "user_id": user_id_custom,
      "brief": {
          "keywords": ["onboarding", "dev teams"],  # edit if you like
          "goal": "educate",
          "audience": "engineering managers",
          "cta": "Comment with your experience",
          "length_hint": 900,
          "emoji": True,
      },
      "num_variants": 2,
      "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": 256},
  }
  
r = httpx.post(f"{BASE}/generate", json=gen_payload, timeout=180.0)
r.raise_for_status()
g_custom = r.json()
  
print(f"✓ Generated {len(g_custom['variants'])} variants (generation_id={g_custom['generation_id']})")
for i, v in enumerate(g_custom["variants"]):
      s = v["score"]
      print(f"  v{i}: style={s['style_similarity']:.3f} nov={s['novelty']:.3f} length_ok={s['length_ok']}")
print("Chosen preview:", textwrap.shorten(g_custom["chosen"]["text"].replace("\n", " / "),
  width=160))


2025-09-06 17:20:39,890 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=2 len_hint=900


INFO:     127.0.0.1:57082 - "POST /generate HTTP/1.1" 200 OK
✓ Generated 2 variants (generation_id=b18c7669-a07d-4348-9187-7b5e8063b309)
  v0: style=0.822 nov=0.181 length_ok=True
  v1: style=0.786 nov=0.213 length_ok=True
Chosen preview: Here's my attempt at crafting a LinkedIn post: / / "Confession time, fellow engineering managers: I'm guilty of making dev teams feel like they're trying [...]


2025-09-06 17:20:45,288 INFO [stylgen.pipeline] variants.sorted count=2 top_sim=0.822 top_nov=0.181 length_ok=True
2025-09-06 17:20:45,289 INFO [stylgen] generate.done user_id=u_custom1 generation_id=b18c7669-a07d-4348-9187-7b5e8063b309 chosen_sim=0.822 chosen_novelty=0.181


# Step 15 — display full texts (custom persona)

In [20]:
# Step 15: display full texts (custom persona)
assert "g_custom" in globals(), "Please run Step 14 first."
  
print("=== Chosen Variant (custom) ===")
print(g_custom["chosen"]["text"])
print("\nChosen score:", g_custom["chosen"]["score"])
  
print("\n=== All Variants (custom) ===")
for i, v in enumerate(g_custom["variants"]):
      print("\n" + "="*80)
      print(f"Variant {i}  |  length={len(v['text'])} chars")
      print(v["text"])
      print("\nScore:", v["score"])


=== Chosen Variant (custom) ===
Here's my attempt at crafting a LinkedIn post:

"Confession time, fellow engineering managers: I'm guilty of making dev teams feel like they're trying to solve a puzzle blindfolded during onboarding 🤯. How many times have we seen talented engineers struggle to find their footing, only to realize that the issue wasn't the tech, but the lack of context and clear expectations? 💡

As someone who's been there, done that, and got the t-shirt (literally), I've learned a thing or two about streamlining onboarding. Want to learn more? 🤔 Comment with your experience below! #DevOps #OnboardingMistakes"

Let me know if this meets your expectations! 😊

Chosen score: {'style_similarity': 0.821702241897583, 'novelty': 0.1814666986465454, 'structure_ok': True, 'length_ok': True}

=== All Variants (custom) ===

Variant 0  |  length=646 chars
Here's my attempt at crafting a LinkedIn post:

"Confession time, fellow engineering managers: I'm guilty of making dev teams feel 

# Step 16 — compare temperature (0.2 vs 1.0) for your custom persona

In [21]:
# Step 16: temperature comparison (same seed, different temperature)
import httpx, textwrap
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
def gen_with_temp(temp: float):
      r = httpx.post(
          f"{BASE}/generate",
          json={
              "user_id": user_id_custom,
              "brief": {
                  "keywords": ["onboarding", "dev teams"],
                  "goal": "educate",
                  "audience": "engineering managers",
                  "cta": "Comment with your experience",
                  "length_hint": 900,
                  "emoji": True,
              },
              "num_variants": 1,
              "llm_options": {"temperature": temp, "top_p": 0.9, "num_predict": 256, "seed": 123},
          },
          timeout=180.0,
      )
      r.raise_for_status()
      return r.json()
  
g_t02 = gen_with_temp(0.2)
g_t10 = gen_with_temp(1.0)
  
def summarize(tag, g):
      v = g["chosen"]
      s = v["score"]
      preview = textwrap.shorten(v["text"].replace("\n", " / "), width=160)
      print(f"{tag} | len={len(v['text'])} | style={s['style_similarity']:.3f} nov={s['novelty']:.3f} length_ok={s['length_ok']}")
      print("  ", preview)
  
print("✓ Generated with different temperatures (seed=123)")
summarize("T=0.2", g_t02)
summarize("T=1.0", g_t10)


2025-09-06 17:26:08,681 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900
2025-09-06 17:26:12,612 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.791 top_nov=0.210 length_ok=True
2025-09-06 17:26:12,612 INFO [stylgen] generate.done user_id=u_custom1 generation_id=599e31f9-fe89-4c46-a365-20c1cbdf14e3 chosen_sim=0.791 chosen_novelty=0.210
2025-09-06 17:26:12,640 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900


INFO:     127.0.0.1:38544 - "POST /generate HTTP/1.1" 200 OK
INFO:     127.0.0.1:38550 - "POST /generate HTTP/1.1" 200 OK
✓ Generated with different temperatures (seed=123)
T=0.2 | len=912 | style=0.791 nov=0.210 length_ok=True
   Here's a draft post: / / "I've seen it time and again: dev teams stuck in onboarding limbo, with new hires struggling to get up to speed. 🚀 As an [...]
T=1.0 | len=961 | style=0.805 nov=0.196 length_ok=True
   Here's my attempt at crafting a LinkedIn post: / / "I've seen it time and again: onboarding new devs to an existing team is like trying to cram a square [...]


2025-09-06 17:26:14,557 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.805 top_nov=0.196 length_ok=True
2025-09-06 17:26:14,557 INFO [stylgen] generate.done user_id=u_custom1 generation_id=a4cb33e4-7646-4549-9552-231b647f7ec6 chosen_sim=0.805 chosen_novelty=0.196


# Step 17 — compare num_predict (output length and latency)

In [22]:
# Step 17: num_predict comparison (128 vs 256 vs 512)
import httpx, time
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
def gen_with_np(npred: int):
      t0 = time.time()
      r = httpx.post(
          f"{BASE}/generate",
          json={
              "user_id": user_id_custom,
              "brief": {
                  "keywords": ["onboarding", "dev teams"],
                  "goal": "educate",
                  "audience": "engineering managers",
                  "cta": "Comment with your experience",
                  "length_hint": 900,
                  "emoji": True,
              },
              "num_variants": 1,
              "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": npred, "seed": 123},
          },
          timeout=180.0,
      )
      r.raise_for_status()
      g = r.json()
      dt_ms = int((time.time() - t0) * 1000)
      v, s = g["chosen"], g["chosen"]["score"]
      return {"np": npred, "chars": len(v["text"]), "style": s["style_similarity"], "nov":
  s["novelty"], "length_ok": s["length_ok"], "lat_ms": dt_ms}
  
results = [gen_with_np(n) for n in (128, 256, 512)]
for r in results:
      print(f"num_predict={r['np']:>3} | len={r['chars']:>4} | style={r['style']:.3f} | nov={r['nov']:.3f} | length_ok={r['length_ok']} | {r['lat_ms']} ms")


2025-09-06 17:28:47,545 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900
2025-09-06 17:28:48,859 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.814 top_nov=0.187 length_ok=True
2025-09-06 17:28:48,859 INFO [stylgen] generate.done user_id=u_custom1 generation_id=9163d023-ad59-46f6-a838-e6e3d87bc0a3 chosen_sim=0.814 chosen_novelty=0.187
2025-09-06 17:28:48,891 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900


INFO:     127.0.0.1:50078 - "POST /generate HTTP/1.1" 200 OK


2025-09-06 17:28:51,067 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.814 top_nov=0.186 length_ok=True
2025-09-06 17:28:51,067 INFO [stylgen] generate.done user_id=u_custom1 generation_id=e418e5d5-7166-4530-ad6a-1af075ca17a8 chosen_sim=0.814 chosen_novelty=0.186
2025-09-06 17:28:51,083 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900


INFO:     127.0.0.1:50092 - "POST /generate HTTP/1.1" 200 OK
INFO:     127.0.0.1:50098 - "POST /generate HTTP/1.1" 200 OK
num_predict=128 | len= 593 | style=0.814 | nov=0.187 | length_ok=True | 1352 ms
num_predict=256 | len=1067 | style=0.814 | nov=0.186 | length_ok=True | 2207 ms
num_predict=512 | len=1067 | style=0.814 | nov=0.186 | length_ok=True | 2249 ms


2025-09-06 17:28:53,316 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.814 top_nov=0.186 length_ok=True
2025-09-06 17:28:53,317 INFO [stylgen] generate.done user_id=u_custom1 generation_id=552651d6-693a-4070-b773-0742c9a660c0 chosen_sim=0.814 chosen_novelty=0.186


# Step 18 — length_hint=1500 vs num_predict (256 vs 512)

In [23]:
# Step 18: length_hint=1500 interacting with num_predict
import httpx, time
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
def gen_len_hint(npred: int, hint: int = 1500):
      t0 = time.time()
      r = httpx.post(
          f"{BASE}/generate",
          json={
              "user_id": user_id_custom,
              "brief": {
                  "keywords": ["onboarding", "dev teams"],
                  "goal": "educate",
                  "audience": "engineering managers",
                  "cta": "Comment with your experience",
                  "length_hint": hint,
                  "emoji": True,
              },
              "num_variants": 1,
              "llm_options": {"temperature": 0.7, "top_p": 0.9, "num_predict": npred, "seed": 123},
          },
          timeout=240.0,
      )
      r.raise_for_status()
      g = r.json()
      dt_ms = int((time.time() - t0) * 1000)
      v, s = g["chosen"], g["chosen"]["score"]
      return {"np": npred, "chars": len(v["text"]), "style": s["style_similarity"], "nov":
  s["novelty"], "length_ok": s["length_ok"], "lat_ms": dt_ms}
  
results = [gen_len_hint(n) for n in (256, 512)]
for r in results:
      print(f"len_hint=1500 | num_predict={r['np']:>3} | len={r['chars']:>4} | style={r['style']:.3f} | nov={r['nov']:.3f} | length_ok={r['length_ok']} | {r['lat_ms']} ms")
  
# Optional heavier run:
print(gen_len_hint(1024))

2025-09-06 17:31:48,081 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=1500
2025-09-06 17:31:49,988 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.801 top_nov=0.200 length_ok=False
2025-09-06 17:31:49,988 INFO [stylgen] generate.done user_id=u_custom1 generation_id=db8fe59e-ad74-4fb6-99b3-3261ec154f93 chosen_sim=0.801 chosen_novelty=0.200
2025-09-06 17:31:50,010 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=1500


INFO:     127.0.0.1:43194 - "POST /generate HTTP/1.1" 200 OK


2025-09-06 17:31:52,362 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.794 top_nov=0.208 length_ok=True
2025-09-06 17:31:52,362 INFO [stylgen] generate.done user_id=u_custom1 generation_id=fc060a00-03a1-4556-b7a6-5986140eb9aa chosen_sim=0.794 chosen_novelty=0.208
2025-09-06 17:31:52,384 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=1500


INFO:     127.0.0.1:43198 - "POST /generate HTTP/1.1" 200 OK
len_hint=1500 | num_predict=256 | len= 737 | style=0.801 | nov=0.200 | length_ok=False | 1928 ms
len_hint=1500 | num_predict=512 | len=1192 | style=0.794 | nov=0.208 | length_ok=True | 2373 ms
INFO:     127.0.0.1:43212 - "POST /generate HTTP/1.1" 200 OK
{'np': 1024, 'chars': 1192, 'style': 0.7936976552009583, 'nov': 0.20806312561035156, 'length_ok': True, 'lat_ms': 2374}


2025-09-06 17:31:54,737 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.794 top_nov=0.208 length_ok=True
2025-09-06 17:31:54,737 INFO [stylgen] generate.done user_id=u_custom1 generation_id=3838e1a2-08ad-4f85-b9de-26044ce344a9 chosen_sim=0.794 chosen_novelty=0.208


# Step 19 — compare top_p (0.80 vs 0.95) at fixed temperature

In [24]:
# Step 19: top_p comparison (hold temp/seed constant)
import httpx, textwrap, time
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
def gen_with_topp(tp: float, hint: int = 900, temp: float = 0.7, npred: int = 256):
      t0 = time.time()
      r = httpx.post(
          f"{BASE}/generate",
          json={
              "user_id": user_id_custom,
              "brief": {
                  "keywords": ["onboarding", "dev teams"],
                  "goal": "educate",
                  "audience": "engineering managers",
                  "cta": "Comment with your experience",
                  "length_hint": hint,
                  "emoji": True,
              },
              "num_variants": 1,
              "llm_options": {"temperature": temp, "top_p": tp, "num_predict": npred, "seed": 123},
          },
          timeout=180.0,
      )
      r.raise_for_status()
      out = r.json()
      dt_ms = int((time.time() - t0) * 1000)
      v = out["chosen"]
      s = v["score"]
      preview = textwrap.shorten(v["text"].replace("\n", " / "), width=160)
      return {
          "tp": tp, "chars": len(v["text"]), "style": s["style_similarity"], "nov": s["novelty"],
          "length_ok": s["length_ok"], "lat_ms": dt_ms, "preview": preview
      }
  
res_080 = gen_with_topp(0.80)
res_095 = gen_with_topp(0.95)
  
print("✓ Generated with top_p variants (temp=0.7, seed=123, len_hint=900, num_predict=256)")
for r in (res_080, res_095):
      print(f"top_p={r['tp']:.2f} | len={r['chars']} | style={r['style']:.3f} | nov={r['nov']:.3f} | length_ok={r['length_ok']} | {r['lat_ms']} ms")
      print("  ", r["preview"])


2025-09-06 17:33:56,974 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900
2025-09-06 17:33:59,412 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.813 top_nov=0.187 length_ok=True
2025-09-06 17:33:59,413 INFO [stylgen] generate.done user_id=u_custom1 generation_id=4bf02041-c22b-47d2-8e4b-cc8a8feef8f2 chosen_sim=0.813 chosen_novelty=0.187
2025-09-06 17:33:59,449 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900


INFO:     127.0.0.1:42982 - "POST /generate HTTP/1.1" 200 OK
INFO:     127.0.0.1:42990 - "POST /generate HTTP/1.1" 200 OK
✓ Generated with top_p variants (temp=0.7, seed=123, len_hint=900, num_predict=256)
top_p=0.80 | len=1140 | style=0.813 | nov=0.187 | length_ok=True | 2470 ms
   Here's my attempt at crafting a LinkedIn post that fits your requirements: / / "Confession time: I've seen more onboarding failures than successful ones [...]
top_p=0.95 | len=712 | style=0.800 | nov=0.200 | length_ok=True | 1699 ms
   Here's a draft LinkedIn post: / / "As an engineering manager, I've seen my fair share of onboarding disasters 🚨. Remember that one dev team that took 6 [...]


2025-09-06 17:34:01,124 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.800 top_nov=0.200 length_ok=True
2025-09-06 17:34:01,124 INFO [stylgen] generate.done user_id=u_custom1 generation_id=b45dbb78-70c0-47fc-a795-72ba5027a942 chosen_sim=0.800 chosen_novelty=0.200


# Step 20 — show full texts for top_p 0.80 vs 0.95

In [25]:
# Step 20: full chosen texts for top_p=0.80 and 0.95
import httpx
  
assert "user_id_custom" in globals(), "Please run Step 13 first."
  
def gen_topp_full(tp: float):
      r = httpx.post(
          f"{BASE}/generate",
          json={
              "user_id": user_id_custom,
              "brief": {
                  "keywords": ["onboarding", "dev teams"],
                  "goal": "educate",
                  "audience": "engineering managers",
                  "cta": "Comment with your experience",
                  "length_hint": 900,
                  "emoji": True,
              },
              "num_variants": 1,
              "llm_options": {"temperature": 0.7, "top_p": tp, "num_predict": 256, "seed": 123},
          },
          timeout=180.0,
      )
      r.raise_for_status()
      return r.json()
  
g_tp080 = gen_topp_full(0.80)
g_tp095 = gen_topp_full(0.95)
  
print("=== top_p = 0.80 ===")
print(g_tp080["chosen"]["text"])
print("\nScore:", g_tp080["chosen"]["score"])
  
print("\n" + "="*80)
print("=== top_p = 0.95 ===")
print(g_tp095["chosen"]["text"])
print("\nScore:", g_tp095["chosen"]["score"])


2025-09-06 17:36:56,671 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900
2025-09-06 17:36:58,895 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.813 top_nov=0.192 length_ok=True
2025-09-06 17:36:58,895 INFO [stylgen] generate.done user_id=u_custom1 generation_id=1fa567af-8373-4068-9767-de0cf270cc61 chosen_sim=0.813 chosen_novelty=0.192
2025-09-06 17:36:58,924 INFO [stylgen] generate.request user_id=u_custom1 goal=educate keywords=onboarding,dev teams variants=1 len_hint=900


INFO:     127.0.0.1:55422 - "POST /generate HTTP/1.1" 200 OK
INFO:     127.0.0.1:55428 - "POST /generate HTTP/1.1" 200 OK
=== top_p = 0.80 ===
Here's my attempt at crafting a LinkedIn post that fits your requirements:

"Confession time: I've seen more onboarding disasters than I can count 🚨. As an engineering manager, you know how crucial it is to get dev teams up and running quickly. But let's face it, most onboarding processes are a mess of unnecessary meetings, unclear expectations, and too much repetition.

My team and I have developed a 3-step approach that's been a for us: prioritize clear communication, automate repetitive tasks, and schedule regular check-ins. For example, we've reduced our average onboarding time by 30% simply by implementing a standardized template for new hires to follow 📈.

So, what's your experience with onboarding? Have you found a process that works for you? Comment below and let's swap stories! #EngineeringManagement #DevTeamOnboarding #ManagerLife"

Th

2025-09-06 17:37:00,573 INFO [stylgen.pipeline] variants.sorted count=1 top_sim=0.800 top_nov=0.200 length_ok=True
2025-09-06 17:37:00,573 INFO [stylgen] generate.done user_id=u_custom1 generation_id=b59308fe-20b6-47f6-878e-4eb5a6d5093f chosen_sim=0.800 chosen_novelty=0.200


# other parameters to tweak

- LLM options: temperature, top_p, top_k, num_predict, repeat_penalty, seed, stop, num_ctx,
  mirostat, mirostat_tau, mirostat_eta.
- Brief: keywords, goal, audience, cta, length_hint, emoji, link.
- Variants: num_variants (returns multiple and reranks).
- Persona prefs: tone_descriptors, taboo_phrases, formality, emoji_ok, hashtags_niche,
  structure_pref.
- Embedder/model: STYLGEN_EMBEDDER, STYLGEN_ST_MODEL, OLLAMA_MODEL; logging via STYLGEN_DEBUG,
  STYLGEN_LOG_LEVEL.
  


Notebook Walkthrough
  
  - Step 1: Start Server
      - Purpose: Launches uvicorn stylgen_v0.main:app in the background with HashingEmbedder.
      - Vars: HOST, PORT, BASE build the API base URL. SERVER_PROC tracks the spawned server so you
  can terminate/restart it. env sets STYLGEN_LOG_LEVEL, STYLGEN_DEBUG, STYLGEN_EMBEDDER=hash.
      - Behavior: If a previous SERVER_PROC exists, it’s terminated to avoid port conflicts. A new
  process is started and PID printed.
      - Behavior: If a previous SERVER_PROC exists, it’s terminated to avoid port conflicts. A new
  process is started and PID printed.
  - Step 1 (env tweak cell)
      - Purpose: Sets env["OLLAMA_BASE"] and env["OLLAMA_MODEL"] in the notebook process.
      - Note: This does not affect the already running server (env is read at process start). Use
  these only before launching/restarting the server.
  - Step 2: Health Check
      - Purpose: Polls GET {BASE}/health up to ~10s until it returns {"status": "ok"}.
      - Vars: BASE reused; httpx used synchronously for a simple check.
      - Outcome: Confirms server is ready.
  - Step 3: Create Persona (example)
      - Purpose: Sends POST /persona with a sample user_id and three writing samples + preferences.
      - Vars: user_id, persona_payload. Response saved as resp; card = resp["persona"].
      - Behavior: Server embeds samples, stores vectors, and builds a PersonaCard (centroid +
  exemplar_ids). Prints exemplar_ids, centroid_dim (384 with hashing), and tone.
  - Step 4: Generate (non‑stream)
      - Purpose: Sends POST /generate with a brief, asks for 2 variants, forwards llm_options.
      - Vars: gen_payload, response as g; extracts generation_id, g["variants"], and g["chosen"].
      - Output: Prints chosen preview and per‑variant scores (style similarity, novelty, length_ok).
  - Step 5: Show Full Variant Texts
      - Purpose: Displays the entire text for the chosen variant and all variants with their scores.
      - Vars: Uses g from Step 4.
  - Step 6: Stream a Live Draft (SSE)
      - Purpose: Streams raw tokens from POST /generate/stream and prints as they arrive.
      - Vars: Async function stream_post(); builds req (same brief as Step 4). Tracks printed to
  truncate display after ~1200 chars.
      - Behavior: Prints an initial [META] event (exemplar previews and brief meta), then token
  chunks (data: lines) until [done]. Streaming output is un‑critiqued (raw LLM text).
  - Step 7: Submit Feedback
      - Purpose: POST /feedback for the last generation.
      - Vars: fb_payload uses user_id and g["generation_id"]. Requires g to exist.
      - Behavior: Server verifies generation exists and ownership matches. Returns
  {"status":"received"}.
  - Step 8a: Install Sentence‑Transformers (optional extra)
      - Purpose: Runs uv sync --extra hf-embeddings from repo root to install the optional embeddings
  stack.
      - Behavior: Downloads heavy packages (Torch, Transformers, etc.) into your uv environment,
  enabling ST embeddings.
  - Step 8b: Restart with ST Embedder
      - Purpose: Terminates the previous server and relaunches with STYLGEN_EMBEDDER=st and
  STYLGEN_ST_MODEL=intfloat/e5-large-v2.
      - Vars: Reuses SERVER_PROC, HOST/PORT/BASE, sets new env.
      - Note: All in‑memory state is reset on restart; personas and generations must be recreated.
  - Step 8c: Health Check (ST)
      - Purpose: Same as Step 2, confirms the ST‑backed server is up.
  - Step 9: Create Persona (ST)
      - Purpose: Creates a new persona under user_id = "u_nb1_st".
      - Output: centroid_dim is 1024 (from the ST model’s embedding size). Exemplars and centroid
  computed with ST.
  - Step 10: Generate (ST)
      - Purpose: Generates with the ST persona; here num_variants=3 (note the increase vs earlier).
      - Vars: Response saved to g_st.
      - Output: Per‑variant scores; top may have length_ok=False depending on draft length. Sorting
  favors style similarity + novelty, then length bonus.
  - Step 11: Show Full Variant Texts (ST)
      - Purpose: Displays g_st["chosen"] and all g_st["variants"] with scores.
      - Observation: Higher style similarity values vs hashing are normal with a higher‑quality
  embedder.
  - Step 12: Stream (ST)
      - Purpose: Same as Step 6, but with the ST persona (user_id = "u_nb1_st").
      - Behavior: Prints [META] then streamed tokens until [done]. Uses last_event to distinguish
  meta vs token chunks.
  - Step 13: Create Your Own Persona
      - Purpose: Lets you define user_id_custom, your custom_samples, and preferences_custom.
      - Vars: Sends POST /persona with your data; prints exemplar_ids and centroid_dim (1024 under
  ST).
  - Step 14: Generate (Custom Persona)
      - Purpose: Generates 2 variants for user_id_custom with the same brief defaults.
      - Vars: Saves to g_custom. Prints per‑variant scores and a chosen preview.
  - Step 15: Show Full Variant Texts (Custom Persona)
      - Purpose: Displays chosen text and all variants for your persona, with scores.
  - Step 16: Temperature Comparison (0.2 vs 1.0)
      - Purpose: Compares outputs and scores at different temperature values with a fixed seed=123.
      - Vars: gen_with_temp(temp) helper. Prints length, style/novelty, and a preview for each run.
      - Note: seed helps reproducibility; Ollama forwards it at the provider layer.
  - Step 17: num_predict Comparison
      - Purpose: Measures the effect of num_predict (max new tokens) on output length and latency.
      - Vars: gen_with_np(npred) times each request; prints length and lat_ms.
      - Observation: num_predict is a ceiling, not a target; outputs can saturate before the cap.
  - Step 18: length_hint vs num_predict
      - Purpose: Increases length_hint to 1500 and compares num_predict=256 vs 512 (and optional
  1024).
      - Outcome: Shows length_ok flips to True with a higher num_predict, allowing longer drafts
  closer to the hint.
  - Step 19: top_p Comparison (0.80 vs 0.95)
      - Purpose: Tests nucleus sampling breadth at fixed temperature=0.7 and seed=123.
      - Vars: gen_with_topp(tp) prints length, scores, and a short preview.
      - Observation: Lower top_p (0.80) tends to be more focused/longer; higher (0.95) more
  permissive/varied and often shorter within the same num_predict.
  - Step 20: Full Texts for top_p Runs
      - Purpose: Prints the complete chosen text for top_p=0.80 and 0.95 so you can read and compare.
      - Vars: g_tp080, g_tp095 hold each response. Scores printed for each.
  - Other Parameters to Tweak (markdown)
      - Summary: Lists knobs you can adjust:
      - LLM: `temperature`, `top_p`, `top_k`, `num_predict`, `repeat_penalty`, `seed`, `stop`,
  `num_ctx`, `mirostat*`.
      - Brief: `keywords`, `goal`, `audience`, `cta`, `length_hint`, `emoji`, `link`.
      - Variants: `num_variants` (reranking logic chooses `chosen`).
      - Persona: tone, taboo phrases, formality, emoji/hashtags, structure.
      - Model: `STYLGEN_EMBEDDER`, `STYLGEN_ST_MODEL`, `OLLAMA_MODEL`, logging via `STYLGEN_DEBUG`,
  `STYLGEN_LOG_LEVEL`.
  
