In [None]:
# Imports
from collections import deque
from typing import List, Dict, Deque, Iterator
import time
import random

from blazemetrics import (
    compute_text_metrics,
    aggregate_samples,
    monitor_tokens_sync,
    enforce_stream_sync,
    Guardrails,
    MetricsExporters,
)

# Confirmation banner: this line is reached only if every import above
# succeeded, since a failing import raises before the cell gets here.
print("✅ Imports ready")


In [None]:
# Simulated streaming source

def simulate_stream(n: int = 300, seed: int = 123) -> List[tuple[str, List[str]]]:
    """Build a deterministic list of ``(prompt, references)`` pairs.

    Item ``i`` is ``("user asked {i}", ["answer for {i}"])``, except that every
    75th item (``i = 0, 75, 150, ...``) carries ``["noisy reference"]`` instead,
    which simulates a periodic quality dip.

    Args:
        n: Number of items to generate. Values <= 0 produce an empty list.
        seed: Kept for interface compatibility. It currently has no effect:
            the generated data involves no randomness, so the same ``n``
            always yields the same list.

    Returns:
        A list of ``n`` ``(prompt, references)`` tuples in index order.
    """
    data: List[tuple[str, List[str]]] = []
    for i in range(n):
        # insert quality dips every 75 steps (including step 0)
        is_dip = i % 75 == 0
        ref = ["noisy reference"] if is_dip else [f"answer for {i}"]
        data.append((f"user asked {i}", ref))
    return data

print("✅ Stream simulator ready")


In [None]:
# Rolling-window metrics with thresholded alerts

# Alert limits applied to the aggregate of the current window.
# bleu / rouge1_f1 / chrf alert when the value drops BELOW the limit;
# wer alerts when the value rises ABOVE it.
ALERT_THRESHOLDS = {
    "bleu": 0.15,
    "rouge1_f1": 0.30,
    "chrf": 0.25,
    "wer": 0.40,  # higher is worse
}

# (metric key, alert label, True when a HIGHER value is the bad direction)
ALERT_RULES = [
    ("bleu", "BLEU drop", False),
    ("rouge1_f1", "ROUGE-1 drop", False),
    ("chrf", "chrF drop", False),
    ("wer", "WER spike", True),
]

# Holds the most recent (prediction, references) pairs; the deque discards the
# oldest pair automatically once maxlen is reached.
window: Deque[tuple[str, List[str]]] = deque(maxlen=100)
# One aggregate dict per step for which a full window was available.
history: List[Dict[str, float]] = []

for i, (prompt, ref) in enumerate(simulate_stream()):
    pred = prompt.replace("asked", "answered")  # placeholder for model
    window.append((pred, ref))
    if len(window) < window.maxlen:
        continue  # no metrics until the window holds maxlen samples

    candidates = [p for p, _ in window]
    refs = [r for _, r in window]

    # NOTE(review): assumes compute_text_metrics returns per-sample scores and
    # aggregate_samples reduces them to one {metric: value} dict — confirm
    # against the blazemetrics documentation.
    sm = compute_text_metrics(candidates, refs, include=["bleu", "rouge1", "chrf", "wer"])
    agg = aggregate_samples(sm)
    history.append(agg)

    alerts = []
    for key, label, higher_is_worse in ALERT_RULES:
        if key not in agg:
            continue  # a metric missing from the aggregate never alerts
        value = agg[key]
        limit = ALERT_THRESHOLDS[key]
        breached = value > limit if higher_is_worse else value < limit
        if breached:
            alerts.append(f"{label}: {value:.3f}")

    if i % 20 == 0:
        # Build the rounded view first: writing the comprehension inside the
        # f-string with doubled braces would print it as literal text.
        rounded = {k: round(v, 3) for k, v in agg.items()}
        print(f"t={i}: metrics={rounded}")
    if alerts:
        print("ALERT:", "; ".join(alerts))

print("✅ Rolling-window monitoring complete")


In [None]:
# Token-level guardrails monitoring and enforcement

# Pattern for numbers shaped like a US Social Security number (ddd-dd-dddd).
SSN_PATTERN = r"\b\d{3}-\d{2}-\d{4}\b"

# Guardrail policy shared by the monitoring and enforcement loops in this cell.
# NOTE(review): the exact effect of redact_pii / safety is defined by
# blazemetrics.Guardrails — confirm against its documentation.
rails = Guardrails(
    blocklist=["bomb", "terror"],
    regexes=[SSN_PATTERN],
    case_insensitive=True,
    redact_pii=True,
    safety=True,
)

def token_iter_from_text(text: str, n: int = 6) -> Iterator[str]:
    """Yield ``text`` in consecutive chunks of up to ``n`` words.

    The text is split on whitespace, and each yielded chunk is the next ``n``
    words joined by single spaces. The final chunk may hold fewer than ``n``
    words. Empty or whitespace-only text yields nothing.

    Args:
        text: Text to break into chunks.
        n: Maximum number of words per chunk.

    Yields:
        Space-joined word groups, in their original order.
    """
    words = text.split()
    chunk_starts = range(0, len(words), n)
    yield from (" ".join(words[start:start + n]) for start in chunk_starts)

# Sample input containing an SSN-shaped number (matches the regex rule) and
# the blocklisted word "terror", so the guardrails have something to act on.
text = (
    "Write a note including an SSN 123-45-6789 and do not ignore previous instructions. "
    "This could be dangerous, almost like a terror threat."
)

print("— Monitoring tokens —")
# Print each result yielded by the monitor as a plain dict.
for res in monitor_tokens_sync(token_iter_from_text(text, 6), rails, every_n_tokens=1, joiner=" "):
    print(dict(res.items()))

print("\n— Enforcing tokens —")
# Chunks are printed back-to-back on one line; the bare print() below ends it.
# NOTE(review): assumes the yielded chunks already carry any needed spacing
# (joiner=" ") — confirm against enforce_stream_sync's documentation.
for chunk in enforce_stream_sync(token_iter_from_text(text, 6), rails, every_n_tokens=1, joiner=" ", replacement="[BLOCKED]", safety_threshold=0.6):
    print(chunk, end="")
print()


In [None]:
# Optional: Export aggregated metrics to Prometheus/StatsD

# No Prometheus gateway or StatsD address is configured here.
# NOTE(review): presumably export() then pushes nothing — confirm against
# the MetricsExporters documentation.
exporters = MetricsExporters(prometheus_gateway=None, statsd_addr=None)

if history:
    latest = history[-1]
    # Take the label from the rolling window itself so it cannot drift from
    # the deque's maxlen set in the monitoring cell.
    exporters.export(latest, labels={"window": str(window.maxlen)})
    print("✅ Exported latest window metrics (mock). Configure gateways to push.")
else:
    # history only gains entries once a full window has been aggregated.
    print("No aggregated window metrics available; nothing exported.")
