In [None]:
%pip install weave requests --quiet
# If you want to use the optional OpenAI example, uncomment the next line:
# %pip install openai --quiet


In [None]:
import os
import weave

# Weave project that will collect the traces and scorer results produced below.
PROJECT_NAME = "chakoshi-guardrails-cookbook"
weave.init(PROJECT_NAME)

# Chakoshi credentials/configuration come from the environment; both are None
# when the corresponding variable is unset.
CHAKOSHI_API_KEY = os.environ.get("CHAKOSHI_API_KEY")
CHAKOSHI_CATEGORY_SET_ID = os.environ.get("CHAKOSHI_CATEGORY_SET_ID")

# Only warn (do not raise) so the remaining cells can still be executed.
if CHAKOSHI_API_KEY is None or CHAKOSHI_API_KEY == "":
    print("[WARN] CHAKOSHI_API_KEY is not set. Please set it before running.")


In [None]:
import requests
from typing import Any, Dict, Optional

def _chakoshi_unscored(reason: str) -> Dict[str, Any]:
    """Build the result returned when no moderation verdict could be obtained.

    The result is deliberately *not* flagged (fail-open); `reason` records why
    the check was skipped or failed so it is still visible in the trace.
    """
    return {
        "flagged": False,
        "reason": reason,
        "unsafe_score": None,
        "unsafe_category": None,
        "label_str": None,
    }


class ChakoshiModerationScorer(weave.Scorer):
    """
    A Weave Scorer that calls Chakoshi `/v1/judge/text` to evaluate output safety.

    Args:
        model (str): Chakoshi model ID to use for moderation.
        category_set_id (str | None): Category Set ID; if omitted, the
            module-level CHAKOSHI_CATEGORY_SET_ID is used, and if that is also
            empty the field is left out of the request.
        timeout_seconds (float): HTTP timeout seconds.

    Returns:
        dict: Score result fields (returned by `score`):
            - flagged (bool): Whether the content is unsafe.
            - reason (str | None): Short reason when unsafe, or a description
              of the failure when the API could not be consulted.
            - unsafe_score (float | None): Unsafe score (if provided).
            - unsafe_category (str | None): Category detected.
            - label_str (str | None): Label string.

    Note:
        The scorer fails open: a missing API key, a transport error, a non-200
        status or an unparseable body all yield `flagged=False` with the cause
        in `reason`. Callers that need fail-closed behaviour must inspect
        `reason` themselves.

    Examples:
        >>> scorer = ChakoshiModerationScorer()
        >>> result = scorer.score("This is a safe text")
        >>> isinstance(result["flagged"], bool)
        True
    """

    model: str = "chakoshi-moderation-241223"
    category_set_id: Optional[str] = None
    timeout_seconds: float = 10.0

    @weave.op
    def score(self, output: str) -> Dict[str, Any]:
        """
        Evaluate the given output text using the Chakoshi API.

        Args:
            output (str): The text to evaluate (coerced with `str()`).

        Returns:
            dict: The score result (flagged, reason, unsafe_score,
            unsafe_category, label_str). Never raises for API-side failures;
            those are reported through `reason` with `flagged=False`.
        """
        if not CHAKOSHI_API_KEY:
            return _chakoshi_unscored("CHAKOSHI_API_KEY is not set")

        url = "https://api.beta.chakoshi.ntt.com/v1/judge/text"
        headers = {
            "Authorization": f"Bearer {CHAKOSHI_API_KEY}",
            "Content-Type": "application/json",
            "accept": "application/json",
        }
        payload: Dict[str, Any] = {
            "input": str(output),
            "model": self.model,
        }
        # The instance setting wins over the environment-provided default.
        category_set_id = self.category_set_id or CHAKOSHI_CATEGORY_SET_ID
        if category_set_id:
            payload["category_set_id"] = category_set_id

        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=self.timeout_seconds)
        except Exception as e:
            # Best-effort by design: any transport failure becomes an unscored result.
            return _chakoshi_unscored(f"Chakoshi API error: {e}")

        if resp.status_code != 200:
            return _chakoshi_unscored(f"Chakoshi API non-200: {resp.status_code}")

        try:
            data = resp.json() if resp.content else {}
        except ValueError as e:
            # A 200 with a non-JSON body must not crash the guardrail; the JSON
            # decode errors raised by `Response.json()` are ValueError subclasses.
            return _chakoshi_unscored(f"Chakoshi API invalid JSON: {e}")

        # Tolerate a missing, null or non-object "results" field.
        results = data.get("results") if isinstance(data, dict) else None
        if not isinstance(results, dict):
            results = {}

        unsafe_flag = bool(results.get("unsafe_flag", False))
        label_str = results.get("label_str")
        unsafe_score = results.get("unsafe_score")
        unsafe_category = results.get("unsafe_category")

        return {
            "flagged": unsafe_flag,
            "reason": (f"Chakoshi flagged: {unsafe_category}, score: {unsafe_score} ({label_str})" if unsafe_flag else None),
            "unsafe_score": unsafe_score,
            "unsafe_category": unsafe_category,
            "label_str": label_str,
        }


In [None]:
@weave.op
def generate_response(prompt: str) -> str:
    """
    Simple demo responder (LLM stand-in) used for illustrating guardrails.

    Args:
        prompt (str): User input prompt.

    Returns:
        str: The prompt followed by the literal suffix " response...".

    Examples:
        >>> generate_response("hello")
        'hello response...'
    """
    return f"{prompt} response..."


In [None]:
async def process_with_guardrail(prompt: str) -> str:
    """
    Generate a demo response and gate it behind the Chakoshi moderation scorer.

    The response is produced through `generate_response.call`, which also
    yields the traced call object; the scorer is then attached to that call
    with `call.apply_scorer` and its result decides the outcome.

    Args:
        prompt (str): User input prompt.

    Returns:
        str: The generated response when it is not flagged, otherwise a
        "[BLOCKED] ..." message carrying the scorer's reason (or a generic
        message when no reason is given).

    Examples:
        >>> import asyncio
        >>> asyncio.run(process_with_guardrail("safe topic"))  # doctest: +ELLIPSIS
        'safe topic response...'
    """
    draft, traced_call = generate_response.call(prompt)

    guardrail = ChakoshiModerationScorer(
        name="chakoshi-guardrail",
        category_set_id=CHAKOSHI_CATEGORY_SET_ID,
    )
    verdict = (await traced_call.apply_scorer(guardrail)).result

    # Happy path first: anything not flagged is returned untouched.
    if not verdict.get("flagged"):
        return draft

    why = verdict.get('reason') or 'Unsafe content detected'
    return f"[BLOCKED] {why}"


In [None]:
# Run a single prompt through the guardrail pipeline and show the final text
# (either the generated response or a "[BLOCKED] ..." message).
# NOTE: the top-level `await` is meant for a notebook kernel; in a plain
# script, wrap the call with `asyncio.run(...)` instead.
print("=== Minimal example ===")
res = await process_with_guardrail("Tell me about the future of AI")
print(res)


In [None]:
import os
# from openai import AsyncOpenAI  # only needed if you enable the real OpenAI call below

# None when the variable is unset; the function below then uses its demo fallback.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


@weave.op()
async def generate_text_with_openai(prompt: str) -> str:
    """
    Optional OpenAI-flavoured generator used to demonstrate guarding an async op.

    As written, no request is sent to OpenAI: both branches return a
    placeholder string whose prefix tells whether OPENAI_API_KEY was set.
    The real call is kept as a commented-out example inside the function.

    Args:
        prompt (str): User input prompt.

    Returns:
        str: "[simulated-openai] <prompt> response..." when a key is
        configured, otherwise "[no-openai] <prompt> response...".
    """
    if OPENAI_API_KEY:
        # To call OpenAI for real, import AsyncOpenAI and replace the return
        # below with the following (tune the model and parameters as needed):
        # client = AsyncOpenAI(api_key=OPENAI_API_KEY)
        # resp = await client.chat.completions.create(
        #     model="gpt-4o",
        #     messages=[
        #         {"role": "system", "content": "Respond politely in English or Japanese."},
        #         {"role": "user", "content": prompt},
        #     ],
        # )
        # return resp.choices[0].message.content
        return f"[simulated-openai] {prompt} response..."

    # No API key available: demo fallback.
    return f"[no-openai] {prompt} response..."


In [None]:
async def guarded_generation_with_openai(prompt: str) -> str:
    """
    Run `generate_text_with_openai` and gate its output behind Chakoshi.

    The scorer is created with CHAKOSHI_CATEGORY_SET_ID so the configured
    category set is the one used for evaluation.

    Args:
        prompt (str): User input prompt.

    Returns:
        str: The generated text when it is not flagged, otherwise a
        "[BLOCKED] ..." message carrying the scorer's reason (or a generic
        message when no reason is given).
    """
    # The op is async, so `.call` must be awaited to get (output, call).
    generated, traced_call = await generate_text_with_openai.call(prompt)

    guardrail = ChakoshiModerationScorer(
        name="chakoshi-guardrail",
        category_set_id=CHAKOSHI_CATEGORY_SET_ID,
    )
    verdict = (await traced_call.apply_scorer(guardrail)).result

    if not verdict.get("flagged"):
        return generated

    why = verdict.get('reason') or 'Unsafe content detected'
    return f"[BLOCKED] {why}"


In [None]:
# Exercise the OpenAI-flavoured pipeline with one prompt and print the result
# (the generated text, or a "[BLOCKED] ..." message if the scorer flags it).
# NOTE: the top-level `await` is meant for a notebook kernel; in a plain
# script, wrap the call with `asyncio.run(...)` instead.
print("=== OpenAI example (optional) ===")
resp = await guarded_generation_with_openai("How much energy does it take to go back to the future with a DeLorean?")
print(resp)


In [None]:
# Batch demo: push several prompts through the guardrail one after another
# and print each input next to the text that was finally returned.
print("=== Batch test ===")

test_prompts = [
    "Recommend travel spots",
    "How much energy to time travel with a DeLorean?",
    "Tell me about AI ethics",
]

# Prompts are awaited sequentially, so each one is generated and scored
# before the next starts.
for p in test_prompts:
    out = await process_with_guardrail(p)
    print(f"Input: {p}\nOutput: {out}\n")
