diff --git a/temporalio/contrib/google_adk_agents/_model.py b/temporalio/contrib/google_adk_agents/_model.py index 8b32a7432..e753558f8 100644 --- a/temporalio/contrib/google_adk_agents/_model.py +++ b/temporalio/contrib/google_adk_agents/_model.py @@ -1,5 +1,7 @@ +import json +import logging from collections.abc import AsyncGenerator, Callable -from datetime import timedelta +from datetime import datetime, timedelta, timezone from google.adk.models import BaseLlm, LLMRegistry from google.adk.models.llm_request import LlmRequest @@ -7,8 +9,23 @@ import temporalio.workflow from temporalio import activity, workflow +from temporalio.contrib.pubsub import PubSubClient from temporalio.workflow import ActivityConfig +logger = logging.getLogger(__name__) + +EVENTS_TOPIC = "events" + + +def _make_event(event_type: str, **data: object) -> bytes: + return json.dumps( + { + "type": event_type, + "timestamp": datetime.now(timezone.utc).isoformat(), + "data": data, + } + ).encode() + @activity.defn async def invoke_model(llm_request: LlmRequest) -> list[LlmResponse]: @@ -36,6 +53,70 @@ async def invoke_model(llm_request: LlmRequest) -> list[LlmResponse]: ] +@activity.defn +async def invoke_model_streaming(llm_request: LlmRequest) -> list[LlmResponse]: + """Streaming-aware model activity. + + Calls the LLM with stream=True, publishes TEXT_DELTA events via + PubSubClient as tokens arrive, and returns the collected responses. + + The PubSubClient auto-detects the activity context to find the parent + workflow for publishing. + + Args: + llm_request: The LLM request containing model name and parameters. + + Returns: + List of LLM responses from the model. 
+ """ + if llm_request.model is None: + raise ValueError("No model name provided, could not create LLM.") + + llm = LLMRegistry.new_llm(llm_request.model) + if not llm: + raise ValueError(f"Failed to create LLM for model: {llm_request.model}") + + pubsub = PubSubClient.from_activity(batch_interval=0.1) + responses: list[LlmResponse] = [] + text_buffer = "" + + async with pubsub: + pubsub.publish(EVENTS_TOPIC, _make_event("LLM_CALL_START"), force_flush=True) + + async for response in llm.generate_content_async( + llm_request=llm_request, stream=True + ): + activity.heartbeat() + responses.append(response) + + if response.content and response.content.parts: + for part in response.content.parts: + if part.text: + text_buffer += part.text + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_DELTA", delta=part.text), + ) + if part.function_call: + pubsub.publish( + EVENTS_TOPIC, + _make_event( + "TOOL_CALL_START", + tool_name=part.function_call.name, + ), + ) + + if text_buffer: + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_COMPLETE", text=text_buffer), + force_flush=True, + ) + pubsub.publish(EVENTS_TOPIC, _make_event("LLM_CALL_COMPLETE"), force_flush=True) + + return responses + + class TemporalModel(BaseLlm): """A Temporal-based LLM model that executes model invocations as activities.""" @@ -43,6 +124,7 @@ def __init__( self, model_name: str, activity_config: ActivityConfig | None = None, + streaming: bool = False, *, summary_fn: Callable[[LlmRequest], str | None] | None = None, ) -> None: @@ -51,6 +133,9 @@ def __init__( Args: model_name: The name of the model to use. activity_config: Configuration options for the activity execution. + streaming: When True, the model activity uses the streaming LLM + endpoint and publishes token events via PubSubClient. The + workflow is unaffected -- it still receives complete responses. summary_fn: Optional callable that receives the LlmRequest and returns a summary string (or None) for the activity. 
Must be deterministic as it is called during workflow execution. If @@ -62,6 +147,7 @@ def __init__( """ super().__init__(model=model_name) self._model_name = model_name + self._streaming = streaming self._summary_fn = summary_fn self._activity_config = ActivityConfig( start_to_close_timeout=timedelta(seconds=60) @@ -80,7 +166,8 @@ async def generate_content_async( Args: llm_request: The LLM request containing model parameters and content. - stream: Whether to stream the response (currently ignored). + stream: Whether to stream the response (currently ignored; use the + ``streaming`` constructor parameter instead). Yields: The responses from the model. @@ -103,8 +190,9 @@ async def generate_content_async( agent_name = llm_request.config.labels.get("adk_agent_name") if agent_name: config["summary"] = agent_name + activity_fn = invoke_model_streaming if self._streaming else invoke_model responses = await workflow.execute_activity( - invoke_model, + activity_fn, args=[llm_request], **config, ) diff --git a/temporalio/contrib/google_adk_agents/_plugin.py b/temporalio/contrib/google_adk_agents/_plugin.py index 9be321398..7bd1862bb 100644 --- a/temporalio/contrib/google_adk_agents/_plugin.py +++ b/temporalio/contrib/google_adk_agents/_plugin.py @@ -8,7 +8,10 @@ from temporalio import workflow from temporalio.contrib.google_adk_agents._mcp import TemporalMcpToolSetProvider -from temporalio.contrib.google_adk_agents._model import invoke_model +from temporalio.contrib.google_adk_agents._model import ( + invoke_model, + invoke_model_streaming, +) from temporalio.contrib.pydantic import ( PydanticPayloadConverter, ToJsonOptions, @@ -95,7 +98,7 @@ def workflow_runner(runner: WorkflowRunner | None) -> WorkflowRunner: ) return runner - new_activities = [invoke_model] + new_activities = [invoke_model, invoke_model_streaming] if toolset_providers is not None: for toolset_provider in toolset_providers: new_activities.extend(toolset_provider._get_activities()) diff --git 
a/temporalio/contrib/openai_agents/_invoke_model_activity.py b/temporalio/contrib/openai_agents/_invoke_model_activity.py index cffd8855e..62a2eb2a7 100644 --- a/temporalio/contrib/openai_agents/_invoke_model_activity.py +++ b/temporalio/contrib/openai_agents/_invoke_model_activity.py @@ -4,9 +4,10 @@ """ import enum +import logging from dataclasses import dataclass from datetime import timedelta -from typing import Any +from typing import Any, NoReturn from agents import ( AgentOutputSchemaBase, @@ -24,6 +25,7 @@ RunContextWrapper, Tool, TResponseInputItem, + Usage, UserError, WebSearchTool, ) @@ -38,13 +40,19 @@ APIStatusError, AsyncOpenAI, ) +from openai.types.responses import ResponseCompletedEvent from openai.types.responses.tool_param import Mcp from typing_extensions import Required, TypedDict from temporalio import activity from temporalio.contrib.openai_agents._heartbeat_decorator import _auto_heartbeater +from temporalio.contrib.pubsub import PubSubClient from temporalio.exceptions import ApplicationError +logger = logging.getLogger(__name__) + +EVENTS_TOPIC = "events" + @dataclass class HandoffInput: @@ -187,6 +195,108 @@ class ActivityModelInput(TypedDict, total=False): prompt: Any | None +async def _empty_on_invoke_tool(_ctx: RunContextWrapper[Any], _input: str) -> str: + return "" + + +async def _empty_on_invoke_handoff(_ctx: RunContextWrapper[Any], _input: str) -> Any: + return None + + +async def _noop_shell_executor(*_a: Any, **_kw: Any) -> str: + return "" + + +def _build_tool(tool: ToolInput) -> Tool: + """Reconstruct a Tool from its data-conversion-friendly input form.""" + if isinstance( + tool, + ( + FileSearchTool, + WebSearchTool, + ImageGenerationTool, + CodeInterpreterTool, + LocalShellTool, + ToolSearchTool, + ), + ): + return tool + elif isinstance(tool, ShellToolInput): + return ShellTool( + name=tool.name, + environment=tool.environment, + executor=_noop_shell_executor, + ) + elif isinstance(tool, ApplyPatchToolInput): + return 
ApplyPatchTool(name=tool.name, editor=_NoopApplyPatchEditor()) + elif isinstance(tool, HostedMCPToolInput): + return HostedMCPTool(tool_config=tool.tool_config) + elif isinstance(tool, FunctionToolInput): + return FunctionTool( + name=tool.name, + description=tool.description, + params_json_schema=tool.params_json_schema, + on_invoke_tool=_empty_on_invoke_tool, + strict_json_schema=tool.strict_json_schema, + ) + else: + raise UserError(f"Unknown tool type: {tool.name}") # type:ignore[reportUnreachable] + + +def _build_tools_and_handoffs( + input: ActivityModelInput, +) -> tuple[list[Tool], list[Handoff[Any, Any]]]: + tools = [_build_tool(x) for x in input.get("tools", [])] + handoffs: list[Handoff[Any, Any]] = [ + Handoff( + tool_name=x.tool_name, + tool_description=x.tool_description, + input_json_schema=x.input_json_schema, + agent_name=x.agent_name, + strict_json_schema=x.strict_json_schema, + on_invoke_handoff=_empty_on_invoke_handoff, + ) + for x in input.get("handoffs", []) + ] + return tools, handoffs + + +def _raise_for_openai_status(e: APIStatusError) -> NoReturn: + """Translate an OpenAI APIStatusError into the right retry posture.""" + retry_after: timedelta | None = None + retry_after_ms_header = e.response.headers.get("retry-after-ms") + if retry_after_ms_header is not None: + retry_after = timedelta(milliseconds=float(retry_after_ms_header)) + + if retry_after is None: + retry_after_header = e.response.headers.get("retry-after") + if retry_after_header is not None: + retry_after = timedelta(seconds=float(retry_after_header)) + + should_retry_header = e.response.headers.get("x-should-retry") + if should_retry_header == "true": + raise e + if should_retry_header == "false": + raise ApplicationError( + "Non retryable OpenAI error", + non_retryable=True, + next_retry_delay=retry_after, + ) from e + + if e.response.status_code in [408, 409, 429] or e.response.status_code >= 500: + raise ApplicationError( + f"Retryable OpenAI status code: 
{e.response.status_code}", + non_retryable=False, + next_retry_delay=retry_after, + ) from e + + raise ApplicationError( + f"Non retryable OpenAI status code: {e.response.status_code}", + non_retryable=True, + next_retry_delay=retry_after, + ) from e + + class ModelActivity: """Class wrapper for model invocation activities to allow model customization. By default, we use an OpenAIProvider with retries disabled. Disabling retries in your model of choice is recommended to allow activity retries to define the retry model. @@ -203,72 +313,7 @@ def __init__(self, model_provider: ModelProvider | None = None): async def invoke_model_activity(self, input: ActivityModelInput) -> ModelResponse: """Activity that invokes a model with the given input.""" model = self._model_provider.get_model(input.get("model_name")) - - async def empty_on_invoke_tool( - _ctx: RunContextWrapper[Any], _input: str - ) -> str: - return "" - - async def empty_on_invoke_handoff( - _ctx: RunContextWrapper[Any], _input: str - ) -> Any: - return None - - def make_tool(tool: ToolInput) -> Tool: - if isinstance( - tool, - ( - FileSearchTool, - WebSearchTool, - ImageGenerationTool, - CodeInterpreterTool, - LocalShellTool, - ToolSearchTool, - ), - ): - return tool - elif isinstance(tool, ShellToolInput): - - async def _noop_executor(*a: Any, **kw: Any) -> str: # type: ignore[reportUnusedParameter] - return "" - - return ShellTool( - name=tool.name, - environment=tool.environment, - executor=_noop_executor, - ) - elif isinstance(tool, ApplyPatchToolInput): - return ApplyPatchTool( - name=tool.name, - editor=_NoopApplyPatchEditor(), - ) - elif isinstance(tool, HostedMCPToolInput): - return HostedMCPTool( - tool_config=tool.tool_config, - ) - elif isinstance(tool, FunctionToolInput): - return FunctionTool( - name=tool.name, - description=tool.description, - params_json_schema=tool.params_json_schema, - on_invoke_tool=empty_on_invoke_tool, - strict_json_schema=tool.strict_json_schema, - ) - else: - raise 
UserError(f"Unknown tool type: {tool.name}") # type:ignore[reportUnreachable] - - tools = [make_tool(x) for x in input.get("tools", [])] - handoffs: list[Handoff[Any, Any]] = [ - Handoff( - tool_name=x.tool_name, - tool_description=x.tool_description, - input_json_schema=x.input_json_schema, - agent_name=x.agent_name, - strict_json_schema=x.strict_json_schema, - on_invoke_handoff=empty_on_invoke_handoff, - ) - for x in input.get("handoffs", []) - ] + tools, handoffs = _build_tools_and_handoffs(input) try: return await model.get_response( @@ -284,40 +329,63 @@ async def _noop_executor(*a: Any, **kw: Any) -> str: # type: ignore[reportUnuse prompt=input.get("prompt"), ) except APIStatusError as e: - # Listen to server hints - retry_after = None - retry_after_ms_header = e.response.headers.get("retry-after-ms") - if retry_after_ms_header is not None: - retry_after = timedelta(milliseconds=float(retry_after_ms_header)) - - if retry_after is None: - retry_after_header = e.response.headers.get("retry-after") - if retry_after_header is not None: - retry_after = timedelta(seconds=float(retry_after_header)) - - should_retry_header = e.response.headers.get("x-should-retry") - if should_retry_header == "true": - raise e - if should_retry_header == "false": - raise ApplicationError( - "Non retryable OpenAI error", - non_retryable=True, - next_retry_delay=retry_after, - ) from e - - # Specifically retryable status codes - if ( - e.response.status_code in [408, 409, 429] - or e.response.status_code >= 500 - ): - raise ApplicationError( - f"Retryable OpenAI status code: {e.response.status_code}", - non_retryable=False, - next_retry_delay=retry_after, - ) from e + _raise_for_openai_status(e) + + @activity.defn + @_auto_heartbeater + async def invoke_model_activity_streaming( + self, input: ActivityModelInput + ) -> ModelResponse: + """Streaming-aware model activity. 
+ + Calls model.stream_response(), publishes each yielded OpenAI event + as JSON to the pubsub side channel, and returns the ModelResponse + built from the final ResponseCompletedEvent. Consumers receive + native OpenAI event types; no normalization happens here. + """ + model = self._model_provider.get_model(input.get("model_name")) + tools, handoffs = _build_tools_and_handoffs(input) + + pubsub = PubSubClient.from_activity(batch_interval=0.1) + final_response = None + try: + async with pubsub: + async for event in model.stream_response( + system_instructions=input.get("system_instructions"), + input=input["input"], + model_settings=input["model_settings"], + tools=tools, + output_schema=input.get("output_schema"), + handoffs=handoffs, + tracing=ModelTracing(input["tracing"]), + previous_response_id=input.get("previous_response_id"), + conversation_id=input.get("conversation_id"), + prompt=input.get("prompt"), + ): + activity.heartbeat() + pubsub.publish(EVENTS_TOPIC, event.model_dump_json().encode()) + if isinstance(event, ResponseCompletedEvent): + final_response = event.response + except APIStatusError as e: + _raise_for_openai_status(e) + + if final_response is None: raise ApplicationError( - f"Non retryable OpenAI status code: {e.response.status_code}", + "Stream ended without ResponseCompletedEvent", non_retryable=True, - next_retry_delay=retry_after, - ) from e + ) + + return ModelResponse( + output=final_response.output, + usage=Usage( + requests=1, + input_tokens=final_response.usage.input_tokens + if final_response.usage + else 0, + output_tokens=final_response.usage.output_tokens + if final_response.usage + else 0, + ), + response_id=final_response.id, + ) diff --git a/temporalio/contrib/openai_agents/_model_parameters.py b/temporalio/contrib/openai_agents/_model_parameters.py index 55827e0d5..d5b757a4e 100644 --- a/temporalio/contrib/openai_agents/_model_parameters.py +++ b/temporalio/contrib/openai_agents/_model_parameters.py @@ -68,3 +68,9 @@ class 
ModelActivityParameters: use_local_activity: bool = False """Whether to use a local activity. If changed during a workflow execution, that would break determinism.""" + + enable_streaming: bool = False + """When True, the model activity uses the streaming LLM endpoint and + publishes token events via PubSubClient. The workflow is unaffected -- + it still receives a complete ModelResponse. Incompatible with + use_local_activity (local activities do not support heartbeats).""" diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py index 03e689f17..9484a9f76 100644 --- a/temporalio/contrib/openai_agents/_temporal_model_stub.py +++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from datetime import timedelta from temporalio import workflow from temporalio.contrib.openai_agents._model_parameters import ModelActivityParameters @@ -166,7 +167,35 @@ def make_tool_info(tool: Tool) -> ToolInput: else: summary = None - if self.model_params.use_local_activity: + if self.model_params.enable_streaming: + if self.model_params.use_local_activity: + # The streaming activity relies on heartbeats to detect a + # stuck LLM call and on PubSubClient.from_activity() to + # signal partial results back to the workflow. Local + # activities support neither: their result commits with + # the workflow task, so there is no independent task to + # heartbeat against or to send signals from. + raise ValueError( + "Streaming is incompatible with local activities " + "(local activities do not support heartbeats or the " + "pubsub signal channel)." 
+ ) + return await workflow.execute_activity_method( + ModelActivity.invoke_model_activity_streaming, + activity_input, + summary=summary, + task_queue=self.model_params.task_queue, + schedule_to_close_timeout=self.model_params.schedule_to_close_timeout, + schedule_to_start_timeout=self.model_params.schedule_to_start_timeout, + start_to_close_timeout=self.model_params.start_to_close_timeout, + heartbeat_timeout=self.model_params.heartbeat_timeout + or timedelta(seconds=30), + retry_policy=self.model_params.retry_policy, + cancellation_type=self.model_params.cancellation_type, + versioning_intent=self.model_params.versioning_intent, + priority=self.model_params.priority, + ) + elif self.model_params.use_local_activity: return await workflow.execute_local_activity_method( ModelActivity.invoke_model_activity, activity_input, diff --git a/temporalio/contrib/openai_agents/_temporal_openai_agents.py b/temporalio/contrib/openai_agents/_temporal_openai_agents.py index f7757723c..60b4b36ef 100644 --- a/temporalio/contrib/openai_agents/_temporal_openai_agents.py +++ b/temporalio/contrib/openai_agents/_temporal_openai_agents.py @@ -205,7 +205,11 @@ def add_activities( if not register_activities: return activities or [] - new_activities = [ModelActivity(model_provider).invoke_model_activity] + model_activity = ModelActivity(model_provider) + new_activities = [ + model_activity.invoke_model_activity, + model_activity.invoke_model_activity_streaming, + ] server_names = [server.name for server in mcp_server_providers] if len(server_names) != len(set(server_names)): diff --git a/temporalio/contrib/pubsub/DESIGN-v2.md b/temporalio/contrib/pubsub/DESIGN-v2.md new file mode 100644 index 000000000..6d7c1c55c --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-v2.md @@ -0,0 +1,1195 @@ +# Temporal Workflow Pub/Sub — Design Document v2 + +Consolidated design document reflecting the current implementation. + +> The Python code in `sdk-python/temporalio/contrib/pubsub/` is authoritative. 
+> Both this document and the Notion page +> ["Streaming API Design Considerations"](https://www.notion.so/3478fc567738803d9c22eeb64a296e21) +> track it. When API or wire-format facts change in code, update this doc in +> the same commit and mirror to Notion. When new narrative (a decision, a +> comparison) lands in either doc, port it to the other before the next +> review cycle. + +## Overview + +A reusable pub/sub module for Temporal workflows. The workflow acts as the +message broker — it holds an append-only log of `(topic, data)` entries. +External clients (activities, starters, other services) publish and subscribe +through the workflow handle using Temporal primitives (signals, updates, +queries). + +The module ships as `temporalio.contrib.pubsub` in the Python SDK and is +designed to be cross-language compatible. Payloads are opaque byte strings — +the workflow does not interpret them. + +## Architecture + +``` + ┌──────────────────────────────────┐ + │ Temporal Workflow │ + │ (PubSub broker) │ + │ │ + │ ┌────────────────────────────┐ │ + │ │ Append-only log │ │ + │ │ [(topic, data), ...] │ │ + │ │ base_offset: int │ │ + │ │ publisher_sequences: {} │ │ + │ └────────────────────────────┘ │ + │ │ + signal ──────────►│ __pubsub_publish (with dedup) │ + update ──────────►│ __pubsub_poll (long-poll) │◄── subscribe() + query ──────────►│ __pubsub_offset │ + │ │ + │ publish() ── workflow-side │ + └──────────────────────────────────┘ + │ + │ continue-as-new + ▼ + ┌──────────────────────────────────┐ + │ PubSubState carries: │ + │ log, base_offset, │ + │ publisher_sequences │ + └──────────────────────────────────┘ +``` + +## API Surface + +### Workflow side — `PubSub` + +A helper class instantiated from `@workflow.init`. Its constructor +registers the pub/sub signal, update, and query handlers on the current +workflow via `workflow.set_signal_handler`, `workflow.set_update_handler`, +and `workflow.set_query_handler` — there is no base class to inherit. 
+This matches how other-language SDKs will express the same pattern +(imperative handler registration from inside the workflow body). + +```python +from dataclasses import dataclass +from temporalio import workflow +from temporalio.contrib.pubsub import PubSub, PubSubState + +@dataclass +class MyInput: + pubsub_state: PubSubState | None = None + +@workflow.defn +class MyWorkflow: + @workflow.init + def __init__(self, input: MyInput) -> None: + self.pubsub = PubSub(prior_state=input.pubsub_state) + + @workflow.run + async def run(self, input: MyInput) -> None: + self.pubsub.publish("status", b"started") + await do_work() + self.pubsub.publish("status", b"done") +``` + +Construct `PubSub(...)` once from `@workflow.init`. Include a +`PubSubState | None` field on your workflow input and always pass it as +`prior_state`: it is `None` on fresh starts and carries accumulated +state across continue-as-new (see [Continue-as-New](#continue-as-new)). +Workflows that will never continue-as-new may call `PubSub()` with no +argument. Instantiating `PubSub` twice on the same workflow raises +`RuntimeError`, detected via `workflow.get_signal_handler("__pubsub_publish")`. + +| Method / Handler | Kind | Description | +|---|---|---| +| `PubSub(prior_state=None)` | constructor | Initialize internal state and register handlers on the current workflow. Must be called from `@workflow.init`. | +| `publish(topic, value)` | instance method | Append to the log from workflow code. `value` is converted via the workflow's sync payload converter (no codec). | +| `get_state(publisher_ttl=900)` | instance method | Snapshot for CAN. Prunes dedup entries older than TTL. | +| `drain()` | instance method | Unblock polls and reject new ones for CAN. | +| `truncate(up_to_offset)` | instance method | Discard log entries before offset. | +| `__pubsub_publish` | signal handler | Receives publications from external clients (with dedup). 
| +| `__pubsub_poll` | update handler | Long-poll subscription: blocks until new items or drain. | +| `__pubsub_offset` | query handler | Returns the current global offset. | + +### Client side — `PubSubClient` + +Used by activities, starters, and any code with a workflow handle. + +```python +from temporalio.contrib.pubsub import PubSubClient + +# Preferred: factory method (enables CAN following + activity auto-detect) +client = PubSubClient.create(temporal_client, workflow_id) + +# --- Publishing (with batching) --- +# Values go through the client's data converter — including the codec +# chain (encryption, PII-redaction, compression) — per item. +async with client: + client.publish("events", TextDelta(delta="hello")) + client.publish("events", TextDelta(delta=" world")) + client.publish("events", TextComplete(), force_flush=True) + client.publish("raw", my_prebuilt_payload) # zero-copy fast path + +# --- Subscribing --- +# Pass result_type=T to have item.data decoded to T via the same codec +# chain. Without result_type, item.data is the raw Payload and the +# caller dispatches on metadata. +async for item in client.subscribe(["events"], result_type=EventUnion): + print(item.topic, item.data) + if is_done(item): + break +``` + +| Method | Description | +|---|---| +| `PubSubClient.create(client, wf_id)` | Factory with explicit Temporal client and workflow id. Follows CAN in `subscribe()`. | +| `PubSubClient.from_activity()` | Factory that pulls client and workflow id from the current activity context. Follows CAN in `subscribe()`. | +| `PubSubClient(handle)` | From handle directly (no CAN following; no codec chain — falls back to the default converter). | +| `publish(topic, value, force_flush=False)` | Buffer a message. `value` may be any converter-compatible object or a pre-built `Payload`. `force_flush` triggers immediate flush (fire-and-forget). | +| `subscribe(topics, from_offset, *, result_type=None, poll_cooldown=0.1)` | Async iterator. 
`result_type` decodes `item.data` to the given type; omit for raw `Payload`. Always follows CAN chains when created via `create` or `from_activity`. | +| `get_offset()` | Query current global offset. | + +Use as `async with` for batched publishing with automatic flush on exit. +There is no public `flush()` method — use `force_flush=True` on `publish()` +for immediate delivery, or rely on the background flusher and context +manager exit flush. + +#### Activity convenience + +Inside an activity, use `PubSubClient.from_activity()` — the Temporal +client and target workflow id come from the activity context, so the +caller doesn't have to thread them through: + +```python +@activity.defn +async def stream_events() -> None: + client = PubSubClient.from_activity(batch_interval=2.0) + async with client: + for chunk in generate_chunks(): + client.publish("events", chunk) + activity.heartbeat() +``` + +`from_activity()` is a separate factory rather than an overload of +`create()` because silently inferring arguments outside an activity +masks a configuration bug as a runtime error in an unrelated code +path. 
+ +## Data Types + +```python +from temporalio.api.common.v1 import Payload + +@dataclass +class PubSubItem: + topic: str + data: Any # Payload by default; decoded value when + # subscribe is called with result_type=T + offset: int = 0 # Populated at poll time + +@dataclass +class PublishEntry: + topic: str + data: str # Wire: base64(Payload.SerializeToString()) + +@dataclass +class PublishInput: + items: list[PublishEntry] + publisher_id: str = "" # For exactly-once dedup + sequence: int = 0 # Monotonic per publisher + +@dataclass +class PollInput: + topics: list[str] # Filter (empty = all) + from_offset: int = 0 # Global offset to resume from + +@dataclass +class PollResult: + items: list[_WireItem] # Wire-format items + next_offset: int = 0 # Offset for next poll + more_ready: bool = False # Truncated response; poll again + +@dataclass +class PubSubState: + log: list[_WireItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) +``` + +The containing workflow input must type the field as `PubSubState | None`, +not `Any` — `Any`-typed fields deserialize as plain dicts, losing the type. + +### Wire format for payloads + +The user-facing `data` on `PubSubItem` is a +`temporalio.api.common.v1.Payload`, which carries both the data bytes +and the encoding metadata written by the client's data converter and +codec chain. Subscribers can either decode by passing `result_type=T` +to `subscribe()` (runs the async converter chain, including the codec) +or inspect `Payload.metadata` directly for heterogeneous topics. + +On the wire, every `data` string is +`base64(Payload.SerializeToString())`. 
This is because the default +JSON payload converter can serialize a top-level `Payload` as a +signal argument but **cannot** serialize a `Payload` embedded inside +a dataclass (it raises `TypeError: Object of type Payload is not JSON +serializable`). Embedding the proto-serialized bytes keeps the wire +format JSON-compatible while preserving the full `Payload` — metadata +and all — across the signal and update round-trips. Round-trip is +guarded by +`tests/contrib/pubsub/test_payload_roundtrip_prototype.py`. + +## Design Decisions + +### 1. Durable streams + +All stream events flow through the workflow's append-only log, backed by +Temporal's persistence layer. There is no ephemeral streaming option. + +**Trade-off.** Ephemeral streams that skip the Temporal server, or transit it +with lower durability, would be less resource-intensive. We chose durable +streams because: + +1. **Simpler programming model.** One event path, one source of truth. The + application does not need merge logic, reconnection handling for a second + channel, or fallback behavior when the ephemeral path fails. +2. **Reliability.** Events survive worker crashes, workflow restarts, and + continue-as-new. A subscriber that connects after a failure sees the + complete history, not a gap where the ephemeral channel lost events. +3. **Correctness.** With a single path, subscriber code is the same whether + processing events live or replaying them after a reconnect. A separate + ephemeral path for latency-sensitive events (e.g., token deltas) would + create a second code path through the frontend — additional complexity + that is difficult to test. + +The cost is latency: events round-trip through the Temporal server before +reaching the subscriber. Batching (see [Batching is built into the +client](#7-batching-is-built-into-the-client)) manages this — a 0.1-second +interval for token streaming keeps latency acceptable while amortizing +per-signal overhead. 
+ +Durability is Temporal's core value proposition. Making the stream durable by +default aligns with the platform. + +### 2. Topics are plain strings, no hierarchy + +Topics are exact-match strings. No prefix matching, no wildcards. A subscriber +provides a list of topic strings to filter on; an empty list means "all topics." + +### 3. Items are Temporal `Payload`s, not opaque bytes + +The workflow stores each item as a +`temporalio.api.common.v1.Payload` — the same type signals, updates, +and activities use. Publishers pass any value the client's data +converter accepts (or a pre-built `Payload` for zero-copy); +subscribers either receive the raw `Payload` (for heterogeneous +topics) or pass `result_type=T` to have it decoded. + +This replaces an earlier "opaque byte strings" design. We switched +because the opaque-bytes path **skipped the user's codec chain** — +encryption, PII-redaction, and compression codecs saw only the +outer `PublishInput` envelope, not the individual items. For users +who expect their codec chain to cover every piece of data flowing +through Temporal, that is a silent compliance/correctness gap. + +The three original arguments for opaque bytes don't hold up: + +1. **Decoupling from the data converter.** Signals and updates + accept `Any` without making handlers generic; `Payload.metadata` + carries per-value encoding info. Pub/sub can do the same. +2. **Layering — transport vs. application.** Every other Temporal + API surface (signals, updates, activity args, workflow args) + uses `Payload`. Pub/sub was the outlier. +3. **Type hints at decode time.** Subscribers pass `result_type` at + the subscribe boundary — the same pattern as + `execute_update(result_type=...)`. + +**Codec runs once, at the envelope level.** Both +`PubSubClient.publish` and `PubSub.publish` turn values into +`Payload` via the **sync** payload converter. The codec chain is +not applied per item. 
It runs once — on the `__pubsub_publish` +signal envelope (client → workflow path) and on the +`__pubsub_poll` update envelope (workflow → subscriber path) — +because Temporal's SDK already runs `DataConverter.encode` on +signal and update args. Running the codec per item *as well* +would double-encrypt / double-compress, and compressing +already-encrypted data is pointless. The per-item `Payload` still +carries the encoding metadata (`encoding: json/plain`, +`messageType`, etc.), so `subscribe(result_type=T)` works +without needing the codec to have run per item. + +**Wire format.** `PublishEntry.data` and `_WireItem.data` are +base64-encoded `Payload.SerializeToString()` bytes, not nested +`Payload` protos, because the default JSON converter cannot +serialize a `Payload` embedded inside a dataclass. See [Data +Types — Wire format for payloads](#wire-format-for-payloads). + +### 4. Global offsets, NATS JetStream model + +> 🚪 **One-way door.** Once subscribers persist and resume from global integer +> offsets — stored in SSE `Last-Event-ID`, BFF reconnection state, and +> client-side cursor logic — the offset semantics are baked into the wire +> protocol. Switching to per-topic offsets later would break every existing +> subscriber's resume path. This is the right choice (cursor portability and +> cross-topic ordering are valuable), but recognize that every consumer built +> against this API will assume a single integer is a complete stream position. + +Every entry gets a global offset from a single counter. Subscribers filter by +topic but advance through the global offset space. + +We surveyed offset models across Kafka, Redis Streams, NATS JetStream, PubNub, +Google Pub/Sub, RabbitMQ Streams, and Amazon SQS/SNS. No major system provides +a true global offset across independent topics. The two closest: + +- **NATS JetStream**: one stream captures multiple subjects via wildcards, with + a single sequence counter. This is our model. 
+- **PubNub**: wall-clock nanosecond timestamp as cursor across channels. + +We evaluated six alternatives for handling the information leakage that global +offsets create (a single-topic subscriber can infer other-topic activity from +gaps): per-topic counts, opaque cursors, encrypted cursors, per-topic lists, +per-topic offsets with cursor hints, and accepting the leakage. + +| Option | Systems | Leakage | Cross-topic ordering | Resume cost | Cursor portability | +|---|---|---|---|---|---| +| Per-topic count as cursor | *(theoretical)* | None | Preserved | O(n) or extra state | Coupled to filter | +| Opaque cursor wrapping global offset | *(theoretical)* | Observable | Preserved | O(1) | Filter-independent | +| Encrypted global offset | *(theoretical)* | None | Preserved | O(1) | Filter-independent | +| Per-topic / per-partition lists | Kafka, Redis Streams, RabbitMQ Streams, Google Pub/Sub, SQS/SNS | None | **Lost** | O(1) | N/A | +| **Global offsets (chosen)** | NATS JetStream, PubNub (timestamp variant) | Contained at BFF | Preserved | O(new items) | Filter-independent | +| Per-topic offsets with cursor hints | *(theoretical)* | None | Preserved | O(new items) | Per-topic only | + +**Decision:** Global offsets are the right choice for workflow-scoped pub/sub. + +**Why not per-topic offsets?** The most sophisticated alternative — per-topic +offsets with opaque cursors carrying global position hints — was rejected +for three reasons: + +1. **The threat model doesn't apply.** Information leakage assumes untrusted + multi-tenant subscribers who shouldn't learn about each other's traffic + volumes. That's Kafka's world — separate consumers for separate services. + In workflow-scoped pub/sub, the subscriber is the BFF: trusted server-side + code that could just as easily subscribe to all topics. + +2. **Cursor portability.** A global offset is a stream position that works + regardless of which topics you filter on. 
You can subscribe to `["events"]`, + then later subscribe to `["events", "thinking"]` with the same offset. + Per-topic cursors are coupled to the filter — you need a separate cursor per + topic, and adding a topic to your subscription requires starting it from the + beginning. + +3. **Unjustified complexity.** Per-topic cursors require cursor + parsing/formatting, a `topic_counts` dict that survives continue-as-new, a + multi-cursor alignment algorithm, and stale-hint fallback paths. For log + sizes of thousands of items where a filtered slice is microseconds, this + machinery adds cost without measurable benefit. + +**Leakage is contained at the BFF trust boundary.** The global offset stays +between workflow and BFF. The BFF assigns its own gapless SSE event IDs to the +browser. The global offset never reaches the end client. See +[Information Leakage and the BFF](#information-leakage-and-the-bff) for the +full mechanism. + +### 5. No topic creation + +Topics are implicit. Publishing to a topic creates it. Subscribing to a +nonexistent topic returns no items and waits for new ones. + +### 6. `force_flush` forces a flush, does not reorder + +`force_flush=True` causes the client to immediately flush its buffer. It +does NOT reorder items — the flushed item appears in its natural +position after any previously-buffered items. The purpose is +latency-sensitive delivery, not importance ranking. + +### 7. Session ordering + +Publications from a single client are ordered. This relies on two Temporal +guarantees: + +> "Signals are delivered in the order they are received by the Cluster and +> written to History." +> ([docs](https://docs.temporal.io/workflows#signal)) + +Specifically: (1) signals sent sequentially from the same client appear in +workflow history in send order, and (2) signal handlers are invoked in +history order. 
The guarantee breaks down only for *concurrent* signals — if +two signal RPCs are in flight simultaneously, their order in history is +nondeterministic. The `PubSubClient` flush lock (`_flush_lock`) ensures +signals are never in flight concurrently from a single client: + +1. Acquire lock +2. `await handle.signal(...)` — blocks until server writes to history +3. Release lock + +Combined with the workflow's single-threaded signal processing (the +`__pubsub_publish` handler is synchronous — no `await`), items within and +across batches from a single publisher preserve their publish order. + +Concurrent publishers get a total order in the log (the workflow serializes +all signal processing), but the interleaving is nondeterministic — it depends +on arrival order at the server. Per-publisher ordering is preserved. This is +formally verified as `OrderPreservedPerPublisher`. + +Once items are in the log, their order is stable — reads are repeatable. + +### 8. Batching is built into the client + +`PubSubClient` includes a Nagle-like batcher (buffer + timer). The async +context manager starts a background flush task; exiting cancels it and does a +final flush. Batching amortizes Temporal signal overhead. + +Parameters: +- `batch_interval` (default 2.0s): timer between automatic flushes. +- `max_batch_size` (optional): auto-flush when buffer reaches this size. + +### 9. Subscription is poll-based, exposed as async iterator + +The fundamental primitive is an offset-based long-poll: the subscriber sends +`from_offset` and gets back items plus `next_offset`. `__pubsub_poll` is a +Temporal update with `wait_condition`. `subscribe()` wraps this in an +`AsyncIterator` with a configurable `poll_cooldown` (default 0.1s) to +rate-limit polls. + +**Trade-off.** The alternative is server-push — the pub/sub system executes +a callback on the subscriber. Pull is better aligned with durable streams: + +1. **Back-pressure is natural.** A slow subscriber just polls less + frequently. 
Push requires the server to implement flow control to avoid + overwhelming subscribers — or risk dropping messages, defeating the + durable-stream purpose. +2. **The subscriber controls its own read position.** It can replay from an + earlier offset, skip ahead, or resume from exactly where it left off. + Push requires the server to track per-subscriber delivery state. +3. **Durable streams are data at rest.** The log exists regardless of + whether anyone is reading it. Pull treats the log as something to read + from; push treats it as a pipe to deliver through, which fights the + durability model. + +Temporal's architecture reinforces this — there is no server-push mechanism +for external clients. Updates with `wait_condition` are the closest +approximation: the workflow blocks until data is available, making it +behave like push from the subscriber's perspective while remaining pull on +the wire. + +**Both layers are exposed.** The offset-based poll is a first-class part +of the API, not hidden behind the iterator. The BFF uses offsets directly +to map SSE event IDs to global offsets for reconnection. Application code +that just wants to process items in order uses the iterator. Different +consumers use different layers. + +**Poll efficiency.** The poll slices `self._log[from_offset - base_offset:]` +and filters by topic. The common case — single topic, continuing from last +poll — is O(new items since last poll). The global offset points directly to +the resume position with no scanning or cursor alignment. Multi-topic polls +are the same cost: one slice, one filter pass. The worst case is a poll from +offset 0 (full log scan), which only happens on first connection or after the +subscriber falls behind. + +**Fan-out is per-poll, not shared.** Each `__pubsub_poll` update is an +independent Temporal update RPC. The handler has no registry of active +subscribers; every call executes `_on_poll` from scratch with its own +`from_offset` closure and topic set. 
When a publish grows the log, +Temporal's `wait_condition` machinery re-evaluates every pending predicate +and wakes each one whose condition is now true. Each then slices the same +shared log independently, applies its own topic filter, and returns its own +`PollResult` on its own update response. + +The consequences: + +- Two subscribers on the same topics from the same offset both receive the + items — each item travels the wire **twice**, once per update response. +- Two subscribers from different offsets each see their own slice; the + overlapping range is serialized into both responses. +- Two subscribers with disjoint topics each see a filtered subset; no items + are duplicated across their responses, but the log is walked twice. + +This is deliberate. Temporal updates are 1:1 RPCs, not a shared delivery +fabric. There is no intra-workflow subscriber registry, no cross-poll +dedup, no broadcast. Fan-out cost scales linearly with subscriber count, +but there's no shared state between polls to get wrong and no delivery-order +ambiguity between them. Applications that need to multiplex a single +subscription across many local consumers should do so on the client side, +below the `subscribe()` iterator — one poll stream feeding N in-process +consumers. A workflow-side shared fan-out is listed under +[Future Work](#future-work). + +### 10. Workflow can publish but should not subscribe + +Workflow code can call `self.publish()` directly — this is deterministic. +Reading from the log within workflow code is possible but breaks the +failure-free abstraction because external publishers send data via signals +(non-deterministic inputs), and branching on signal content creates +replay-sensitive code paths. + +### 11. `base_offset` for truncation + +The log carries a `base_offset`. All offset arithmetic uses +`offset - base_offset` to index into the log, so discarding a prefix of +consumed entries and advancing `base_offset` keeps global offsets +monotonic. 
If a poll's `from_offset` is below `base_offset`, the +subscriber has fallen behind truncation and the poll fails with a +non-retryable `TruncatedOffset` error. + +Because the module targets continue-as-new as the standard pattern for +long-running workflows, workflow history size is not the binding +constraint — CAN rolls history forward indefinitely. The binding +constraint is the in-memory log growing between CAN boundaries. Voice +streaming workflows have shown this matters in practice: a session can +accumulate tens of thousands of small audio/text events long before CAN +is triggered, and the workflow needs a way to release entries the +subscriber has already consumed without waiting for a CAN cycle. +`PubSub.truncate(up_to_offset)` exposes this. + +### 12. No timeout on long-poll + +`wait_condition` in the poll handler has no timeout. The poll blocks +indefinitely until one of three things happens: + +1. **New data arrives** — the `len(log) > offset` condition fires. +2. **Draining for continue-as-new** — `PubSub.drain()` sets the flag. +3. **Client disconnects** — the BFF drops the SSE connection, cancels the + update RPC, and the handler becomes an inert coroutine cleaned up at + the next drain cycle. + +A previous design used a 5-minute timeout as a defensive "don't block +forever" mechanism. This was removed because: + +- **It adds unnecessary history events.** Every poll creates a `TimerStarted` + event. For a streaming session doing hundreds of polls, this doubles the + history event count and accelerates approach to the ~50K event CAN threshold. +- **The drain mechanism already handles cleanup.** `PubSub.drain()` unblocks + all waiting polls, and the update validator rejects new polls, so + `all_handlers_finished()` converges without timers. +- **Zombie polls are harmless.** If a client crashes without cancelling, its + poll handler is just an in-memory coroutine waiting on a condition. 
It + consumes no Temporal actions and is cleaned up at the next CAN cycle. + +### 13. Signals for publish, updates for poll + +Publishing uses signals (fire-and-forget); subscription uses updates +(request-response with `wait_condition`). These choices are deliberate. + +**Why signals for publish:** + +- **Non-blocking flush.** The activity can buffer tokens at whatever rate + the LLM produces them. `handle.signal(...)` enqueues at the server and + returns immediately — the publisher is never throttled by the workflow's + processing speed. +- **Lower history cost.** Each signal adds 1 event (`WorkflowSignalReceived`). + An update adds 2 (`UpdateAccepted` + `UpdateCompleted`). For a streaming + session with hundreds of flushes, signals halve the history growth rate and + delay the CAN threshold. +- **No concurrency limits.** Temporal Cloud enforces per-workflow update + limits. Signals have no equivalent limit, making them safer for + high-throughput publishing. + +**Why updates for poll:** + +- The caller needs a result (the items). Blocking is the desired behavior + (long-poll semantics). `wait_condition` inside an update handler is the + natural fit. + +**Why not updates for publish?** The main attraction would be platform-native +exactly-once via Update ID, eliminating application-level dedup. However: + +1. Update ID dedup does not persist across continue-as-new. For CAN workflows, + application-level dedup is required regardless + ([temporal/temporal#6375](https://github.com/temporalio/temporal/issues/6375)). +2. Each flush would block for a round-trip to the worker (~10-50ms), throttling + the publisher. +3. The 2x history cost accelerates approach to the CAN threshold. + +If the cross-CAN dedup gap is fixed and backpressure becomes desirable, +switching publish to updates is a mechanical change — the dedup protocol, +dedup protocol, and mixin handler logic are unchanged. 
+ +## Design Principles + +### Deduplication follows the end-to-end principle + +**The end-to-end principle** (Saltzer, Reed, Clark, "End-to-End Arguments in +System Design," 1984): a function can be correctly and completely +implemented only with the knowledge available at the endpoints of a +communication system. Implementing it at intermediate layers may be +redundant or of little value, because the endpoints must handle it +regardless. The corollary: implement a function at the lowest layer that +can implement it *completely*. Don't partially implement it at an +intermediate layer. + +> 🚪 **One-way door.** The contract that the stream is an append-only log of +> *all* attempts — including failed ones — is irreversible once subscribers +> build reducers around it. Every frontend reducer expects to see interleaved +> retries and uses application-level events (e.g., `AGENT_START` resetting the +> text accumulator) to reconcile. If the transport later started filtering +> retries, existing reducers would break — they would miss the state +> transitions they depend on, and there would be two different behaviors +> depending on whether the subscriber was connected live (saw the failed +> attempt) or replayed after reconnect (didn't). This is the correct design, +> but it is a permanent commitment. + +**Our design decision.** We do not filter out events from failed activity +attempts. When an activity retries — for example, an LLM call that times +out, or a tool call that fails because a worker crashes — its previous +attempt's streaming events remain in the log. The new attempt publishes +fresh events. The subscriber sees both. + +**Why the pub/sub layer cannot handle this completely.** When an LLM +activity retries, the model runs again and produces different output — +different tokens, different wording, a different response. The pub/sub +layer sees two different message sequences. It has no way to know these +represent the same logical operation. 
Only the application knows that the +second response supersedes the first. + +We could have added retry semantics to the pub/sub protocol — for example, +tagging messages with attempt numbers and letting the transport filter +superseded attempts, similar to signal-level dedup. But this would be +incomplete, and the incompleteness creates a real problem: if the +transport scrubs failed-attempt events, but the subscriber already saw +them in real time (before the retry happened), the subscriber now has two +code paths — one for the live stream (which included the failed attempt) +and one for replay after reconnect (which doesn't). Two paths through the +frontend for the same logical scenario is a source of bugs and is +difficult to test. The transport's filtering doesn't save the subscriber +any work; the subscriber needs robust reconciliation logic regardless. + +**The contract: an append-only log of attempts.** The stream records what +happened, including failed attempts. The subscriber decides how to present +this to the user. In our frontend, the application-layer reducer handles +reconciliation: a new `TEXT_COMPLETE` event overwrites the previous one +(set semantics), and an `AGENT_START` event resets the text accumulator so +the retry's tokens replace the failed attempt's partial output. This +reducer produces the same state whether it processes events live or +replays them on reconnect — there is only one code path. + +**The pub/sub layer handles what it can handle completely.** Signal-level +dedup (same publisher ID + same sequence number) is fully resolvable at the +transport layer — the layer has all the information it needs, so it +deduplicates there. Activity-level dedup cannot be fully resolved at the +transport layer — it requires application context — so the pub/sub layer +does not attempt it. Each layer handles the duplicates it can completely +resolve. 
+ +## Exactly-Once Publish Delivery + +External publishers get exactly-once delivery through publisher ID + sequence +number deduplication, following the Kafka producer model. + +### Problem + +`flush()` sends items via a Temporal signal. If the signal call raises after +the server accepted it (e.g., network timeout on the response), the client +cannot distinguish delivered from not-delivered. Without dedup, the client +must choose between at-most-once (data loss) and at-least-once (silent +duplication). + +### Solution + +Each `PubSubClient` instance generates a UUID (`publisher_id`) on creation. +Each `flush()` increments a monotonic `sequence` counter. The signal payload +includes both. The workflow tracks the highest seen sequence per publisher in +`_publisher_sequences: dict[str, int]` and rejects any signal with +`sequence <= last_seen`. + +``` +Client Workflow + │ │ + │ signal(publisher_id, seq=1) │ + │───────────────────────────────────►│ seq 1 > 0 → accept, record seq=1 + │ │ + │ signal(publisher_id, seq=1) │ (retry after timeout) + │───────────────────────────────────►│ seq 1 <= 1 → reject (duplicate) + │ │ + │ signal(publisher_id, seq=2) │ + │───────────────────────────────────►│ seq 2 > 1 → accept, record seq=2 +``` + +### Client-side flush + +```python +async def _flush(self) -> None: + async with self._flush_lock: + if self._pending is not None: + # Retry failed batch with same sequence + batch = self._pending + seq = self._pending_seq + elif self._buffer: + # New batch + seq = self._sequence + 1 + batch = self._buffer + self._buffer = [] + self._pending = batch + self._pending_seq = seq + else: + return + try: + await self._handle.signal( + "__pubsub_publish", + PublishInput(items=batch, publisher_id=self._publisher_id, + sequence=seq), + ) + self._sequence = seq # advance confirmed sequence + self._pending = None # clear pending + except Exception: + pass # pending stays for retry + raise +``` + +- **Separate pending from buffer**: failed batches 
stay in `_pending`, not + restored to `_buffer`. New `publish()` calls during retry go to the fresh + buffer. This prevents the data-loss bug where items would be merged into a + retry batch under a different sequence number. +- **Retry with same sequence**: on failure, the next `_flush()` retries the + same `_pending` with the same `_pending_seq`. If the signal was delivered + but the client saw an error, the workflow deduplicates the retry. +- **Sequence advances only on success**: `_sequence` (confirmed) is updated + only after the signal call returns without error. +- **Lock for coalescing**: concurrent `_flush()` callers queue on the lock. +- **max_retry_duration**: if set, the client gives up retrying after this + duration and raises `TimeoutError`. Must be less than the workflow's + `publisher_ttl` to preserve exactly-once guarantees. + +### Dedup state and TTL pruning + +`publisher_sequences` is `dict[str, int]` — bounded by number of publishers +(typically 1-2), not number of flushes. Carried through continue-as-new in +`PubSubState`. If `publisher_id` is empty (workflow-internal publish), +dedup is skipped. + +`publisher_last_seen` tracks the last `workflow.time()` each publisher was +seen. During `PubSub.get_state(publisher_ttl=900)`, entries older than TTL +are pruned to bound memory across long-lived workflow chains. + +**Safety constraint**: `publisher_ttl` must exceed the client's +`max_retry_duration`. If a publisher's dedup entry is pruned while it still +has a pending retry, the retry could be accepted as new, creating duplicates. + +### Scope: what pub/sub dedup does and does not handle + +Duplicates arise at three points in the pipeline. Each layer handles the +duplicates it introduces — applying the end-to-end principle (Saltzer, Reed, +Clark 1984). 
+ +``` +LLM API --> Activity --> PubSubClient --> Workflow Log --> BFF/SSE --> Browser + (A) (B) (C) +``` + +| Type | Cause | Handled by | +|---|---|---| +| A: Duplicate LLM work | Activity retry produces a second, semantically equivalent but textually different response | Application layer (activity idempotency keys, workflow orchestration) | +| B: Duplicate signal batches | Signal retry after ambiguous failure delivers the same `(publisher_id, sequence)` batch twice | **Pub/sub layer** (`sequence <= last_seen` rejection) | +| C: Duplicate SSE events | Browser reconnects and BFF replays previously-delivered events | Delivery layer (SSE `Last-Event-ID`, idempotent frontend reducers) | + +**Why Type A doesn't belong here.** Data escapes to the subscriber during the +first LLM call — tokens are consumed, forwarded to the browser, and rendered +before any retry occurs. By the time a retry produces a duplicate response, +the original is already consumed. The pub/sub layer has no opportunity to +suppress it, and resolution requires application semantics (discard, replace, +merge) that the transport layer has no knowledge of. + +**Why Type B must be here.** The consumer sees `PubSubItem(topic, data)` with +no unique ID. If the workflow accepted a duplicate batch, the duplicates would +get fresh offsets and be indistinguishable from originals. Content-based dedup +has false positives (an LLM legitimately produces the same token twice; a +status event like `{"type":"THINKING_START"}` repeats across turns). The +`(publisher_id, sequence)` check is the only correct implementation — it +preserves transport encapsulation and uses context only the transport layer +has. + +**Why Type C doesn't belong here.** SSE reconnection is below the pub/sub +layer. The BFF assigns gapless event IDs and maps `Last-Event-ID` back to +global offsets (see [Information Leakage and the BFF](#information-leakage-and-the-bff)). 
+ +## Continue-as-New + +### Problem + +The pub/sub mixin accumulates workflow history through signals (each +`__pubsub_publish`) and updates (each `__pubsub_poll` response). Over a +streaming session, history grows toward the ~50K event threshold. CAN resets +the history while carrying the canonical log copy forward. + +### State + +```python +@dataclass +class PubSubState: + log: list[PubSubItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) +``` + +`PubSub(prior_state=...)` restores all four fields. `PubSub.get_state()` +snapshots them. + +### Draining + +A long-poll `__pubsub_poll` blocks indefinitely until new data arrives. To +allow CAN to proceed, draining uses two mechanisms: + +1. **`PubSub.drain()`** sets a flag that unblocks all waiting poll handlers + (the `or self._draining` clause in `wait_condition`). +2. **Update validator** rejects new polls when draining, so no new handlers + start and `all_handlers_finished()` stabilizes. + +```python +# CAN sequence in the parent workflow: +self.pubsub.drain() +await workflow.wait_condition(workflow.all_handlers_finished) +workflow.continue_as_new(args=[WorkflowInput( + pubsub_state=self.pubsub.get_state(), +)]) +``` + +### Client-side CAN following + +`subscribe()` always follows CAN chains when the client was created via +`for_workflow()`. When a poll fails with +`WorkflowUpdateRPCTimeoutOrCancelledError`, the client calls `describe()` on +the handle. If the status is `CONTINUED_AS_NEW`, it gets a fresh handle for +the same workflow ID (targeting the latest run) and retries the poll from the +same offset. 
+ +```python +async def _follow_continue_as_new(self) -> bool: + if self._client is None: + return False + try: + desc = await self._handle.describe() + except Exception: + return False + if desc.status == WorkflowExecutionStatus.CONTINUED_AS_NEW: + self._handle = self._client.get_workflow_handle(self._workflow_id) + return True + return False +``` + +The `describe()` check prevents infinite loops: if the workflow completed or +failed (not CAN), the subscriber stops instead of retrying. + +### Offset continuity + +Since the full log is carried forward: + +- Pre-CAN: offsets `0..N-1`, log length N. +- Post-CAN: `PubSub(prior_state=...)` restores N items. New appends start + at offset N. +- A subscriber at offset K resumes seamlessly against the new run. + +### Edge cases + +**Payload size limit.** The full log in CAN input could approach Temporal's +2 MB limit for very long sessions. Mitigation: truncation (discarding consumed +entries before CAN) is the natural extension, supported by `base_offset`. + +**Signal delivery during CAN.** A publisher sending mid-CAN may get errors if +its handle is pinned to the old run. The workflow should ensure activities +complete before triggering CAN. + +**Concurrent subscribers.** Each maintains its own offset. Sharing a +`PubSubClient` across concurrent `subscribe()` calls is safe. + +## Information Leakage and the BFF + +Global offsets leak cross-topic activity (a single-topic subscriber sees gaps). +This is acceptable within the pub/sub API because the subscriber is the BFF — +trusted server-side code. The leakage must not reach the end client (browser). + +### The problem + +If the BFF forwarded `PollResult.next_offset` to the browser (e.g., as an SSE +reconnection cursor), the browser could observe gaps and infer activity on +topics it is not subscribed to. Even if the offset is "opaque," a monotonic +integer with gaps is trivially inspectable. 
+ +### Options considered + +We evaluated four approaches for browser-side reconnection: + +1. **BFF tracks the cursor server-side.** The BFF maintains a per-session + `session_id → last_offset` mapping. The browser reconnects with just the + session ID. On BFF restart, cursors are lost — fall back to replaying from + turn start. + +2. **Opaque token from the BFF.** The BFF wraps the global offset in an + encoded or encrypted token. The browser passes it back on reconnect. + `base64(offset)` is trivially reversible (security theater); real encryption + needs a key and adds a layer for marginal benefit over option 1. + +3. **BFF assigns SSE event IDs with `Last-Event-ID`.** The BFF emits SSE + events with `id: 1`, `id: 2`, `id: 3` (a BFF-local counter per stream). + On reconnect, the browser sends `Last-Event-ID` (built into the SSE spec). + The BFF maps that back to a global offset internally. + +4. **No mid-stream resume.** Browser reconnects, BFF replays from start of + the current turn. Frontend deduplicates. Simplest, but replays more data + than necessary. + +### Decision: SSE event IDs (option 3) + +The BFF assigns gapless integer IDs to SSE events and maintains a small +mapping from SSE event index to global offset. The browser never sees the +workflow's offset — it sees the BFF's event numbering. + +```python +sse_id = 0 +sse_id_to_offset: dict[int, int] = {} + +start_offset = await pubsub.get_offset() +async for item in pubsub.subscribe(topics=["events"], from_offset=start_offset): + sse_id += 1 + sse_id_to_offset[sse_id] = item_global_offset + yield f"id: {sse_id}\ndata: {item.data}\n\n" +``` + +On reconnect, the browser sends `Last-Event-ID: 47`. The BFF looks up the +corresponding global offset and resumes the subscription from there. + +The BFF is already per-session and stateful (it holds the SSE connection). +The `sse_id → global_offset` mapping is negligible additional state. 
On BFF +restart, the mapping is lost — fall back to replaying from turn start (option +4), which is acceptable because agent turns produce modest event volumes and +the frontend reducer is idempotent. + +This uses the SSE spec as designed: `Last-Event-ID` exists for exactly this +reconnection pattern. + +## Cross-Language Protocol + +Any Temporal client in any language can interact with a pub/sub workflow by: + +1. **Publishing**: Signal `__pubsub_publish` with `PublishInput` payload +2. **Subscribing**: Execute update `__pubsub_poll` with `PollInput`, loop +3. **Checking offset**: Query `__pubsub_offset` + +Double-underscore prefix on handler names avoids collisions with application +signals/updates. The envelope types are simple composites of strings, bytes, +and ints — representable in every Temporal SDK's default data converter. + +**Requires the default (JSON) data converter.** The wire protocol depends on +all participants — workflow, publishers, and subscribers — using the default +JSON data converter. A custom converter (protobuf, encryption codecs) would +change how the envelope types serialize, breaking cross-language interop. +This is also why payload data is opaque bytes: the pub/sub layer controls the +envelope format (guaranteed JSON-safe), while the application controls payload +serialization independently. + +## Compatibility + +> 🚪 **One-way door (two parts).** +> +> **Immutable handler names.** `__pubsub_publish`, `__pubsub_poll`, and +> `__pubsub_offset` are permanent wire-level entry points. The escape hatch — +> versioned handler names like `__pubsub_v2_poll` — gets more expensive over +> time: the mixin must register all supported versions, with no discovery +> mechanism for which versions a workflow supports. +> +> **No version field.** Committing to additive-only evolution means the *only* +> path for a true breaking change is versioned handler names. 
If the +> additive-only discipline ever fails — an existing field's semantics need to +> change, not just a new field added — there is no graceful migration path +> within a single handler. The argument against a version field is sound +> (signals are fire-and-forget, so version rejection equals silent data loss), +> but it means the protocol's evolvability hinges entirely on never needing to +> change existing field semantics. + +The wire protocol evolves under four rules to prevent accidental breakage by +future contributors. + +### Alternatives considered + +We evaluated and rejected five approaches to protocol evolution in favor of +additive-only. + +**Version field in payloads.** Add `version: int` to each wire type and have +the receiver check it. Fatal flaw: signals are fire-and-forget. If a v1 +workflow receives a v2 signal and rejects it based on version, the publisher +never learns the signal was rejected — silent data loss. Strictly worse than +the current behavior, where unknown fields are harmlessly dropped by +Temporal's JSON deserializer. For updates (poll), a version mismatch could +return an error, but this only helps if you change the semantics of an +existing field — which you should not do (that is a new handler, not a +version bump). + +**Versioned handler names** (e.g., `__pubsub_v2_poll`). The most robust +option — creates entirely separate protocol surfaces so old and new code +never interact. But premature: the mixin must register handlers for all +supported versions, the client must probe which versions exist (Temporal +has no "does this handler exist?" primitive), and dead code accumulates. +Reserved as the escape hatch for a future true breaking change. + +**Protocol negotiation.** Client declares version in poll, workflow +responds with what it supports. Turns the mixin into a version-dispatching +router. Disproportionate complexity. 
Temporal's Worker Versioning (Build ID
+routing) solves this better at the infrastructure level — route tasks to
+compatible workers rather than negotiating at the message level.
+
+**SDK version embedding.** Couples the protocol to the SDK release cadence.
+SDK version 2.0 might change zero protocol fields; SDK version 1.7 might
+change three. The version number becomes meaningless noise.
+
+**Accepting silent incompatibility.** Letting version drift break things
+silently. Unacceptable for a durable-stream contract: a v2 subscriber
+hitting a v1 workflow should see older fields default, not corrupt state.
+
+**Why additive-only works.** Every protocol change to date has followed
+the same pattern: new field with a default that preserves pre-feature
+behavior. This matches Protocol Buffers wire compatibility rules (never
+change the meaning of an existing field number; always provide defaults
+for new fields) and Avro's schema evolution model. Temporal's own
+mechanisms cover the hard cases:
+
+- **Worker Versioning (Build IDs):** For true breaking changes, deploy v2
+  mixin on a new Build ID. Old workflows continue on old workers; new
+  workflows start on new workers. Strictly more powerful than
+  message-level versioning because it operates at the workflow execution
+  level.
+- **`workflow.patched()`:** For in-workflow behavior branching during
+  replay. Gates old vs. new logic within the same workflow code during
+  transition periods.
+
+**Ecosystem parallel.** Kafka's inter-broker protocol uses explicit version
+numbers because brokers in a cluster must negotiate capabilities at
+connection time — a fundamentally different topology from our
+single-workflow-instance model. Our pattern is closer to protobuf wire
+evolution: the schema is the contract, defaults handle absence, and
+breaking changes get a new message type (handler name).
+
+### 1. 
Additive-only wire evolution + +New fields on `PublishInput`, `PollInput`, `PollResult`, and `PubSubState` must +have defaults. Existing field semantics must not change. Temporal's JSON data +converter drops unknown fields on deserialization and uses defaults for missing +fields, so additive changes are safe in both directions (new client → old +workflow, and vice versa). This is the same model as Protocol Buffers wire +compatibility. + +### 2. Handler names are immutable + +`__pubsub_publish`, `__pubsub_poll`, and `__pubsub_offset` will never change +meaning. If a future change is incompatible with additive evolution, the correct +mechanism is a new handler name (e.g., `__pubsub_v2_poll`) — creating an +entirely separate protocol surface so old and new code never interact. + +### 3. `PubSubState` must be forward-compatible + +New fields use `field(default_factory=...)` or scalar defaults. Old state loaded +into new code works (new fields get defaults). New state loaded into old code +works (unknown fields dropped by the JSON deserializer). This ensures seamless +continue-as-new across mixed-version deployments. + +### 4. No application-level version negotiation + +We do not add version fields to payloads, and we do not negotiate protocol +versions between client and workflow. The reasons: + +- **Signals cannot return errors.** A version field that the workflow checks on a + signal creates silent data loss: the workflow rejects the signal, but the + client (which used fire-and-forget delivery) never learns it was rejected. + This is strictly worse than the current behavior, where unknown fields are + harmlessly ignored. +- **Temporal Worker Versioning handles the hard cases.** For a true breaking + change, deploy the new mixin on a new Build ID. Old running workflows continue + on old workers; new workflows start on new workers. 
This operates at the + infrastructure level — handling in-flight workflows, replay, and mixed-version + fleets — which message-level version fields cannot. +- **`workflow.patched()` handles in-workflow transitions.** If a new mixin + version changes behavior (e.g., how it processes a signal), `patched()` gates + old vs. new logic within the same workflow code during the transition period. + +### Field defaults + +All fields follow rule 1: + +| Field | Default | Behavior when absent | +|---|---|---| +| `PublishInput.publisher_id` | `""` | Empty string skips dedup | +| `PublishInput.sequence` | `0` | Zero skips dedup | +| `_WireItem.offset` | `0` | Zero means "unknown" | +| `PollResult.more_ready` | `False` | No truncation signaled | +| `PubSubState.publisher_last_seen` | `{}` | No TTL pruning state | + +## Ecosystem analogs + +The closest analogs in established messaging systems, for orientation: + +- **Offset model** — NATS JetStream: one stream, multiple subjects, a + single monotonic sequence number. Subscribers filter by subject but + advance through the global sequence space. This is our model. +- **Idempotent producer** — Kafka's producer ID + monotonic sequence + number, scoped to the broker. Our `publisher_id` + `sequence` at the + workflow does the same job, scoped to signal delivery into one workflow. +- **Blocking pull** — Redis Streams `XREAD BLOCK`. Our `__pubsub_poll` + update with `wait_condition` is the Temporal-native equivalent. +- **Durable-execution peer** — the Workflow SDK ([workflow-sdk.dev](https://workflow-sdk.dev)) + has a first-class streaming model with indexed resumption and buffered + writes, but uses external storage (Redis/filesystem) as the broker + rather than the workflow itself. + +Full comparison tables (same/different with Kafka, NATS JetStream, Redis +Streams, and Workflow SDK) live on the +[Streaming API Design Considerations Notion page](https://www.notion.so/3478fc567738803d9c22eeb64a296e21). 
+ +## Future Work + +### Shared workflow-side fan-out + +Each `__pubsub_poll` update today is serviced independently, and an item +published to N interested subscribers crosses the wire N times (see +[Design Decision 9](#9-subscription-is-poll-based-exposed-as-async-iterator)). +For low fan-out (1–2 consumers) this is fine; for workloads with many +concurrent subscribers on overlapping topics the duplication becomes the +dominant cost. + +A shared fan-out would keep a registry of active polls inside the +workflow, coalesce them by `(from_offset, topics)` key, and have one +poll wake-up build a shared response that the handler returns to every +matching caller. The tricky parts are: (a) offsets and topic filters +usually differ per subscriber, limiting coalescing; (b) the registry is +workflow state that must survive continue-as-new; (c) cancelled polls +must be reaped cleanly so the registry doesn't leak across replays. +Until a concrete workload shows the linear-in-subscribers cost matters, +the simpler per-poll model is the right default — applications that need +local fan-out can share one `subscribe()` iterator across N in-process +consumers on the client side, where state is trivial. + +### Workflow-defined filters and transforms + +Today the only filter is "topic in topics". A richer model would let +the workflow register named filters or transforms — e.g., `filter="high_priority"` +or `transform="redact_pii"` — that run inside the poll handler before +items are returned. This keeps computation close to the log, avoids +shipping items the subscriber will discard, and lets workflows enforce +access control per subscriber rather than delegating it to clients. + +Design questions left open: filter/transform registration API (at +`PubSub` construction, or later?), whether transforms may change the +item count (e.g., aggregation), how filter state interacts with +continue-as-new, and how filter identity is named on the wire for +cross-language clients. 
+ +### Workflow-side subscription + +[Design Decision 10](#10-workflow-can-publish-but-should-not-subscribe) +explains why workflow code shouldn't read the log directly today — the +log contains data from non-deterministic signal inputs, and branching on +it creates replay-sensitive code paths. There are workflow-side use +cases (aggregator workflows, workflows that fan events out to child +workflows, workflows that trigger activities based on stream content) +where a proper subscription API would be useful. + +A safe workflow-side `subscribe()` would need to tag reads so they go +through the same determinism machinery as other non-deterministic +inputs — likely surfaced as an async iterator that yields at +deterministic checkpoints. The simplest cut is probably a pull-based +iterator over `self._log` slices that integrates with `wait_condition` +for the "no data yet" case, mirroring the external poll API but +bypassing the update RPC layer. + +## File Layout + +``` +temporalio/contrib/pubsub/ +├── __init__.py # Public API exports +├── _broker.py # PubSub (workflow-side) +├── _client.py # PubSubClient (external-side) +├── _types.py # Shared data types +├── README.md # Usage documentation +└── DESIGN-v2.md # This document +``` diff --git a/temporalio/contrib/pubsub/README.md b/temporalio/contrib/pubsub/README.md new file mode 100644 index 000000000..bb547bea2 --- /dev/null +++ b/temporalio/contrib/pubsub/README.md @@ -0,0 +1,205 @@ +# Temporal Workflow Pub/Sub + +Workflows sometimes need to push incremental updates to external observers. +Examples include providing customer updates during order processing, creating +interactive experiences with AI agents, or reporting progress from a +long-running data pipeline. Temporal's core primitives (workflows, signals, and +updates) already provide the building blocks, but wiring up batching, offset +tracking, topic filtering, and continue-as-new hand-off is non-trivial. 
+
+This module packages that boilerplate into a reusable broker and client. The
+workflow acts as a message broker that maintains an append-only log.
+Applications can interact directly from the workflow, or from external clients
+such as activities, starters, and other workflows. Under the hood, publishing
+uses signals (fire-and-forget) while subscribing uses updates (long-poll). A
+configurable batching interval coalesces high-frequency events, improving
+efficiency.
+
+Payloads are Temporal `Payload`s carrying the encoding metadata needed for
+typed decode (`subscribe(result_type=T)`) and heterogeneous-topic dispatch
+(`Payload.metadata`). The codec chain (encryption, PII-redaction,
+compression) runs once on the signal/update envelope that carries each
+batch — **not** per item — so there is no double-encryption, and codec
+behavior is symmetric between workflow-side and client-side publishing.
+
+## Quick Start
+
+### Workflow side
+
+Construct a `PubSub` from your `@workflow.init`. The constructor
+dynamically registers the pub/sub signal, update, and query handlers on
+the current workflow, and raises `RuntimeError` if called twice. 
If you +want the workflow to support continue-as-new, include a +`PubSubState | None` field on the input and pass it through — it's +`None` on fresh starts and carries state across CAN otherwise: + +```python +from dataclasses import dataclass +from temporalio import workflow +from temporalio.contrib.pubsub import PubSub, PubSubState + +@dataclass +class MyInput: + pubsub_state: PubSubState | None = None + +@workflow.defn +class MyWorkflow: + @workflow.init + def __init__(self, input: MyInput) -> None: + self.pubsub = PubSub(prior_state=input.pubsub_state) + + @workflow.run + async def run(self, input: MyInput) -> None: + self.pubsub.publish("status", StatusEvent(state="started")) + await do_work() + self.pubsub.publish("status", StatusEvent(state="done")) +``` + +Both workflow-side and client-side `publish()` use the sync payload +converter for per-item `Payload` construction. The codec chain runs +once at the envelope level (`__pubsub_publish` signal, +`__pubsub_poll` update) — never per item — so encryption, +PII-redaction, and compression are applied once each way. + +### Activity side (publishing) + +Use `PubSubClient.from_activity()` with the async context manager for +batched publishing. 
The Temporal client and target workflow ID are taken +from the activity context: + +```python +from temporalio import activity +from temporalio.contrib.pubsub import PubSubClient + +@activity.defn +async def stream_events() -> None: + client = PubSubClient.from_activity(batch_interval=2.0) + async with client: + for chunk in generate_chunks(): + client.publish("events", chunk) + activity.heartbeat() + # Buffer is flushed automatically on context manager exit +``` + +Use `force_flush=True` to trigger an immediate flush for latency-sensitive events: + +```python +client.publish("events", data, force_flush=True) +``` + +### Subscribing + +Use `PubSubClient.create()` and the `subscribe()` async iterator: + +```python +from temporalio.contrib.pubsub import PubSubClient + +client = PubSubClient.create(temporal_client, workflow_id) +async for item in client.subscribe(["events"], result_type=MyEvent): + print(item.topic, item.data) + if is_done(item): + break +``` + +`item.data` is a `temporalio.api.common.v1.Payload` when no +`result_type` is given; passing `result_type=T` decodes each item to +`T` via the client's data converter (including the codec chain). + +## Topics + +Topics allow subscribers to receive a subset of the messages in the pub/sub system. +Subscribers can request a list of specific topics, or provide an empty list to receive +messages from all topics. Publishing to a topic implicitly creates it. 
+ +## Continue-as-new + +Carry both your application state and pub/sub state across continue-as-new +boundaries: + +```python +from dataclasses import dataclass +from temporalio import workflow +from temporalio.contrib.pubsub import PubSub, PubSubState + +@dataclass +class WorkflowInput: + # Your application state + items_processed: int = 0 + # Pub/sub state + pubsub_state: PubSubState | None = None + +@workflow.defn +class MyWorkflow: + @workflow.init + def __init__(self, input: WorkflowInput) -> None: + self.items_processed = input.items_processed + self.pubsub = PubSub(prior_state=input.pubsub_state) + + @workflow.run + async def run(self, input: WorkflowInput) -> None: + # ... do work, updating self.items_processed ... + + if workflow.info().is_continue_as_new_suggested(): + self.pubsub.drain() + await workflow.wait_condition(workflow.all_handlers_finished) + workflow.continue_as_new(args=[WorkflowInput( + items_processed=self.items_processed, + pubsub_state=self.pubsub.get_state(), + )]) +``` + +`pubsub.drain()` unblocks waiting subscribers and rejects new polls so +`all_handlers_finished` can stabilize. Subscribers created via +`PubSubClient.create()` or `PubSubClient.from_activity()` automatically +follow continue-as-new chains. + +## API Reference + +### PubSub + +| Method | Description | +|---|---| +| `PubSub(prior_state=None)` | Constructor. Call once from `@workflow.init`; registers handlers on the current workflow. Raises `RuntimeError` if a `PubSub` is already registered. Pass `prior_state` if the input declares one (`None` on fresh starts). | +| `publish(topic, value)` | Append to the log from workflow code. `value` is converted via the sync workflow payload converter (no codec). | +| `get_state(*, publisher_ttl=900.0)` | Snapshot for continue-as-new. Drops publisher dedup entries older than `publisher_ttl` seconds. | +| `drain()` | Unblock polls and reject new ones. | +| `truncate(up_to_offset)` | Discard log entries below the given offset. 
Workflow-side only — no external API; wire up your own signal or update if external control is needed. | + +Handlers registered by the constructor: + +| Kind | Name | Description | +|---|---|---| +| Signal | `__pubsub_publish` | Receive external publications. | +| Update | `__pubsub_poll` | Long-poll subscription. | +| Query | `__pubsub_offset` | Current global offset. | + +### PubSubClient + +| Method | Description | +|---|---| +| `PubSubClient.create(client, workflow_id, *, batch_interval, max_batch_size, max_retry_duration)` | Factory with an explicit Temporal client and workflow id. Follows CAN. | +| `PubSubClient.from_activity(*, batch_interval, max_batch_size, max_retry_duration)` | Factory that takes client and workflow id from the current activity context. Follows CAN. | +| `PubSubClient(handle, *, batch_interval, max_batch_size, max_retry_duration)` | From handle (no CAN follow). | +| `publish(topic, value, force_flush=False)` | Buffer a message. `value` may be any converter-compatible object or a pre-built `Payload`. Per-item conversion uses the sync payload converter; the codec chain runs once on the signal envelope. | +| `subscribe(topics, from_offset, *, result_type=None, poll_cooldown=0.1)` | Async iterator. With `result_type=T`, `item.data` is decoded to `T`; otherwise it is a raw `Payload`. Follows CAN chains when created via `create` or `from_activity`. | +| `get_offset()` | Query current global offset. | + +Use as `async with` for batched publishing with automatic flush. + +## Cross-Language Protocol + +Any Temporal client can interact with a pub/sub workflow using these +fixed handler names: + +1. **Publish:** Signal `__pubsub_publish` with `PublishInput` +2. **Subscribe:** Update `__pubsub_poll` with `PollInput` -> `PollResult` +3. **Offset:** Query `__pubsub_offset` -> `int` + +The Python API exposes Temporal `Payload`s and decodes via the client's +data converter. 
On the wire, each `PublishEntry.data` / `_WireItem.data` +is a base64-encoded `Payload.SerializeToString()` so the transport +remains JSON-serializable while preserving `Payload.metadata` (used by +codecs and by the decode path). Cross-language clients can publish and +subscribe by following the same base64-of-serialized-`Payload` shape. +The signal/update envelopes (`PublishInput`, `PollResult`, `PubSubState`) +require the default (JSON) data converter; custom converters on the +envelope layer will break cross-language interop. diff --git a/temporalio/contrib/pubsub/__init__.py b/temporalio/contrib/pubsub/__init__.py new file mode 100644 index 000000000..7ce34e191 --- /dev/null +++ b/temporalio/contrib/pubsub/__init__.py @@ -0,0 +1,33 @@ +"""Pub/sub support for Temporal workflows. + +This module provides a reusable pub/sub pattern where a workflow acts as a +message broker. External clients (activities, starters, other services) publish +and subscribe through the workflow handle using Temporal primitives. + +Payloads are Temporal ``Payload`` values. Publishing values go through +the client's data converter (including any configured codec chain); +subscribers can yield raw ``Payload`` or request a concrete type via +``subscribe(result_type=T)``. +""" + +from temporalio.contrib.pubsub._broker import PubSub +from temporalio.contrib.pubsub._client import PubSubClient +from temporalio.contrib.pubsub._types import ( + PollInput, + PollResult, + PublishEntry, + PublishInput, + PubSubItem, + PubSubState, +) + +__all__ = [ + "PollInput", + "PollResult", + "PubSub", + "PubSubClient", + "PubSubItem", + "PubSubState", + "PublishEntry", + "PublishInput", +] diff --git a/temporalio/contrib/pubsub/_broker.py b/temporalio/contrib/pubsub/_broker.py new file mode 100644 index 000000000..112de3abd --- /dev/null +++ b/temporalio/contrib/pubsub/_broker.py @@ -0,0 +1,311 @@ +"""Workflow-side pub/sub broker. 
+ +Instantiate :class:`PubSub` once from your workflow's ``@workflow.init`` +method. The constructor registers the pub/sub signal, update, and query +handlers on the current workflow via +:func:`temporalio.workflow.set_signal_handler`, +:func:`temporalio.workflow.set_update_handler`, and +:func:`temporalio.workflow.set_query_handler`. + +For workflows that support continue-as-new, include a +``PubSubState | None`` field on the workflow input and pass it as +``prior_state`` — it is ``None`` on fresh starts and carries accumulated +state on continue-as-new. + +Both workflow-side :meth:`PubSub.publish` and client-side +:meth:`PubSubClient.publish` use the synchronous payload converter for +per-item ``Payload`` construction. The codec chain (encryption, +PII-redaction, compression) is **not** run per item on either side — +it runs once at the envelope level when Temporal's SDK encodes the +signal/update that carries the batch. Running it per item as well +would double-encrypt, because every signal arg already goes through +the client's ``DataConverter.encode`` at dispatch time. +""" + +from __future__ import annotations + +import sys +from typing import Any + +from temporalio import workflow +from temporalio.api.common.v1 import Payload +from temporalio.exceptions import ApplicationError + +from ._types import ( + PollInput, + PollResult, + PublishInput, + PubSubItem, + PubSubState, + _decode_payload, + _encode_payload, + _WireItem, +) + +_PUBLISH_SIGNAL = "__pubsub_publish" +_POLL_UPDATE = "__pubsub_poll" +_OFFSET_QUERY = "__pubsub_offset" + +_MAX_POLL_RESPONSE_BYTES = 1_000_000 + + +def _payload_wire_size(payload: Payload, topic: str) -> int: + """Approximate poll-response contribution of a single item. + + Wire form is ``_WireItem(topic, base64(proto(Payload)), offset)``. + Base64 inflates by ~4/3; we use the exact serialized length as a + close-enough proxy. 
+ """ + return (payload.ByteSize() * 4 + 2) // 3 + len(topic) + + +class PubSub: + """Workflow-side pub/sub broker. + + Construct once from ``@workflow.init``; the constructor registers + the pub/sub signal, update, and query handlers on the current + workflow. Raises :class:`RuntimeError` if a ``PubSub`` has already + been registered on the workflow. + + Registered handlers: + + - ``__pubsub_publish`` signal — external publish with dedup + - ``__pubsub_poll`` update — long-poll subscription + - ``__pubsub_offset`` query — current log length + """ + + def __init__(self, prior_state: PubSubState | None = None) -> None: + """Initialize pub/sub state and register workflow handlers. + + Must be called directly from the workflow's ``@workflow.init`` + method. Calls made from ``@workflow.run``, helper methods, or + signal/update/query handlers raise :class:`RuntimeError`. + + The check inspects the immediate caller's frame and requires the + function name to be ``__init__``. A history-length check (expect + length 3 on the first workflow task) is not used because + pre-start signals inflate the first-task history and cache + evictions legitimately re-run ``__init__`` from later tasks. + + Args: + prior_state: State carried from a previous run via + :meth:`get_state` through continue-as-new, or ``None`` + on first start. + + Raises: + RuntimeError: If not called directly from a method named + ``__init__``, or if the pub/sub signal handler is + already registered on this workflow (i.e., ``PubSub`` + was instantiated twice). + + Note: + When carrying state across continue-as-new, type the + carrying field as ``PubSubState | None`` — not ``Any``. The + default data converter deserializes ``Any`` fields as plain + dicts, which silently strips the ``PubSubState`` type and + breaks the new run. 
+ """ + caller = sys._getframe(1) + caller_name = caller.f_code.co_name + if caller_name != "__init__": + raise RuntimeError( + "PubSub must be constructed directly from the workflow's " + f"@workflow.init method, not from {caller_name!r}." + ) + if workflow.get_signal_handler(_PUBLISH_SIGNAL) is not None: + raise RuntimeError( + "PubSub is already registered on this workflow. " + "Construct PubSub(...) at most once from @workflow.init." + ) + + if prior_state is not None: + self._log: list[PubSubItem] = [ + PubSubItem(topic=item.topic, data=_decode_payload(item.data)) + for item in prior_state.log + ] + self._base_offset: int = prior_state.base_offset + self._publisher_sequences: dict[str, int] = dict( + prior_state.publisher_sequences + ) + self._publisher_last_seen: dict[str, float] = dict( + prior_state.publisher_last_seen + ) + else: + self._log = [] + self._base_offset = 0 + self._publisher_sequences = {} + self._publisher_last_seen = {} + self._draining: bool = False + + workflow.set_signal_handler(_PUBLISH_SIGNAL, self._on_publish) + workflow.set_update_handler( + _POLL_UPDATE, self._on_poll, validator=self._validate_poll + ) + workflow.set_query_handler(_OFFSET_QUERY, self._on_offset) + + def publish(self, topic: str, value: Any) -> None: + """Publish an item from within workflow code. + + ``value`` may be any Python value the workflow's payload + converter can handle, or a pre-built + :class:`temporalio.api.common.v1.Payload` for zero-copy. + + The codec chain is not applied here (it runs on the + ``__pubsub_poll`` update envelope that later delivers the + item to a subscriber). + """ + if isinstance(value, Payload): + payload = value + else: + payload = workflow.payload_converter().to_payloads([value])[0] + self._log.append(PubSubItem(topic=topic, data=payload)) + + def get_state(self, *, publisher_ttl: float = 900.0) -> PubSubState: + """Return a serializable snapshot of pub/sub state for continue-as-new. 
+ + Prunes publisher dedup entries older than ``publisher_ttl`` + seconds. The TTL must exceed the ``max_retry_duration`` of any + client that may still be retrying a failed flush. + + Args: + publisher_ttl: Seconds after which a publisher's dedup + entry is pruned. Default 900 (15 minutes). + """ + now = workflow.time() + + active_sequences: dict[str, int] = {} + active_last_seen: dict[str, float] = {} + for pid, seq in self._publisher_sequences.items(): + ts = self._publisher_last_seen.get(pid, 0.0) + if now - ts < publisher_ttl: + active_sequences[pid] = seq + active_last_seen[pid] = ts + + return PubSubState( + log=[ + _WireItem(topic=item.topic, data=_encode_payload(item.data)) + for item in self._log + ], + base_offset=self._base_offset, + publisher_sequences=active_sequences, + publisher_last_seen=active_last_seen, + ) + + def drain(self) -> None: + """Unblock all waiting poll handlers and reject new polls. + + Call this before + ``await workflow.wait_condition(workflow.all_handlers_finished)`` + and ``workflow.continue_as_new()``. + """ + self._draining = True + + def truncate(self, up_to_offset: int) -> None: + """Discard log entries before ``up_to_offset``. + + After truncation, polls requesting an offset before the new + base will receive a ValueError. All global offsets remain + monotonic. + + Args: + up_to_offset: The global offset to truncate up to + (exclusive). Entries at offsets + ``[base_offset, up_to_offset)`` are discarded. + """ + log_index = up_to_offset - self._base_offset + if log_index <= 0: + return + if log_index > len(self._log): + raise ValueError( + f"Cannot truncate to offset {up_to_offset}: " + f"only {self._base_offset + len(self._log)} " + f"items exist" + ) + self._log = self._log[log_index:] + self._base_offset = up_to_offset + + def _on_publish(self, payload: PublishInput) -> None: + """Receive publications from external clients (activities, starters). + + Deduplicates using (publisher_id, sequence). 
If publisher_id is + set and the sequence is <= the last seen sequence for that + publisher, the entire batch is dropped as a duplicate. Batches + are atomic: the dedup decision applies to the whole batch, not + individual items. + """ + if payload.publisher_id: + last_seq = self._publisher_sequences.get(payload.publisher_id, 0) + if payload.sequence <= last_seq: + return + self._publisher_sequences[payload.publisher_id] = payload.sequence + self._publisher_last_seen[payload.publisher_id] = workflow.time() + for entry in payload.items: + self._log.append( + PubSubItem(topic=entry.topic, data=_decode_payload(entry.data)) + ) + + async def _on_poll(self, payload: PollInput) -> PollResult: + """Long-poll: block until new items available or draining, then return.""" + log_offset = payload.from_offset - self._base_offset + if log_offset < 0: + if payload.from_offset == 0: + # "From the beginning" — start at whatever is available. + log_offset = 0 + else: + # Subscriber had a specific position that's been + # truncated. ApplicationError fails this update (client + # gets the error) without crashing the workflow task — + # avoids a poison pill during replay. + raise ApplicationError( + f"Requested offset {payload.from_offset} has been truncated. " + f"Current base offset is {self._base_offset}.", + type="TruncatedOffset", + non_retryable=True, + ) + await workflow.wait_condition( + lambda: len(self._log) > log_offset or self._draining, + ) + all_new = self._log[log_offset:] + if payload.topics: + topic_set = set(payload.topics) + candidates = [ + (self._base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + if item.topic in topic_set + ] + else: + candidates = [ + (self._base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + ] + # Cap response size to ~1MB wire bytes. 
+ wire_items: list[_WireItem] = [] + size = 0 + more_ready = False + next_offset = self._base_offset + len(self._log) + for off, item in candidates: + item_size = _payload_wire_size(item.data, item.topic) + if size + item_size > _MAX_POLL_RESPONSE_BYTES and wire_items: + # Resume from this item on the next poll. + next_offset = off + more_ready = True + break + size += item_size + wire_items.append( + _WireItem(topic=item.topic, data=_encode_payload(item.data), offset=off) + ) + return PollResult( + items=wire_items, + next_offset=next_offset, + more_ready=more_ready, + ) + + def _validate_poll(self, payload: PollInput) -> None: # noqa: ARG002 + """Reject new polls when draining for continue-as-new.""" + del payload + if self._draining: + raise RuntimeError("Workflow is draining for continue-as-new") + + def _on_offset(self) -> int: + """Return the current global offset (base_offset + log length).""" + return self._base_offset + len(self._log) diff --git a/temporalio/contrib/pubsub/_client.py b/temporalio/contrib/pubsub/_client.py new file mode 100644 index 000000000..ba3eda52a --- /dev/null +++ b/temporalio/contrib/pubsub/_client.py @@ -0,0 +1,425 @@ +"""External-side pub/sub client. + +Used by activities, starters, and any code with a workflow handle to +publish messages and subscribe to topics on a pub/sub workflow. + +Each published value is turned into a :class:`Payload` via the client's +sync payload converter. The **codec chain** (encryption, PII-redaction, +compression) is **not** run per item — it runs once at the envelope +level when Temporal's SDK encodes the ``__pubsub_publish`` signal args +and the ``__pubsub_poll`` update result. Running the codec per item as +well would double-encrypt / double-compress, because the envelope path +covers the items again. The per-item ``Payload`` still carries the +encoding metadata (``encoding: json/plain``, ``messageType``, etc.) +required by ``subscribe(result_type=T)`` on the consumer side. 
+""" + +from __future__ import annotations + +import asyncio +import time +import uuid +from collections.abc import AsyncIterator +from typing import Any + +from typing_extensions import Self + +from temporalio import activity +from temporalio.api.common.v1 import Payload +from temporalio.client import ( + Client, + WorkflowExecutionStatus, + WorkflowHandle, + WorkflowUpdateFailedError, + WorkflowUpdateRPCTimeoutOrCancelledError, +) +from temporalio.converter import DataConverter, PayloadConverter + +from ._types import ( + PollInput, + PollResult, + PublishEntry, + PublishInput, + PubSubItem, + _decode_payload, + _encode_payload, +) + + +class PubSubClient: + """Client for publishing to and subscribing from a pub/sub workflow. + + Create via :py:meth:`create` (explicit client + workflow id), + :py:meth:`from_activity` (infer both from the current activity + context), or by passing a handle directly to the constructor. + + For publishing, use as an async context manager to get automatic + batching:: + + client = PubSubClient.create(temporal_client, workflow_id) + async with client: + client.publish("events", my_event) + client.publish("events", another_event, force_flush=True) + + For subscribing:: + + client = PubSubClient.create(temporal_client, workflow_id) + async for item in client.subscribe(["events"], result_type=MyEvent): + process(item.data) + """ + + def __init__( + self, + handle: WorkflowHandle[Any, Any], + *, + client: Client | None = None, + batch_interval: float = 2.0, + max_batch_size: int | None = None, + max_retry_duration: float = 600.0, + ) -> None: + """Create a pub/sub client from a workflow handle. + + Prefer :py:meth:`create` — it enables continue-as-new following + in ``subscribe()`` and supplies the :class:`Client` needed to + reach the data converter chain. + + Args: + handle: Workflow handle to the pub/sub workflow. 
+ client: Temporal client whose payload converter will be used + to turn published values into ``Payload`` objects and to + decode subscriptions when ``result_type`` is set. The + codec chain is **not** applied per item (doing so would + double-encrypt — see module docstring). If ``None``, the + default payload converter is used. + batch_interval: Seconds between automatic flushes. + max_batch_size: Auto-flush when buffer reaches this size. + max_retry_duration: Maximum seconds to retry a failed flush + before raising TimeoutError. Must be less than the + workflow's ``publisher_ttl`` (default 900s) to preserve + exactly-once delivery. Default: 600s. + """ + self._handle: WorkflowHandle[Any, Any] = handle + self._client: Client | None = client + self._workflow_id = handle.id + self._batch_interval = batch_interval + self._max_batch_size = max_batch_size + self._max_retry_duration = max_retry_duration + self._buffer: list[tuple[str, Any]] = [] + self._flush_event = asyncio.Event() + self._flush_task: asyncio.Task[None] | None = None + self._flush_lock = asyncio.Lock() + self._publisher_id: str = uuid.uuid4().hex[:16] + self._sequence: int = 0 + self._pending: list[PublishEntry] | None = None + self._pending_seq: int = 0 + self._pending_since: float | None = None + + @classmethod + def create( + cls, + client: Client, + workflow_id: str, + *, + batch_interval: float = 2.0, + max_batch_size: int | None = None, + max_retry_duration: float = 600.0, + ) -> PubSubClient: + """Create a pub/sub client from a Temporal client and workflow ID. + + Use this when the caller has an explicit ``Client`` and + ``workflow_id`` in hand (starters, BFFs, other workflows' + activities). For code running inside an activity that targets + its own parent workflow, see :py:meth:`from_activity`. + + A client created through this method follows continue-as-new + chains in ``subscribe()`` and uses the client's payload + converter for per-item ``Payload`` construction. 
+ + Args: + client: Temporal client. + workflow_id: ID of the pub/sub workflow. + batch_interval: Seconds between automatic flushes. + max_batch_size: Auto-flush when buffer reaches this size. + max_retry_duration: Maximum seconds to retry a failed flush + before raising TimeoutError. Default: 600s. + """ + handle = client.get_workflow_handle(workflow_id) + return cls( + handle, + client=client, + batch_interval=batch_interval, + max_batch_size=max_batch_size, + max_retry_duration=max_retry_duration, + ) + + @classmethod + def from_activity( + cls, + *, + batch_interval: float = 2.0, + max_batch_size: int | None = None, + max_retry_duration: float = 600.0, + ) -> PubSubClient: + """Create a pub/sub client targeting the current activity's parent workflow. + + Must be called from within an activity. The Temporal client and + parent workflow id are taken from the activity context. + + Args: + batch_interval: Seconds between automatic flushes. + max_batch_size: Auto-flush when buffer reaches this size. + max_retry_duration: Maximum seconds to retry a failed flush + before raising TimeoutError. Default: 600s. + """ + info = activity.info() + workflow_id = info.workflow_id + assert ( + workflow_id is not None + ), "from_activity requires an activity with a parent workflow" + return cls.create( + activity.client(), + workflow_id, + batch_interval=batch_interval, + max_batch_size=max_batch_size, + max_retry_duration=max_retry_duration, + ) + + async def __aenter__(self) -> Self: + """Start the background flusher task.""" + self._flush_task = asyncio.create_task(self._run_flusher()) + return self + + async def __aexit__(self, *_exc: object) -> None: + """Stop the flusher and flush any remaining buffered entries.""" + if self._flush_task: + self._flush_task.cancel() + try: + await self._flush_task + except asyncio.CancelledError: + pass + self._flush_task = None + # Drain both pending and buffer. 
A single _flush() processes + # either pending OR buffer, not both — so if the flusher was + # cancelled mid-signal (pending set) while the producer added + # more items (buffer non-empty), a single final flush would + # orphan the buffer. + while self._pending is not None or self._buffer: + await self._flush() + + def publish(self, topic: str, value: Any, force_flush: bool = False) -> None: + """Buffer a message for publishing. + + ``value`` may be any Python value the client's payload + converter can handle, or a pre-built + :class:`temporalio.api.common.v1.Payload` for zero-copy. The + codec chain is not applied per item — it runs once on the + signal envelope that delivers the batch. + + Args: + topic: Topic string. + value: Value to publish. Converted to a ``Payload`` via + the client's sync payload converter at flush time. + Pre-built ``Payload`` instances bypass conversion. + force_flush: If True, wake the flusher to send immediately + (fire-and-forget — does not block the caller). + """ + self._buffer.append((topic, value)) + if force_flush or ( + self._max_batch_size is not None + and len(self._buffer) >= self._max_batch_size + ): + self._flush_event.set() + + def _payload_converter(self) -> PayloadConverter: + """Return the sync payload converter for per-item encode/decode. + + Uses the configured client's payload converter when available; + otherwise falls back to the default. The codec chain + (encryption, compression, PII-redaction) is intentionally not + invoked here — it runs once at the envelope level when the + signal/update goes over the wire. See module docstring. + """ + if self._client is not None: + return self._client.data_converter.payload_converter + return DataConverter.default.payload_converter + + def _encode_buffer(self, entries: list[tuple[str, Any]]) -> list[PublishEntry]: + """Convert buffered (topic, value) pairs to wire entries. 
+ + Non-Payload values go through the sync payload converter so the + resulting ``Payload`` carries encoding metadata for + ``result_type=`` decode on the consumer side. Pre-built + Payloads bypass conversion. + """ + converter = self._payload_converter() + out: list[PublishEntry] = [] + for topic, value in entries: + if isinstance(value, Payload): + payload = value + else: + payload = converter.to_payloads([value])[0] + out.append(PublishEntry(topic=topic, data=_encode_payload(payload))) + return out + + async def _flush(self) -> None: + """Send buffered or pending messages to the workflow via signal. + + On failure, the pending batch and sequence are kept for retry. + Only advances the confirmed sequence on success. + """ + async with self._flush_lock: + if self._pending is not None: + # Retry path: check max_retry_duration + if ( + self._pending_since is not None + and time.monotonic() - self._pending_since + > self._max_retry_duration + ): + # Advance confirmed sequence so the next batch gets + # a fresh sequence number. Without this, the next + # batch reuses pending_seq, which the workflow may + # have already accepted — causing silent dedup + # (data loss). See DropPendingFixed / + # SequenceFreshness in the design doc. + self._sequence = self._pending_seq + self._pending = None + self._pending_seq = 0 + self._pending_since = None + raise TimeoutError( + f"Flush retry exceeded max_retry_duration " + f"({self._max_retry_duration}s). Pending batch dropped. " + f"If the signal was delivered, items are in the log. " + f"If not, they are lost." 
+ ) + batch = self._pending + seq = self._pending_seq + elif self._buffer: + # New batch path + raw = self._buffer + self._buffer = [] + batch = self._encode_buffer(raw) + seq = self._sequence + 1 + self._pending = batch + self._pending_seq = seq + self._pending_since = time.monotonic() + else: + return + + try: + await self._handle.signal( + "__pubsub_publish", + PublishInput( + items=batch, + publisher_id=self._publisher_id, + sequence=seq, + ), + ) + # Success: advance confirmed sequence, clear pending + self._sequence = seq + self._pending = None + self._pending_seq = 0 + self._pending_since = None + except Exception: + # Pending stays set for retry on the next _flush() call + raise + + async def _run_flusher(self) -> None: + """Background task: wait for timer OR force_flush wakeup, then flush.""" + while True: + try: + await asyncio.wait_for( + self._flush_event.wait(), timeout=self._batch_interval + ) + except asyncio.TimeoutError: + pass + self._flush_event.clear() + await self._flush() + + async def subscribe( + self, + topics: list[str] | None = None, + from_offset: int = 0, + *, + result_type: type | None = None, + poll_cooldown: float = 0.1, + ) -> AsyncIterator[PubSubItem]: + """Async iterator that polls for new items. + + Automatically follows continue-as-new chains when the client + was created via :py:meth:`create`. + + Args: + topics: Topic filter. None or empty list means all topics. + from_offset: Global offset to start reading from. + result_type: Optional target type. When provided, each + yielded :class:`PubSubItem` has its ``data`` decoded + via the client's sync payload converter to the + specified type. When omitted, ``data`` is the raw + :class:`~temporalio.api.common.v1.Payload` — useful + for heterogeneous topics where the caller dispatches + on ``Payload.metadata``. + poll_cooldown: Minimum seconds between polls to avoid + overwhelming the workflow when items arrive faster + than the poll round-trip. Defaults to 0.1. 
+ + Yields: + :class:`PubSubItem` for each matching item. + """ + offset = from_offset + while True: + try: + result: PollResult = await self._handle.execute_update( + "__pubsub_poll", + PollInput(topics=topics or [], from_offset=offset), + result_type=PollResult, + ) + except asyncio.CancelledError: + return + except WorkflowUpdateFailedError as e: + if e.cause and getattr(e.cause, "type", None) == "TruncatedOffset": + # Subscriber fell behind truncation. Retry from + # offset 0 which the mixin treats as "from the + # beginning of whatever exists" (i.e., from + # base_offset). + offset = 0 + continue + raise + except WorkflowUpdateRPCTimeoutOrCancelledError: + if await self._follow_continue_as_new(): + continue + return + converter = self._payload_converter() + for wire_item in result.items: + payload = _decode_payload(wire_item.data) + if result_type is not None: + data: Any = converter.from_payload(payload, result_type) + else: + data = payload + yield PubSubItem( + topic=wire_item.topic, + data=data, + offset=wire_item.offset, + ) + offset = result.next_offset + if not result.more_ready and poll_cooldown > 0: + await asyncio.sleep(poll_cooldown) + + async def _follow_continue_as_new(self) -> bool: + """Check if the workflow continued-as-new and re-target the handle. + + Returns True if the handle was updated (caller should retry). 
+ """ + if self._client is None: + return False + try: + desc = await self._handle.describe() + except Exception: + return False + if desc.status == WorkflowExecutionStatus.CONTINUED_AS_NEW: + self._handle = self._client.get_workflow_handle(self._workflow_id) + return True + return False + + async def get_offset(self) -> int: + """Query the current global offset (base_offset + log length).""" + return await self._handle.query("__pubsub_offset", result_type=int) diff --git a/temporalio/contrib/pubsub/_types.py b/temporalio/contrib/pubsub/_types.py new file mode 100644 index 000000000..e3fcb2acf --- /dev/null +++ b/temporalio/contrib/pubsub/_types.py @@ -0,0 +1,128 @@ +"""Shared data types for the pub/sub contrib module. + +The user-facing ``data`` fields on :class:`PubSubItem` are +:class:`temporalio.api.common.v1.Payload` so that user codec chains +(encryption, PII-redaction, compression) apply per item. See +``DESIGN-v2.md`` §5 and ``docs/pubsub-payload-migration.md``. + +The wire representation (``PublishEntry``, ``_WireItem``) uses +base64-encoded ``Payload.SerializeToString()`` bytes because the default +JSON payload converter cannot serialize a ``Payload`` embedded inside a +dataclass (it only special-cases top-level Payloads on signal/update +args). Round-trip validated in +``tests/contrib/pubsub/test_payload_roundtrip_prototype.py``. 
+""" + +from __future__ import annotations + +import base64 +from dataclasses import dataclass, field +from typing import Any + +from temporalio.api.common.v1 import Payload + + +def _encode_payload(payload: Payload) -> str: # pyright: ignore[reportUnusedFunction] + """Wire format: base64(Payload.SerializeToString()).""" + return base64.b64encode(payload.SerializeToString()).decode("ascii") + + +def _decode_payload(wire: str) -> Payload: # pyright: ignore[reportUnusedFunction] + """Inverse of :func:`_encode_payload`.""" + payload = Payload() + payload.ParseFromString(base64.b64decode(wire)) + return payload + + +@dataclass +class PubSubItem: + """A single item in the pub/sub log. + + The ``data`` field is a :class:`temporalio.api.common.v1.Payload` + as stored by the mixin and yielded by + :meth:`PubSubClient.subscribe` when no ``result_type`` is given. + When ``result_type`` is passed to ``subscribe``, ``data`` holds the + decoded value of that type instead — the dataclass is typed as + ``Any`` to accommodate both. + + The ``offset`` field is populated at poll time from the item's + position in the global log. + """ + + topic: str + data: Any + offset: int = 0 + + +@dataclass +class PublishEntry: + """A single entry to publish via signal (wire type). + + ``data`` is base64-encoded ``Payload.SerializeToString()`` output — + see module docstring for why a nested ``Payload`` cannot be used + directly. + """ + + topic: str + data: str + + +@dataclass +class PublishInput: + """Signal payload: batch of entries to publish. + + Includes publisher_id and sequence to ensure exactly-once delivery. 
+ """ + + items: list[PublishEntry] = field(default_factory=list) + publisher_id: str = "" + sequence: int = 0 + + +@dataclass +class PollInput: + """Update payload: request to poll for new items.""" + + topics: list[str] = field(default_factory=list) + from_offset: int = 0 + + +@dataclass +class _WireItem: + """Wire representation of a PubSubItem (base64 of serialized Payload).""" + + topic: str + data: str + offset: int = 0 + + +@dataclass +class PollResult: + """Update response: items matching the poll request. + + ``items`` use the wire representation. When ``more_ready`` is True, + the response was truncated to stay within size limits and the + subscriber should poll again immediately rather than applying a + cooldown delay. + """ + + items: list[_WireItem] = field(default_factory=list) + next_offset: int = 0 + more_ready: bool = False + + +@dataclass +class PubSubState: + """Serializable snapshot of pub/sub state for continue-as-new. + + The containing workflow input must type the field as + ``PubSubState | None``, not ``Any``, so the default data converter + can reconstruct the dataclass from JSON. + + Log items use the wire representation for serialization stability. + """ + + log: list[_WireItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) diff --git a/tests/contrib/google_adk_agents/test_adk_streaming.py b/tests/contrib/google_adk_agents/test_adk_streaming.py new file mode 100644 index 000000000..27c87b453 --- /dev/null +++ b/tests/contrib/google_adk_agents/test_adk_streaming.py @@ -0,0 +1,196 @@ +"""Integration tests for ADK streaming support. + +Verifies that the streaming model activity publishes TEXT_DELTA events via +the PubSub broker and that non-streaming mode remains backward-compatible. 
+""" + +import asyncio +import json +import logging +import uuid +from collections.abc import AsyncGenerator +from datetime import timedelta + +import pytest +from google.adk import Agent +from google.adk.models import BaseLlm, LLMRegistry +from google.adk.models.llm_request import LlmRequest +from google.adk.models.llm_response import LlmResponse +from google.adk.runners import InMemoryRunner +from google.genai.types import Content, Part + +from temporalio import workflow +from temporalio.client import Client +from temporalio.contrib.google_adk_agents import GoogleAdkPlugin, TemporalModel +from temporalio.contrib.pubsub import PubSub, PubSubClient +from temporalio.worker import Worker + +logger = logging.getLogger(__name__) + + +class StreamingTestModel(BaseLlm): + """Test model that yields multiple partial responses to simulate streaming.""" + + @classmethod + def supported_models(cls) -> list[str]: + return ["streaming_test_model"] + + async def generate_content_async( + self, llm_request: LlmRequest, stream: bool = False + ) -> AsyncGenerator[LlmResponse, None]: + yield LlmResponse(content=Content(role="model", parts=[Part(text="Hello ")])) + yield LlmResponse(content=Content(role="model", parts=[Part(text="world!")])) + + +@workflow.defn +class StreamingAdkWorkflow: + """Test workflow that uses streaming TemporalModel with PubSub.""" + + @workflow.init + def __init__(self, prompt: str) -> None: + self.pubsub = PubSub() + + @workflow.run + async def run(self, prompt: str) -> str: + model = TemporalModel("streaming_test_model", streaming=True) + agent = Agent( + name="test_agent", + model=model, + instruction="You are a test agent.", + ) + + runner = InMemoryRunner(agent=agent, app_name="test-app") + session = await runner.session_service.create_session( + app_name="test-app", user_id="test" + ) + + final_text = "" + async for event in runner.run_async( + user_id="test", + session_id=session.id, + new_message=Content(role="user", parts=[Part(text=prompt)]), + ): 
+ if event.content and event.content.parts: + for part in event.content.parts: + if part.text: + final_text = part.text + + return final_text + + +@workflow.defn +class NonStreamingAdkWorkflow: + """Test workflow without streaming -- verifies backward compatibility.""" + + @workflow.run + async def run(self, prompt: str) -> str: + model = TemporalModel("streaming_test_model", streaming=False) + agent = Agent( + name="test_agent", + model=model, + instruction="You are a test agent.", + ) + + runner = InMemoryRunner(agent=agent, app_name="test-app") + session = await runner.session_service.create_session( + app_name="test-app", user_id="test" + ) + + final_text = "" + async for event in runner.run_async( + user_id="test", + session_id=session.id, + new_message=Content(role="user", parts=[Part(text=prompt)]), + ): + if event.content and event.content.parts: + for part in event.content.parts: + if part.text: + final_text = part.text + + return final_text + + +@pytest.mark.asyncio +async def test_streaming_publishes_events(client: Client): + """Verify that streaming activity publishes TEXT_DELTA events via pubsub.""" + LLMRegistry.register(StreamingTestModel) + + new_config = client.config() + new_config["plugins"] = [GoogleAdkPlugin()] + client = Client(**new_config) + + workflow_id = f"adk-streaming-test-{uuid.uuid4()}" + + async with Worker( + client, + task_queue="adk-streaming-test", + workflows=[StreamingAdkWorkflow], + max_cached_workflows=0, + ): + handle = await client.start_workflow( + StreamingAdkWorkflow.run, + "Hello", + id=workflow_id, + task_queue="adk-streaming-test", + execution_timeout=timedelta(seconds=30), + ) + + # Subscribe concurrently while the workflow is running + pubsub = PubSubClient.create(client, workflow_id) + events: list[dict] = [] + + async def collect_events() -> None: + async for item in pubsub.subscribe( + ["events"], from_offset=0, result_type=bytes, poll_cooldown=0.05 + ): + event = json.loads(item.data) + events.append(event) + if 
event["type"] == "LLM_CALL_COMPLETE": + break + + collect_task = asyncio.create_task(collect_events()) + result = await handle.result() + + # Wait for event collection with a timeout + await asyncio.wait_for(collect_task, timeout=10.0) + + assert result is not None + + event_types = [e["type"] for e in events] + assert ( + "LLM_CALL_START" in event_types + ), f"Expected LLM_CALL_START, got: {event_types}" + assert "TEXT_DELTA" in event_types, f"Expected TEXT_DELTA, got: {event_types}" + assert ( + "LLM_CALL_COMPLETE" in event_types + ), f"Expected LLM_CALL_COMPLETE, got: {event_types}" + + text_deltas = [e["data"]["delta"] for e in events if e["type"] == "TEXT_DELTA"] + assert len(text_deltas) >= 1, f"Expected at least 1 TEXT_DELTA, got: {text_deltas}" + + +@pytest.mark.asyncio +async def test_non_streaming_backward_compatible(client: Client): + """Verify non-streaming mode still works (backward compatibility).""" + LLMRegistry.register(StreamingTestModel) + + new_config = client.config() + new_config["plugins"] = [GoogleAdkPlugin()] + client = Client(**new_config) + + async with Worker( + client, + task_queue="adk-non-streaming-test", + workflows=[NonStreamingAdkWorkflow], + max_cached_workflows=0, + ): + handle = await client.start_workflow( + NonStreamingAdkWorkflow.run, + "Hello", + id=f"adk-non-streaming-test-{uuid.uuid4()}", + task_queue="adk-non-streaming-test", + execution_timeout=timedelta(seconds=30), + ) + result = await handle.result() + + assert result is not None diff --git a/tests/contrib/openai_agents/test_openai_streaming.py b/tests/contrib/openai_agents/test_openai_streaming.py new file mode 100644 index 000000000..f4598506a --- /dev/null +++ b/tests/contrib/openai_agents/test_openai_streaming.py @@ -0,0 +1,345 @@ +"""Integration tests for OpenAI Agents streaming support. + +The streaming activity publishes raw OpenAI stream events to the pubsub +side channel; consumers parse them directly. 
These tests verify that the +events arrive intact and that the workflow still returns the right final +result from the ResponseCompletedEvent. +""" + +import asyncio +import json +import logging +import uuid +from collections.abc import AsyncIterator +from datetime import timedelta +from typing import Any + +import pytest +from agents import ( + Agent, + AgentOutputSchemaBase, + Handoff, + Model, + ModelResponse, + ModelSettings, + ModelTracing, + Runner, + Tool, + TResponseInputItem, + Usage, +) +from agents.items import TResponseStreamEvent +from openai.types.responses import ( + Response, + ResponseCompletedEvent, + ResponseOutputMessage, + ResponseOutputText, + ResponseTextConfig, + ResponseTextDeltaEvent, + ResponseUsage, +) +from openai.types.responses.response_usage import ( + InputTokensDetails, + OutputTokensDetails, +) +from openai.types.shared.response_format_text import ResponseFormatText + +from temporalio import workflow +from temporalio.client import Client, WorkflowFailureError +from temporalio.contrib.openai_agents import ModelActivityParameters +from temporalio.contrib.openai_agents.testing import AgentEnvironment +from temporalio.contrib.pubsub import PubSub, PubSubClient +from temporalio.exceptions import ApplicationError +from tests.helpers import new_worker + +logger = logging.getLogger(__name__) + + +class StreamingTestModel(Model): + """Test model that yields text deltas followed by a ResponseCompletedEvent.""" + + __test__ = False + + async def get_response( + self, + system_instructions: str | None, + input: str | list[TResponseInputItem], + model_settings: ModelSettings, + tools: list[Tool], + output_schema: AgentOutputSchemaBase | None, + handoffs: list[Handoff], + tracing: ModelTracing, + **kwargs: Any, + ) -> ModelResponse: + return ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_test", + content=[ + ResponseOutputText( + text="Hello world!", + annotations=[], + type="output_text", + logprobs=[], + ) + ], + 
role="assistant", + status="completed", + type="message", + ) + ], + usage=Usage(), + response_id=None, + ) + + async def stream_response( + self, + system_instructions: str | None, + input: str | list[TResponseInputItem], + model_settings: ModelSettings, + tools: list[Tool], + output_schema: AgentOutputSchemaBase | None, + handoffs: list[Handoff], + tracing: ModelTracing, + **kwargs: Any, + ) -> AsyncIterator[TResponseStreamEvent]: + # Yield text deltas + yield ResponseTextDeltaEvent( + content_index=0, + delta="Hello ", + item_id="item1", + output_index=0, + sequence_number=0, + type="response.output_text.delta", + logprobs=[], + ) + yield ResponseTextDeltaEvent( + content_index=0, + delta="world!", + item_id="item1", + output_index=0, + sequence_number=1, + type="response.output_text.delta", + logprobs=[], + ) + + # Yield the final completed event + response = Response( + id="resp_test", + created_at=0, + error=None, + incomplete_details=None, + instructions=None, + metadata={}, + model="test", + object="response", + output=[ + ResponseOutputMessage( + id="msg_test", + content=[ + ResponseOutputText( + text="Hello world!", + annotations=[], + type="output_text", + logprobs=[], + ) + ], + role="assistant", + status="completed", + type="message", + ) + ], + parallel_tool_calls=True, + temperature=1.0, + tool_choice="auto", + tools=[], + top_p=1.0, + status="completed", + text=ResponseTextConfig(format=ResponseFormatText(type="text")), + truncation="disabled", + usage=ResponseUsage( + input_tokens=10, + output_tokens=5, + total_tokens=15, + input_tokens_details=InputTokensDetails(cached_tokens=0), + output_tokens_details=OutputTokensDetails(reasoning_tokens=0), + ), + ) + yield ResponseCompletedEvent( + response=response, sequence_number=2, type="response.completed" + ) + + +@workflow.defn +class StreamingOpenAIWorkflow: + """Test workflow that uses streaming model activity with PubSub.""" + + @workflow.init + def __init__(self, prompt: str) -> None: + self.pubsub 
= PubSub() + + @workflow.run + async def run(self, prompt: str) -> str: + agent = Agent[None]( + name="Assistant", + instructions="You are a test agent.", + ) + result = await Runner.run(starting_agent=agent, input=prompt) + return result.final_output + + +@workflow.defn +class NonStreamingOpenAIWorkflow: + """Test workflow without streaming -- verifies backward compatibility.""" + + @workflow.run + async def run(self, prompt: str) -> str: + agent = Agent[None]( + name="Assistant", + instructions="You are a test agent.", + ) + result = await Runner.run(starting_agent=agent, input=prompt) + return result.final_output + + +@pytest.mark.asyncio +async def test_streaming_publishes_raw_events(client: Client): + """Every event from model.stream_response() lands on the pubsub topic + as its native OpenAI Pydantic JSON, and the workflow gets the final + text from the ResponseCompletedEvent.""" + async with AgentEnvironment( + model=StreamingTestModel(), + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=30), + enable_streaming=True, + ), + ) as env: + client = env.applied_on_client(client) + workflow_id = f"openai-streaming-test-{uuid.uuid4()}" + + async with new_worker( + client, StreamingOpenAIWorkflow, max_cached_workflows=0 + ) as worker: + handle = await client.start_workflow( + StreamingOpenAIWorkflow.run, + "Hello", + id=workflow_id, + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + + pubsub = PubSubClient.create(client, workflow_id) + events: list[dict] = [] + + async def collect_events() -> None: + async for item in pubsub.subscribe( + ["events"], from_offset=0, result_type=bytes, poll_cooldown=0.05 + ): + event = json.loads(item.data) + events.append(event) + if event["type"] == "response.completed": + break + + collect_task = asyncio.create_task(collect_events()) + result = await handle.result() + await asyncio.wait_for(collect_task, timeout=10.0) + + assert result == "Hello world!" 
+ + # Exact event sequence matches what StreamingTestModel yields — no + # normalization, no synthesized brackets. + types_in_order = [e["type"] for e in events] + assert types_in_order == [ + "response.output_text.delta", + "response.output_text.delta", + "response.completed", + ], f"Unexpected event sequence: {types_in_order}" + + deltas = [e["delta"] for e in events if e["type"] == "response.output_text.delta"] + assert deltas == ["Hello ", "world!"] + + +@pytest.mark.asyncio +async def test_non_streaming_backward_compatible(client: Client): + """Verify non-streaming mode still works (backward compatibility).""" + model = StreamingTestModel() + async with AgentEnvironment( + model=model, + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=30), + enable_streaming=False, + ), + ) as env: + client = env.applied_on_client(client) + + async with new_worker( + client, + NonStreamingOpenAIWorkflow, + max_cached_workflows=0, + ) as worker: + result = await client.execute_workflow( + NonStreamingOpenAIWorkflow.run, + "Hello", + id=f"openai-non-streaming-test-{uuid.uuid4()}", + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + + assert result == "Hello world!" 
+ + +class TruncatedStreamingTestModel(Model): + """Fake model whose stream ends without a ResponseCompletedEvent.""" + + __test__ = False + + async def get_response(self, *a: Any, **kw: Any) -> ModelResponse: + raise NotImplementedError + + async def stream_response( + self, *a: Any, **kw: Any + ) -> AsyncIterator[TResponseStreamEvent]: + yield ResponseTextDeltaEvent( + content_index=0, + delta="partial", + item_id="item1", + output_index=0, + sequence_number=0, + type="response.output_text.delta", + logprobs=[], + ) + + +@pytest.mark.asyncio +async def test_streaming_raises_when_no_completed_event(client: Client): + """A stream that ends without ResponseCompletedEvent surfaces as a + non-retryable ApplicationError on the workflow.""" + async with AgentEnvironment( + model=TruncatedStreamingTestModel(), + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=30), + enable_streaming=True, + ), + ) as env: + client = env.applied_on_client(client) + async with new_worker( + client, StreamingOpenAIWorkflow, max_cached_workflows=0 + ) as worker: + with pytest.raises(WorkflowFailureError) as exc_info: + await client.execute_workflow( + StreamingOpenAIWorkflow.run, + "Hi", + id=f"openai-streaming-truncated-{uuid.uuid4()}", + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + + # Unwrap: WorkflowFailureError -> ActivityError -> ApplicationError + cause = exc_info.value.__cause__ + while cause is not None and not isinstance(cause, ApplicationError): + cause = cause.__cause__ + assert isinstance( + cause, ApplicationError + ), f"Expected ApplicationError cause, got {exc_info.value!r}" + assert "Stream ended without ResponseCompletedEvent" in str(cause) + assert cause.non_retryable is True diff --git a/tests/contrib/pubsub/__init__.py b/tests/contrib/pubsub/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/contrib/pubsub/test_payload_roundtrip_prototype.py 
b/tests/contrib/pubsub/test_payload_roundtrip_prototype.py new file mode 100644 index 000000000..b020d3e4f --- /dev/null +++ b/tests/contrib/pubsub/test_payload_roundtrip_prototype.py @@ -0,0 +1,145 @@ +"""Prototype tests that de-risked the pubsub bytes -> Payload migration. + +The migration doc (``docs/pubsub-payload-migration.md``) flagged two +load-bearing questions, answered empirically here: + +1. Does the default JSON converter handle ``Payload`` embedded in a + dataclass? **No** — serialization fails with ``TypeError``. This + rules out a naive nested-Payload wire format. +2. Does a proto-serialized ``Payload`` inside a dataclass round-trip? + **Yes**. This is the wire format the migration adopts: base64 of + ``Payload.SerializeToString()`` inside ``PublishEntry``/``_WireItem``, + surfacing ``Payload`` (or a decoded value via ``result_type=``) at + the user API. + +Kept as a regression guard: if a future payload converter change makes +(1) succeed, the migration could in principle reclaim a zero-copy wire +format; if (2) regresses, the migration breaks. 
+""" + +from __future__ import annotations + +import base64 +import uuid +from dataclasses import dataclass, field + +import pytest + +from temporalio import workflow +from temporalio.api.common.v1 import Payload +from temporalio.client import Client +from tests.helpers import new_worker + + +@dataclass +class NestedPayloadEnvelope: + items: list[Payload] = field(default_factory=list) + + +@dataclass +class SerializedEntry: + topic: str + data: str # base64(Payload.SerializeToString()) + + +@dataclass +class SerializedEnvelope: + items: list[SerializedEntry] = field(default_factory=list) + + +@workflow.defn +class NestedPayloadWorkflow: + def __init__(self) -> None: + self._received: NestedPayloadEnvelope | None = None + + @workflow.signal + def receive(self, envelope: NestedPayloadEnvelope) -> None: + self._received = envelope + + @workflow.query + def decoded_strings(self) -> list[str]: + assert self._received is not None + conv = workflow.payload_converter() + return [conv.from_payload(p, str) for p in self._received.items] + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._received is not None) + + +@workflow.defn +class SerializedPayloadWorkflow: + def __init__(self) -> None: + self._received: SerializedEnvelope | None = None + + @workflow.signal + def receive(self, envelope: SerializedEnvelope) -> None: + self._received = envelope + + @workflow.query + def decoded_strings(self) -> list[str]: + assert self._received is not None + conv = workflow.payload_converter() + out: list[str] = [] + for entry in self._received.items: + p = Payload() + p.ParseFromString(base64.b64decode(entry.data)) + out.append(conv.from_payload(p, str)) + return out + + @workflow.query + def topics(self) -> list[str]: + assert self._received is not None + return [e.topic for e in self._received.items] + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._received is not None) + + +@pytest.mark.asyncio 
+async def test_nested_payload_in_dataclass_fails(client: Client) -> None: + """Confirm the load-bearing negative result: Payload inside dataclass doesn't serialize.""" + conv = client.data_converter.payload_converter + payloads = [conv.to_payloads([v])[0] for v in ["hello", "world"]] + envelope = NestedPayloadEnvelope(items=payloads) + + async with new_worker(client, NestedPayloadWorkflow) as worker: + handle = await client.start_workflow( + NestedPayloadWorkflow.run, + id=f"nested-payload-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + with pytest.raises(TypeError, match="Payload is not JSON serializable"): + await handle.signal(NestedPayloadWorkflow.receive, envelope) + await handle.terminate() + + +@pytest.mark.asyncio +async def test_serialized_payload_fallback_round_trips(client: Client) -> None: + """Proto-serialize Payload -> base64 -> dataclass round-trips through signal.""" + conv = client.data_converter.payload_converter + originals = ["hello", "world", "payload"] + payloads = [conv.to_payloads([v])[0] for v in originals] + envelope = SerializedEnvelope( + items=[ + SerializedEntry( + topic=f"t{i}", + data=base64.b64encode(p.SerializeToString()).decode("ascii"), + ) + for i, p in enumerate(payloads) + ] + ) + + async with new_worker(client, SerializedPayloadWorkflow) as worker: + handle = await client.start_workflow( + SerializedPayloadWorkflow.run, + id=f"serialized-payload-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + await handle.signal(SerializedPayloadWorkflow.receive, envelope) + decoded = await handle.query(SerializedPayloadWorkflow.decoded_strings) + assert decoded == originals + topics = await handle.query(SerializedPayloadWorkflow.topics) + assert topics == ["t0", "t1", "t2"] + await handle.result() diff --git a/tests/contrib/pubsub/test_pubsub.py b/tests/contrib/pubsub/test_pubsub.py new file mode 100644 index 000000000..626f23ffd --- /dev/null +++ b/tests/contrib/pubsub/test_pubsub.py @@ -0,0 +1,1866 @@ +"""E2E integration 
tests for temporalio.contrib.pubsub.""" + +from __future__ import annotations + +import asyncio +import sys +import uuid +from dataclasses import dataclass +from datetime import timedelta +from typing import Any +from unittest.mock import patch + +if sys.version_info >= (3, 11): + from asyncio import timeout as _async_timeout # pyright: ignore[reportUnreachable] +else: + from async_timeout import ( # pyright: ignore[reportMissingImports, reportUnreachable] + timeout as _async_timeout, + ) + +import google.protobuf.duration_pb2 +import nexusrpc +import nexusrpc.handler +import pytest + +import temporalio.api.nexus.v1 +import temporalio.api.operatorservice.v1 +import temporalio.api.workflowservice.v1 +from temporalio import activity, nexus, workflow +from temporalio.client import Client, WorkflowHandle, WorkflowUpdateFailedError +from temporalio.contrib.pubsub import ( + PollInput, + PollResult, + PublishEntry, + PublishInput, + PubSub, + PubSubClient, + PubSubItem, + PubSubState, +) +from temporalio.contrib.pubsub._types import _encode_payload +from temporalio.converter import DataConverter +from temporalio.nexus import WorkflowRunOperationContext, workflow_run_operation +from temporalio.testing import WorkflowEnvironment +from temporalio.worker import Worker +from tests.helpers import assert_eq_eventually, new_worker +from tests.helpers.nexus import make_nexus_endpoint_name + + +def _wire_bytes(data: bytes) -> str: + """Build a PublishEntry.data string from raw bytes. + + Mirrors what :class:`PubSubClient` produces on the encode path: + default payload converter turns the bytes into a ``Payload``, which + is then proto-serialized and base64-encoded for the wire. 
+ """ + payload = DataConverter.default.payload_converter.to_payloads([data])[0] + return _encode_payload(payload) + + +# --------------------------------------------------------------------------- +# Test workflows (must be module-level, not local classes) +# --------------------------------------------------------------------------- + + +@workflow.defn +class BasicPubSubWorkflow: + @workflow.init + def __init__(self) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class ActivityPublishWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_items", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.pubsub.publish("status", b"activity_done") + await workflow.wait_condition(lambda: self._closed) + + +@dataclass +class AgentEvent: + kind: str + payload: dict[str, Any] + + +@workflow.defn +class StructuredPublishWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + for i in range(count): + self.pubsub.publish("events", AgentEvent(kind="tick", payload={"i": i})) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class WorkflowSidePublishWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, 
count: int) -> None: + for i in range(count): + self.pubsub.publish("events", f"item-{i}".encode()) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class MultiTopicWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_multi_topic", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class InterleavedWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + self.pubsub.publish("status", b"started") + await workflow.execute_activity( + "publish_items", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.pubsub.publish("status", b"done") + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class PriorityWorkflow: + @workflow.init + def __init__(self) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self) -> None: + await workflow.execute_activity( + "publish_with_priority", + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class FlushOnExitWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + 
await workflow.execute_activity( + "publish_batch_test", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class MaxBatchWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.query + def publisher_sequences(self) -> dict[str, int]: + return dict(self.pubsub._publisher_sequences) + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_with_max_batch", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.pubsub.publish("status", b"activity_done") + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class LatePubSubWorkflow: + """Calls PubSub() from @workflow.run, not from @workflow.init. + + The constructor inspects the caller's frame and requires the + function name to be ``__init__``; called from ``run``, it must + raise ``RuntimeError``. The workflow returns the error message so + the test can assert on it without forcing a workflow task failure. + """ + + @workflow.run + async def run(self) -> str: + try: + PubSub() + except RuntimeError as e: + return str(e) + return "no error raised" + + +@workflow.defn +class DoubleInitWorkflow: + """Calls PubSub() twice from @workflow.init. + + The first call succeeds; the second must raise RuntimeError because + the pub/sub signal handler is already registered. The workflow + stashes the error message so the test can assert on it without + forcing a workflow task failure. 
+ """ + + @workflow.init + def __init__(self) -> None: + self.pubsub = PubSub() + self._closed = False + self.double_init_error: str | None = None + try: + PubSub() + except RuntimeError as e: + self.double_init_error = str(e) + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.query + def get_double_init_error(self) -> str | None: + return self.double_init_error + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +# --------------------------------------------------------------------------- +# Activities +# --------------------------------------------------------------------------- + + +@activity.defn(name="publish_items") +async def publish_items(count: int) -> None: + client = PubSubClient.from_activity(batch_interval=0.5) + async with client: + for i in range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + + +@activity.defn(name="publish_multi_topic") +async def publish_multi_topic(count: int) -> None: + topics = ["a", "b", "c"] + client = PubSubClient.from_activity(batch_interval=0.5) + async with client: + for i in range(count): + activity.heartbeat() + topic = topics[i % len(topics)] + client.publish(topic, f"{topic}-{i}".encode()) + + +@activity.defn(name="publish_with_priority") +async def publish_with_priority() -> None: + # Long batch_interval AND long post-publish hold ensure that only a + # working force_flush wakeup can deliver items before __aexit__ flushes. + # The hold is deliberately much longer than the test's collect timeout + # so a regression (force_flush no-op) surfaces as a missing item rather + # than flaking on slow CI. 
+ client = PubSubClient.from_activity(batch_interval=60.0) + async with client: + client.publish("events", b"normal-0") + client.publish("events", b"normal-1") + client.publish("events", b"priority", force_flush=True) + for _ in range(100): + activity.heartbeat() + await asyncio.sleep(0.1) + + +@activity.defn(name="publish_batch_test") +async def publish_batch_test(count: int) -> None: + client = PubSubClient.from_activity(batch_interval=60.0) + async with client: + for i in range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + + +@activity.defn(name="publish_with_max_batch") +async def publish_with_max_batch(count: int) -> None: + client = PubSubClient.from_activity(batch_interval=60.0, max_batch_size=3) + async with client: + for i in range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + # Yield so the flusher task can run when max_batch_size triggers + # _flush_event. Real workloads (e.g. agents awaiting LLM streams) + # yield constantly; a tight loop with no awaits would never let + # the flusher fire and would collapse back to exit-only flushing. + await asyncio.sleep(0) + # Long batch_interval ensures only max_batch_size triggers flushes. + # Context manager exit flushes any remainder. 
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _is_different_run(
+    old_handle: WorkflowHandle[Any, Any],
+    new_handle: WorkflowHandle[Any, Any],
+) -> bool:
+    """Check if new_handle points to a different run than old_handle (False if describe fails)."""
+    try:
+        desc = await new_handle.describe()
+        return desc.run_id != old_handle.result_run_id
+    except Exception:
+        return False
+
+
+async def collect_items(
+    client: Client,
+    handle: WorkflowHandle[Any, Any],
+    topics: list[str] | None,
+    from_offset: int,
+    expected_count: int,
+    timeout: float = 15.0,
+    *,
+    result_type: type | None = bytes,
+) -> list[PubSubItem]:
+    """Subscribe and collect up to expected_count items; returns fewer on timeout.
+
+    Default ``result_type=bytes`` matches the bytes-oriented tests that
+    compare ``item.data`` against literal byte strings. Pass
+    ``result_type=None`` to receive raw ``Payload`` objects.
+ """ + pubsub = PubSubClient.create(client, handle.id) + items: list[PubSubItem] = [] + try: + async with _async_timeout(timeout): + async for item in pubsub.subscribe( + topics=topics, + from_offset=from_offset, + poll_cooldown=0, + result_type=result_type, + ): + items.append(item) + if len(items) >= expected_count: + break + except asyncio.TimeoutError: + pass + return items + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_activity_publish_and_subscribe(client: Client) -> None: + """Activity publishes items, external client subscribes and receives them.""" + count = 10 + async with new_worker( + client, + ActivityPublishWorkflow, + activities=[publish_items], + ) as worker: + handle = await client.start_workflow( + ActivityPublishWorkflow.run, + count, + id=f"pubsub-basic-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # Collect activity items + the "activity_done" status item + items = await collect_items(client, handle, None, 0, count + 1) + assert len(items) == count + 1 + + # Check activity items + for i in range(count): + assert items[i].topic == "events" + assert items[i].data == f"item-{i}".encode() + + # Check workflow-side status item + assert items[count].topic == "status" + assert items[count].data == b"activity_done" + + await handle.signal(ActivityPublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_structured_type_round_trip(client: Client) -> None: + """Workflow publishes dataclass values; subscriber decodes via result_type.""" + count = 4 + async with new_worker(client, StructuredPublishWorkflow) as worker: + handle = await client.start_workflow( + StructuredPublishWorkflow.run, + count, + id=f"pubsub-structured-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + items = await collect_items( + client, handle, None, 0, count, result_type=AgentEvent + ) + assert 
len(items) == count + for i, item in enumerate(items): + assert isinstance(item.data, AgentEvent) + assert item.data == AgentEvent(kind="tick", payload={"i": i}) + + await handle.signal(StructuredPublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_topic_filtering(client: Client) -> None: + """Publish to multiple topics, subscribe with filter.""" + count = 9 # 3 per topic + async with new_worker( + client, + MultiTopicWorkflow, + activities=[publish_multi_topic], + ) as worker: + handle = await client.start_workflow( + MultiTopicWorkflow.run, + count, + id=f"pubsub-filter-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Subscribe to topic "a" only — should get 3 items + a_items = await collect_items(client, handle, ["a"], 0, 3) + assert len(a_items) == 3 + assert all(item.topic == "a" for item in a_items) + + # Subscribe to ["a", "c"] — should get 6 items + ac_items = await collect_items(client, handle, ["a", "c"], 0, 6) + assert len(ac_items) == 6 + assert all(item.topic in ("a", "c") for item in ac_items) + + # Subscribe to all (None) — should get all 9 + all_items = await collect_items(client, handle, None, 0, 9) + assert len(all_items) == 9 + + await handle.signal(MultiTopicWorkflow.close) + + +@pytest.mark.asyncio +async def test_subscribe_from_offset_and_per_item_offsets(client: Client) -> None: + """Subscribe from zero and non-zero offsets; each item carries its global offset.""" + count = 5 + async with new_worker( + client, + WorkflowSidePublishWorkflow, + ) as worker: + handle = await client.start_workflow( + WorkflowSidePublishWorkflow.run, + count, + id=f"pubsub-offset-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Subscribe from offset 0 — all items, offsets 0..count-1 + all_items = await collect_items(client, handle, None, 0, count) + assert len(all_items) == count + for i, item in enumerate(all_items): + assert item.offset == i + assert item.data == f"item-{i}".encode() + + # Subscribe from offset 3 — items 3, 4 with 
offsets 3, 4 + later_items = await collect_items(client, handle, None, 3, 2) + assert len(later_items) == 2 + assert later_items[0].offset == 3 + assert later_items[0].data == b"item-3" + assert later_items[1].offset == 4 + assert later_items[1].data == b"item-4" + + await handle.signal(WorkflowSidePublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_per_item_offsets_with_topic_filter(client: Client) -> None: + """Per-item offsets are global (not per-topic) even when filtering.""" + count = 9 # 3 per topic (a, b, c round-robin) + async with new_worker( + client, + MultiTopicWorkflow, + activities=[publish_multi_topic], + ) as worker: + handle = await client.start_workflow( + MultiTopicWorkflow.run, + count, + id=f"pubsub-item-offset-filter-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Subscribe to topic "a" only — items are at global offsets 0, 3, 6 + a_items = await collect_items(client, handle, ["a"], 0, 3) + assert len(a_items) == 3 + assert a_items[0].offset == 0 + assert a_items[1].offset == 3 + assert a_items[2].offset == 6 + + # Subscribe to topic "b" — items are at global offsets 1, 4, 7 + b_items = await collect_items(client, handle, ["b"], 0, 3) + assert len(b_items) == 3 + assert b_items[0].offset == 1 + assert b_items[1].offset == 4 + assert b_items[2].offset == 7 + + await handle.signal(MultiTopicWorkflow.close) + + +@pytest.mark.asyncio +async def test_poll_truncated_offset_returns_application_error(client: Client) -> None: + """Polling a truncated offset raises ApplicationError (not ValueError) + and does not crash the workflow task.""" + async with new_worker( + client, + TruncateWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateWorkflow.run, + id=f"pubsub-trunc-error-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(f"item-{i}".encode())) + for i in range(5) 
+ ] + ), + ) + + # Truncate up to offset 3 via update — completion is explicit. + await handle.execute_update("truncate", 3) + + # Poll from offset 1 (truncated) — should get ApplicationError, + # NOT crash the workflow task. Catching WorkflowUpdateFailedError is + # sufficient to prove the handler raised ApplicationError: Temporal's + # update protocol completes the update with this error only when the + # handler raises ApplicationError. A bare ValueError (or any other + # exception) would fail the workflow task instead, causing + # execute_update to hang — not raise. The follow-up collect_items + # below proves the workflow task wasn't poisoned. + with pytest.raises(WorkflowUpdateFailedError): + await handle.execute_update( + "__pubsub_poll", + PollInput(topics=[], from_offset=1), + result_type=PollResult, + ) + + # Workflow should still be usable — poll from valid offset 3 + items = await collect_items(client, handle, None, 3, 2) + assert len(items) == 2 + assert items[0].offset == 3 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_subscribe_recovers_from_truncation(client: Client) -> None: + """subscribe() auto-recovers when offset falls behind truncation.""" + async with new_worker( + client, + TruncateWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateWorkflow.run, + id=f"pubsub-trunc-recover-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(f"item-{i}".encode())) + for i in range(5) + ] + ), + ) + + # Truncate first 3. The update returns after the handler completes. 
+ await handle.execute_update("truncate", 3) + + # subscribe from offset 1 (truncated) — should auto-recover + # and deliver items from base_offset (3) + pubsub = PubSubClient(handle) + items: list[PubSubItem] = [] + try: + async with _async_timeout(5): + async for item in pubsub.subscribe( + from_offset=1, poll_cooldown=0, result_type=bytes + ): + items.append(item) + if len(items) >= 2: + break + except asyncio.TimeoutError: + pass + assert len(items) == 2 + assert items[0].offset == 3 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_workflow_and_activity_publish_interleaved(client: Client) -> None: + """Workflow publishes status events around activity publishing.""" + count = 5 + async with new_worker( + client, + InterleavedWorkflow, + activities=[publish_items], + ) as worker: + handle = await client.start_workflow( + InterleavedWorkflow.run, + count, + id=f"pubsub-interleave-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Total: 1 (started) + count (activity) + 1 (done) = count + 2 + items = await collect_items(client, handle, None, 0, count + 2) + assert len(items) == count + 2 + + # First item is workflow-side "started" + assert items[0].topic == "status" + assert items[0].data == b"started" + + # Middle items are from activity + for i in range(count): + assert items[i + 1].topic == "events" + assert items[i + 1].data == f"item-{i}".encode() + + # Last item is workflow-side "done" + assert items[count + 1].topic == "status" + assert items[count + 1].data == b"done" + + await handle.signal(InterleavedWorkflow.close) + + +@pytest.mark.asyncio +async def test_priority_flush(client: Client) -> None: + """Priority publish triggers immediate flush without waiting for timer.""" + async with new_worker( + client, + PriorityWorkflow, + activities=[publish_with_priority], + ) as worker: + handle = await client.start_workflow( + PriorityWorkflow.run, + id=f"pubsub-priority-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # 
If priority works, items arrive within milliseconds of the publish. + # The activity holds for ~10s after priority publish; this timeout + # gives plenty of margin for workflow/worker scheduling on slow CI + # while staying well below the activity hold so a regression (no + # priority wakeup) surfaces as a missing item, not a pass via + # __aexit__ flush. + items = await collect_items(client, handle, None, 0, 3, timeout=5.0) + assert len(items) == 3 + assert items[2].data == b"priority" + + await handle.signal(PriorityWorkflow.close) + + +@pytest.mark.asyncio +async def test_iterator_cancellation(client: Client) -> None: + """Cancelling a subscription iterator after it has yielded an item + completes cleanly.""" + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-cancel-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Seed one item so the iterator provably reaches an active state + # before we cancel — no sleep-based wait. + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"seed"))] + ), + ) + + pubsub_client = PubSubClient.create(client, handle.id) + first_item = asyncio.Event() + items: list[PubSubItem] = [] + + async def subscribe_and_collect() -> None: + async for item in pubsub_client.subscribe( + from_offset=0, poll_cooldown=0, result_type=bytes + ): + items.append(item) + first_item.set() + + task = asyncio.create_task(subscribe_and_collect()) + # Bounded wait so a subscribe regression fails fast instead of hanging. 
+ async with _async_timeout(5): + await first_item.wait() + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + assert len(items) == 1 + assert items[0].data == b"seed" + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_context_manager_flushes_on_exit(client: Client) -> None: + """Context manager exit flushes all buffered items.""" + count = 5 + async with new_worker( + client, + FlushOnExitWorkflow, + activities=[publish_batch_test], + ) as worker: + handle = await client.start_workflow( + FlushOnExitWorkflow.run, + count, + id=f"pubsub-flush-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Despite 60s batch interval, all items arrive because __aexit__ flushes + items = await collect_items(client, handle, None, 0, count, timeout=15.0) + assert len(items) == count + for i in range(count): + assert items[i].data == f"item-{i}".encode() + + await handle.signal(FlushOnExitWorkflow.close) + + +@pytest.mark.asyncio +async def test_concurrent_subscribers(client: Client) -> None: + """Two subscribers on different topics make interleaved progress. + + Publishes A-0, waits for subscriber A to observe it; publishes B-0, + waits for subscriber B to observe it. At this point both subscribers + have received exactly one item and are polling for their second, + so both subscriptions are provably in flight at the same time. + Then publishes A-1, B-1 the same way. A sequential execution (A drains + then B starts) cannot satisfy the ordering because B's first item + isn't published until after A has already received its first. 
+ """ + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-concurrent-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + pubsub = PubSubClient(handle) + a_items: list[PubSubItem] = [] + b_items: list[PubSubItem] = [] + a_got = [asyncio.Event(), asyncio.Event()] + b_got = [asyncio.Event(), asyncio.Event()] + + async def collect( + topic: str, + collected: list[PubSubItem], + events: list[asyncio.Event], + ) -> None: + async for item in pubsub.subscribe( + topics=[topic], from_offset=0, poll_cooldown=0, result_type=bytes + ): + collected.append(item) + events[len(collected) - 1].set() + if len(collected) >= len(events): + break + + a_task = asyncio.create_task(collect("a", a_items, a_got)) + b_task = asyncio.create_task(collect("b", b_items, b_got)) + + async def publish(topic: str, data: bytes) -> None: + await handle.signal( + "__pubsub_publish", + PublishInput(items=[PublishEntry(topic=topic, data=_wire_bytes(data))]), + ) + + try: + async with _async_timeout(10): + await publish("a", b"a-0") + await a_got[0].wait() + await publish("b", b"b-0") + await b_got[0].wait() + # Both subscribers are now mid-subscription, each having + # seen one item and polling for the next. 
+ await publish("a", b"a-1") + await a_got[1].wait() + await publish("b", b"b-1") + await b_got[1].wait() + + await asyncio.gather(a_task, b_task) + finally: + a_task.cancel() + b_task.cancel() + + assert [i.data for i in a_items] == [b"a-0", b"a-1"] + assert [i.data for i in b_items] == [b"b-0", b"b-1"] + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_max_batch_size(client: Client) -> None: + """max_batch_size triggers auto-flush without waiting for timer.""" + count = 7 # with max_batch_size=3: flushes at 3, 6, then remainder 1 on exit + async with new_worker( + client, + MaxBatchWorkflow, + activities=[publish_with_max_batch], + max_cached_workflows=0, + ) as worker: + handle = await client.start_workflow( + MaxBatchWorkflow.run, + count, + id=f"pubsub-maxbatch-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # count items from activity + 1 "activity_done" from workflow + items = await collect_items(client, handle, None, 0, count + 1, timeout=15.0) + assert len(items) == count + 1 + for i in range(count): + assert items[i].data == f"item-{i}".encode() + + # max_batch_size actually engages: at least one flush fires during + # the publish loop, so 7 items ship as >=2 signals. Without this + # assertion the test would pass even if max_batch_size were ignored + # and all 7 items went out in a single exit-time flush (batch_count + # == 1). Note: max_batch_size is a *trigger* threshold, not a cap — + # the flusher may take more items from the buffer than max_batch_size + # if more were added while a prior signal was in flight, so the exact + # batch count depends on interleaving. Asserting >= 2 is the + # non-flaky way to verify the mechanism is live. 
+ seqs = await handle.query(MaxBatchWorkflow.publisher_sequences) + assert len(seqs) == 1, f"expected one publisher, got {seqs}" + (batch_count,) = seqs.values() + assert batch_count >= 2, ( + f"expected >=2 batches with max_batch_size=3 and 7 items, got " + f"{batch_count} — max_batch_size did not trigger a mid-loop flush" + ) + + await handle.signal(MaxBatchWorkflow.close) + + +@pytest.mark.asyncio +async def test_replay_safety(client: Client) -> None: + """Pub/sub broker survives workflow replay (max_cached_workflows=0).""" + async with new_worker( + client, + InterleavedWorkflow, + activities=[publish_items], + max_cached_workflows=0, + ) as worker: + handle = await client.start_workflow( + InterleavedWorkflow.run, + 5, + id=f"pubsub-replay-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # 1 (started) + 5 (activity) + 1 (done) = 7 + items = await collect_items(client, handle, None, 0, 7) + # Full ordered sequence — endpoint-only checks would miss mid-stream + # replay corruption (reordering, duplication, dropped items). + assert [i.data for i in items] == [ + b"started", + b"item-0", + b"item-1", + b"item-2", + b"item-3", + b"item-4", + b"done", + ] + assert [i.offset for i in items] == list(range(7)) + await handle.signal(InterleavedWorkflow.close) + + +@pytest.mark.asyncio +async def test_flush_retry_preserves_items_after_failures( + client: Client, +) -> None: + """After flush failures, a subsequent successful flush delivers all items + in publish order, exactly once. + + Exercises the retry code path behaviorally: simulated delivery failures + must not drop items, must not duplicate them on retry, and must not + reorder items published during the failed state. 
+ """ + async with new_worker(client, BasicPubSubWorkflow) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-flush-retry-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + pubsub = PubSubClient(handle) + real_signal = handle.signal + fail_remaining = 2 + + async def maybe_failing_signal(*args: Any, **kwargs: Any) -> Any: + nonlocal fail_remaining + if fail_remaining > 0: + fail_remaining -= 1 + raise RuntimeError("simulated delivery failure") + return await real_signal(*args, **kwargs) + + with patch.object(handle, "signal", side_effect=maybe_failing_signal): + pubsub.publish("events", b"item-0") + pubsub.publish("events", b"item-1") + with pytest.raises(RuntimeError): + await pubsub._flush() + + # Publish more during the failed state — must not overtake the + # pending retry on eventual delivery. + pubsub.publish("events", b"item-2") + with pytest.raises(RuntimeError): + await pubsub._flush() + + # Third flush succeeds, delivering the pending retry batch. + await pubsub._flush() + # Fourth flush delivers the buffered "item-2". + await pubsub._flush() + + items = await collect_items(client, handle, None, 0, 3) + assert [i.data for i in items] == [b"item-0", b"item-1", b"item-2"] + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_flush_raises_after_max_retry_duration(client: Client) -> None: + """When max_retry_duration is exceeded, flush raises TimeoutError and the + client can resume publishing without losing subsequent items.""" + async with new_worker(client, BasicPubSubWorkflow) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-retry-expiry-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Inject a controllable clock into the client module. 
The client's + # retry check compares `time.monotonic() - _pending_since` against + # `max_retry_duration`, so advancing the clock between flushes makes + # the timeout fire deterministically regardless of wall-clock speed + # or clock resolution. + pubsub = PubSubClient(handle, max_retry_duration=0.1) + real_signal = handle.signal + fail_signals = True + + async def maybe_failing_signal(*args: Any, **kwargs: Any) -> Any: + if fail_signals: + raise RuntimeError("simulated failure") + return await real_signal(*args, **kwargs) + + clock = [0.0] + with ( + patch( + "temporalio.contrib.pubsub._client.time.monotonic", + side_effect=lambda: clock[0], + ), + patch.object(handle, "signal", side_effect=maybe_failing_signal), + ): + pubsub.publish("events", b"lost") + + # First flush fails and enters the pending-retry state. + with pytest.raises(RuntimeError): + await pubsub._flush() + + # Advance the clock well past max_retry_duration. + clock[0] = 10.0 + + # Next flush raises TimeoutError — the pending batch is abandoned. + with pytest.raises(TimeoutError, match="max_retry_duration"): + await pubsub._flush() + + # Stop failing signals; subsequent publishes must succeed. 
+ fail_signals = False + pubsub.publish("events", b"kept") + await pubsub._flush() + + items = await collect_items(client, handle, None, 0, 1) + assert len(items) == 1 + assert items[0].data == b"kept" + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_dedup_rejects_duplicate_signal(client: Client) -> None: + """Workflow deduplicates signals with the same publisher_id + sequence.""" + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-dedup-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Send a batch with publisher_id and sequence + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"item-0"))], + publisher_id="test-pub", + sequence=1, + ), + ) + + # Send the same sequence again — should be deduped + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"duplicate"))], + publisher_id="test-pub", + sequence=1, + ), + ) + + # Send a new sequence — should go through + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"item-1"))], + publisher_id="test-pub", + sequence=2, + ), + ) + + # Should have 2 items, not 3 (collect_items' update call acts as barrier) + items = await collect_items(client, handle, None, 0, 2) + assert len(items) == 2 + assert items[0].data == b"item-0" + assert items[1].data == b"item-1" + + # Verify offset is 2 (not 3) + pubsub_client = PubSubClient(handle) + offset = await pubsub_client.get_offset() + assert offset == 2 + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_double_init_raises(client: Client) -> None: + """Instantiating PubSub twice from @workflow.init raises RuntimeError. 
+ + The first PubSub() registers the __pubsub_publish signal handler; the + second call detects the existing handler and raises rather than + silently overwriting it. + """ + async with new_worker(client, DoubleInitWorkflow) as worker: + handle = await client.start_workflow( + DoubleInitWorkflow.run, + id=f"pubsub-double-init-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + err = await handle.query(DoubleInitWorkflow.get_double_init_error) + assert err is not None + assert "already registered" in err + await handle.signal(DoubleInitWorkflow.close) + + +@pytest.mark.asyncio +async def test_pubsub_outside_init_raises(client: Client) -> None: + """Constructing PubSub outside @workflow.init raises RuntimeError. + + The workflow calls PubSub() from @workflow.run; the caller-frame + guard must reject the call because the caller's function name is + ``run``, not ``__init__``. + """ + async with new_worker(client, LatePubSubWorkflow) as worker: + result = await client.execute_workflow( + LatePubSubWorkflow.run, + id=f"pubsub-late-init-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + assert "must be constructed directly from the workflow's" in result + assert "'run'" in result + + +@pytest.mark.asyncio +async def test_truncate_pubsub(client: Client) -> None: + """PubSub.truncate discards prefix and adjusts base_offset.""" + async with new_worker( + client, + TruncateWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateWorkflow.run, + id=f"pubsub-truncate-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items via signal. collect_items below uses an update, + # which acts as a signal barrier. 
+ await handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(f"item-{i}".encode())) + for i in range(5) + ] + ), + ) + + # Verify all 5 items + items = await collect_items(client, handle, None, 0, 5) + assert len(items) == 5 + + # Truncate up to offset 3 (discard items 0, 1, 2). The update + # returns after the handler completes. + await handle.execute_update("truncate", 3) + + # Offset should still be 5 (truncation moves base_offset, not tail) + pubsub_client = PubSubClient(handle) + offset = await pubsub_client.get_offset() + assert offset == 5 + + # Reading from offset 3 should work (items 3, 4) + items_after = await collect_items(client, handle, None, 3, 2) + assert len(items_after) == 2 + assert items_after[0].data == b"item-3" + assert items_after[1].data == b"item-4" + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_ttl_pruning_in_get_pubsub_state(client: Client) -> None: + """PubSub.get_state prunes publishers whose last-seen time exceeds the + TTL while retaining newer publishers. The log itself is unaffected. + + Uses a wall-clock gap between publishes so that workflow.time() + advances between the two publishers' tasks. workflow.time() can't be + cleanly injected from outside, so a short real sleep is the mechanism. + """ + async with new_worker( + client, + TTLTestWorkflow, + ) as worker: + handle = await client.start_workflow( + TTLTestWorkflow.run, + id=f"pubsub-ttl-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # pub-old arrives first. + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"old"))], + publisher_id="pub-old", + sequence=1, + ), + ) + + # Sanity: pub-old is recorded (generous TTL retains it). + state_before = await handle.query(TTLTestWorkflow.get_state_with_ttl, 9999.0) + assert "pub-old" in state_before.publisher_sequences + + # Let workflow.time() advance by real wall-clock time. 
Use a + # generous gap (1.0s) relative to the TTL (0.5s) so the test + # tolerates CI scheduling delays — pub-old must be >=0.5s past, + # pub-new must be <0.5s past, at the moment of the query. + await asyncio.sleep(1.0) + + # pub-new arrives after the gap. + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=_wire_bytes(b"new"))], + publisher_id="pub-new", + sequence=1, + ), + ) + + # TTL=0.5s prunes pub-old (~1.0s old) but keeps pub-new (~0s). + state = await handle.query(TTLTestWorkflow.get_state_with_ttl, 0.5) + assert "pub-old" not in state.publisher_sequences + assert "pub-new" in state.publisher_sequences + # Log contents are not touched by publisher pruning. + assert len(state.log) == 2 + + await handle.signal("close") + + +# --------------------------------------------------------------------------- +# Truncate and TTL test workflows +# --------------------------------------------------------------------------- + + +@workflow.defn +class TruncateWorkflow: + """Test scaffolding that exposes PubSub.truncate via a user-authored + update. + + The contrib module does not define a built-in external truncate API — + truncation is a workflow-internal decision (typically driven by + consumer progress or a retention policy). Workflows that want external + control wire up their own signal or update. We use an update here so + callers get explicit completion (signals are fire-and-forget). 
+ """ + + @workflow.init + def __init__(self) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.update + def truncate(self, up_to_offset: int) -> None: + self.pubsub.truncate(up_to_offset) + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class TTLTestWorkflow: + """Workflow that exposes PubSub.get_state via query for TTL testing.""" + + @workflow.init + def __init__(self) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.query + def get_state_with_ttl(self, ttl: float) -> PubSubState: + return self.pubsub.get_state(publisher_ttl=ttl) + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +# --------------------------------------------------------------------------- +# Continue-as-new workflow and test +# --------------------------------------------------------------------------- + + +@dataclass +class CANWorkflowInputTyped: + """Uses proper typing.""" + + pubsub_state: PubSubState | None = None + + +@workflow.defn +class ContinueAsNewTypedWorkflow: + """CAN workflow using properly-typed pubsub_state.""" + + @workflow.init + def __init__(self, input: CANWorkflowInputTyped) -> None: + self.pubsub = PubSub(prior_state=input.pubsub_state) + self._should_continue = False + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.signal + def trigger_continue(self) -> None: + self._should_continue = True + + @workflow.query + def publisher_sequences(self) -> dict[str, int]: + return dict(self.pubsub._publisher_sequences) + + @workflow.run + async def run( + self, + input: CANWorkflowInputTyped, # type:ignore[reportUnusedParameter] + ) -> None: + while True: + await workflow.wait_condition(lambda: self._should_continue or 
self._closed) + if self._closed: + return + if self._should_continue: + self._should_continue = False + self.pubsub.drain() + await workflow.wait_condition(workflow.all_handlers_finished) + workflow.continue_as_new( + args=[ + CANWorkflowInputTyped( + pubsub_state=self.pubsub.get_state(), + ) + ] + ) + + +@pytest.mark.asyncio +async def test_continue_as_new_properly_typed(client: Client) -> None: + """CAN preserves the log, global offsets, AND publisher dedup state + when pubsub_state is properly typed as ``PubSubState | None``.""" + async with new_worker( + client, + ContinueAsNewTypedWorkflow, + ) as worker: + handle = await client.start_workflow( + ContinueAsNewTypedWorkflow.run, + CANWorkflowInputTyped(), + id=f"pubsub-can-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 3 items with an explicit publisher_id/sequence so dedup + # state is seeded and we can verify it survives CAN. + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(b"item-0")), + PublishEntry(topic="events", data=_wire_bytes(b"item-1")), + PublishEntry(topic="events", data=_wire_bytes(b"item-2")), + ], + publisher_id="pub", + sequence=1, + ), + ) + + items_before = await collect_items(client, handle, None, 0, 3) + assert len(items_before) == 3 + + await handle.signal(ContinueAsNewTypedWorkflow.trigger_continue) + + new_handle = client.get_workflow_handle(handle.id) + await assert_eq_eventually( + True, + lambda: _is_different_run(handle, new_handle), + ) + + # Log contents and offsets preserved across CAN. + items_after = await collect_items(client, new_handle, None, 0, 3) + assert [i.data for i in items_after] == [b"item-0", b"item-1", b"item-2"] + assert [i.offset for i in items_after] == [0, 1, 2] + + # Dedup state preserved: the carried publisher_sequences dict has + # pub -> 1 after CAN. 
+ seqs_after_can = await new_handle.query( + ContinueAsNewTypedWorkflow.publisher_sequences + ) + assert seqs_after_can == {"pub": 1} + + # Re-sending publisher_id="pub", sequence=1 must be rejected by + # dedup — both the log and the publisher_sequences entry stay put. + await new_handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(b"dup")), + ], + publisher_id="pub", + sequence=1, + ), + ) + seqs_after_dup = await new_handle.query( + ContinueAsNewTypedWorkflow.publisher_sequences + ) + assert seqs_after_dup == {"pub": 1} + + # A fresh sequence from the same publisher is accepted, advances + # publisher_sequences to 2, and the new item gets offset 3. + await new_handle.signal( + "__pubsub_publish", + PublishInput( + items=[ + PublishEntry(topic="events", data=_wire_bytes(b"item-3")), + ], + publisher_id="pub", + sequence=2, + ), + ) + seqs_after_accept = await new_handle.query( + ContinueAsNewTypedWorkflow.publisher_sequences + ) + assert seqs_after_accept == {"pub": 2} + items_all = await collect_items(client, new_handle, None, 0, 4) + assert [i.data for i in items_all] == [ + b"item-0", + b"item-1", + b"item-2", + b"item-3", + ] + assert items_all[3].offset == 3 + + await new_handle.signal(ContinueAsNewTypedWorkflow.close) + + +# --------------------------------------------------------------------------- +# Cross-workflow pub/sub (Scenario 1) +# --------------------------------------------------------------------------- + + +@dataclass +class CrossWorkflowInput: + broker_workflow_id: str + expected_count: int + + +@workflow.defn +class BrokerWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + for i in range(count): + self.pubsub.publish("events", f"broker-{i}".encode()) + await workflow.wait_condition(lambda: 
self._closed) + + +@workflow.defn +class SubscriberWorkflow: + @workflow.run + async def run(self, input: CrossWorkflowInput) -> list[str]: + return await workflow.execute_activity( + "subscribe_to_broker", + input, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + + +@activity.defn(name="subscribe_to_broker") +async def subscribe_to_broker(input: CrossWorkflowInput) -> list[str]: + client = PubSubClient.create( + client=activity.client(), + workflow_id=input.broker_workflow_id, + ) + items: list[str] = [] + async with _async_timeout(15.0): + async for item in client.subscribe( + topics=["events"], from_offset=0, poll_cooldown=0, result_type=bytes + ): + items.append(item.data.decode()) + activity.heartbeat() + if len(items) >= input.expected_count: + break + return items + + +@pytest.mark.asyncio +async def test_cross_workflow_pubsub(client: Client) -> None: + """Workflow B's activity subscribes to events published by Workflow A.""" + count = 5 + task_queue = str(uuid.uuid4()) + + async with new_worker( + client, + BrokerWorkflow, + SubscriberWorkflow, + activities=[subscribe_to_broker], + task_queue=task_queue, + ): + broker_id = f"pubsub-broker-{uuid.uuid4()}" + broker_handle = await client.start_workflow( + BrokerWorkflow.run, + count, + id=broker_id, + task_queue=task_queue, + ) + + sub_handle = await client.start_workflow( + SubscriberWorkflow.run, + CrossWorkflowInput( + broker_workflow_id=broker_id, + expected_count=count, + ), + id=f"pubsub-subscriber-{uuid.uuid4()}", + task_queue=task_queue, + ) + + result = await sub_handle.result() + assert result == [f"broker-{i}" for i in range(count)] + + # Also verify external subscription still works + external_items = await collect_items( + client, broker_handle, ["events"], 0, count + ) + assert len(external_items) == count + + await broker_handle.signal(BrokerWorkflow.close) + + +# --------------------------------------------------------------------------- +# 
Cross-namespace pub/sub via Nexus (Scenario 2) +# --------------------------------------------------------------------------- + + +@dataclass +class StartBrokerInput: + count: int + broker_id: str + + +@dataclass +class NexusCallerInput: + count: int + broker_id: str + endpoint: str + + +@workflow.defn +class NexusBrokerWorkflow: + @workflow.init + def __init__(self, count: int) -> None: + self.pubsub = PubSub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> str: + for i in range(count): + self.pubsub.publish("events", f"nexus-{i}".encode()) + await workflow.wait_condition(lambda: self._closed) + return "done" + + +@nexusrpc.service +class PubSubNexusService: + start_broker: nexusrpc.Operation[StartBrokerInput, str] + + +@nexusrpc.handler.service_handler(service=PubSubNexusService) +class PubSubNexusHandler: + @workflow_run_operation + async def start_broker( + self, ctx: WorkflowRunOperationContext, input: StartBrokerInput + ) -> nexus.WorkflowHandle[str]: + return await ctx.start_workflow( + NexusBrokerWorkflow.run, + input.count, + id=input.broker_id, + ) + + +@workflow.defn +class NexusCallerWorkflow: + @workflow.run + async def run(self, input: NexusCallerInput) -> str: + nc = workflow.create_nexus_client( + service=PubSubNexusService, + endpoint=input.endpoint, + ) + return await nc.execute_operation( + PubSubNexusService.start_broker, + StartBrokerInput(count=input.count, broker_id=input.broker_id), + ) + + +async def create_cross_namespace_endpoint( + client: Client, + endpoint_name: str, + target_namespace: str, + task_queue: str, +) -> None: + await client.operator_service.create_nexus_endpoint( + temporalio.api.operatorservice.v1.CreateNexusEndpointRequest( + spec=temporalio.api.nexus.v1.EndpointSpec( + name=endpoint_name, + target=temporalio.api.nexus.v1.EndpointTarget( + worker=temporalio.api.nexus.v1.EndpointTarget.Worker( + namespace=target_namespace, 
+ task_queue=task_queue, + ) + ), + ) + ) + ) + + +@pytest.mark.asyncio +async def test_poll_more_ready_when_response_exceeds_size_limit( + client: Client, +) -> None: + """Poll response sets more_ready=True when items exceed ~1MB wire size.""" + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-more-ready-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish items that total well over 1MB in the poll response. + # Send in separate signals to stay under the RPC size limit. + # Each item is ~200KB; 8 items = ~1.6MB wire (base64 inflates ~33%). + chunk = b"x" * 200_000 + for _ in range(8): + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="big", data=_wire_bytes(chunk))] + ), + ) + + # First poll from offset 0 — should get some items but not all. + # (The update acts as a barrier for all prior publish signals.) + result1: PollResult = await handle.execute_update( + "__pubsub_poll", + PollInput(topics=[], from_offset=0), + result_type=PollResult, + ) + assert result1.more_ready is True + assert len(result1.items) < 8 + assert result1.next_offset < 8 + + # Continue polling until we have all items + all_items = list(result1.items) + offset = result1.next_offset + last_result: PollResult = result1 + while len(all_items) < 8: + last_result = await handle.execute_update( + "__pubsub_poll", + PollInput(topics=[], from_offset=offset), + result_type=PollResult, + ) + all_items.extend(last_result.items) + offset = last_result.next_offset + assert len(all_items) == 8 + # The final poll that drained the log should set more_ready=False + assert last_result.more_ready is False + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_subscribe_iterates_through_more_ready(client: Client) -> None: + """Subscriber correctly yields all items when polls are size-truncated.""" + async with new_worker( 
+ client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-more-ready-iter-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 8 x 200KB items (~2MB+ wire, exceeds 1MB cap) + chunk = b"x" * 200_000 + for _ in range(8): + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="big", data=_wire_bytes(chunk))] + ), + ) + + # subscribe() should seamlessly iterate through all 8 items + items = await collect_items(client, handle, None, 0, 8, timeout=10.0) + assert len(items) == 8 + for item in items: + assert item.data == chunk + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_cross_namespace_nexus_pubsub( + client: Client, env: WorkflowEnvironment +) -> None: + """Nexus operation starts a pub/sub broker in another namespace; test subscribes.""" + if env.supports_time_skipping: + pytest.skip("Nexus not supported with time-skipping server") + + count = 5 + handler_ns = f"handler-ns-{uuid.uuid4().hex[:8]}" + task_queue = str(uuid.uuid4()) + endpoint_name = make_nexus_endpoint_name(task_queue) + broker_id = f"nexus-broker-{uuid.uuid4()}" + + # Register the handler namespace with the dev server + await client.workflow_service.register_namespace( + temporalio.api.workflowservice.v1.RegisterNamespaceRequest( + namespace=handler_ns, + workflow_execution_retention_period=google.protobuf.duration_pb2.Duration( + seconds=86400, + ), + ) + ) + + handler_client = await Client.connect( + client.service_client.config.target_host, + namespace=handler_ns, + ) + + # Create endpoint targeting the handler namespace + await create_cross_namespace_endpoint( + client, + endpoint_name, + target_namespace=handler_ns, + task_queue=task_queue, + ) + + # Handler worker in handler namespace + async with Worker( + handler_client, + task_queue=task_queue, + workflows=[NexusBrokerWorkflow], + nexus_service_handlers=[PubSubNexusHandler()], + ): 
+ # Caller worker in default namespace + caller_tq = str(uuid.uuid4()) + async with new_worker( + client, + NexusCallerWorkflow, + task_queue=caller_tq, + ): + # Start caller — invokes Nexus op which starts broker in handler ns + caller_handle = await client.start_workflow( + NexusCallerWorkflow.run, + NexusCallerInput( + count=count, + broker_id=broker_id, + endpoint=endpoint_name, + ), + id=f"nexus-caller-{uuid.uuid4()}", + task_queue=caller_tq, + ) + + # Wait for the broker workflow to be started by the Nexus operation + broker_handle = handler_client.get_workflow_handle(broker_id) + + async def broker_started() -> bool: + try: + await broker_handle.describe() + return True + except Exception: + return False + + await assert_eq_eventually( + True, broker_started, timeout=timedelta(seconds=15) + ) + + # Subscribe to broker events from the handler namespace + items = await collect_items( + handler_client, broker_handle, ["events"], 0, count + ) + assert len(items) == count + for i in range(count): + assert items[i].topic == "events" + assert items[i].data == f"nexus-{i}".encode() + + # Clean up — signal broker to close so caller can complete + await broker_handle.signal("close") + result = await caller_handle.result() + assert result == "done" diff --git a/uv.lock b/uv.lock index bdc25a507..f0e1ebdb5 100644 --- a/uv.lock +++ b/uv.lock @@ -8,6 +8,13 @@ resolution-markers = [ "python_full_version < '3.11'", ] +[options] +exclude-newer = "2026-04-12T04:20:59.693938Z" +exclude-newer-span = "P1W" + +[options.exclude-newer-package] +openai-agents = false + [[package]] name = "aioboto3" version = "15.5.0" @@ -1812,7 +1819,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = 
"https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1820,7 +1826,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = 
"https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1829,7 +1834,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = 
"https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1838,7 +1842,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = 
"https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1847,7 +1850,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1856,7 +1858,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = 
"https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" },