From f2eef03a4a6ac5677e3031c5dbad19ccef249288 Mon Sep 17 00:00:00 2001 From: Mahimai Raja J Date: Fri, 20 Mar 2026 20:59:11 -0400 Subject: [PATCH] Align install metadata and refresh docs --- README.md | 84 ++++++++------------ docs/.vitepress/config.ts | 4 +- docs/api/pool.md | 142 ++++++++++++++++++++++++++++++---- docs/cli.md | 57 ++++++++++++-- docs/concepts/architecture.md | 29 +++++-- docs/getting-started.md | 31 +++++++- pyproject.toml | 5 -- src/openrtc/cli.py | 20 ++++- src/openrtc/pool.py | 2 +- 9 files changed, 280 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index dd6fa88..243051c 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,6 @@
- - -
A Python framework for running multiple LiveKit voice agents in a single worker process with shared prewarmed models.
@@ -42,8 +39,8 @@ LiveKit worker per agent.
  • Greetings and session options
  • Provider model strings
  • CLI usage
  • +
  • Public API at a glance
  • Project structure
  • -
  • Migration from legacy
  • Contributing
  • @@ -94,13 +91,15 @@ OpenRTC is built to reduce duplicate worker overhead. ## Installation -Install OpenRTC with the common runtime dependencies required for shared -prewarm: +Install OpenRTC from PyPI: ```bash -pip install openrtc[common] +pip install openrtc ``` +`openrtc` depends on `livekit-agents[silero,turn-detector]`, so the runtime +plugins required by shared prewarm are installed with the base package. + If you are developing locally, the repository uses `uv` for environment and command management. @@ -220,6 +219,9 @@ For each incoming room, `AgentPool` resolves the agent in this order: This lets one worker process host several agents while staying compatible with standard LiveKit job and room metadata. +If metadata references an unknown agent name, OpenRTC raises a `ValueError` +instead of silently falling back. + ## Greetings and session options OpenRTC can play a greeting after `ctx.connect()` and pass extra options into @@ -269,7 +271,7 @@ instead of strings. ## CLI usage -OpenRTC includes a small CLI for agent discovery. +OpenRTC includes a CLI for discovery-based workflows. ### List discovered agents @@ -293,6 +295,28 @@ openrtc start --agents-dir ./agents openrtc dev --agents-dir ./agents ``` +Both `start` and `dev` discover agents first and then hand off to the underlying +LiveKit worker runtime. 
+ +## Public API at a glance + +OpenRTC currently exposes: + +- `AgentPool` +- `AgentConfig` +- `AgentDiscoveryConfig` +- `agent_config(...)` + +On `AgentPool`, the primary public methods and properties are: + +- `add(...)` +- `discover(...)` +- `list_agents()` +- `get(name)` +- `remove(name)` +- `run()` +- `server` + ## Project structure ```text @@ -302,50 +326,10 @@ src/openrtc/ └── pool.py ``` -- `pool.py` contains the core `AgentPool` implementation -- `cli.py` provides agent discovery and worker startup commands +- `pool.py` contains the core `AgentPool` implementation and discovery helpers +- `cli.py` provides discovery and worker startup commands - `__init__.py` exposes the public package API -## Migration from legacy `AGENT_*` module globals - -Older prototypes used module-level constants such as `AGENT_NAME` and -`AGENT_STT`. OpenRTC now standardizes on `@agent_config(...)` plus -`AgentPool(...)` defaults. - -Before: - -```python -AGENT_NAME = "restaurant" -AGENT_STT = "deepgram/nova-3:multi" -AGENT_LLM = "openai/gpt-4.1-mini" -AGENT_TTS = "cartesia/sonic-3" -AGENT_GREETING = "Welcome to reservations." -``` - -After: - -```python -from openrtc import agent_config - - -@agent_config( - name="restaurant", - greeting="Welcome to reservations.", -) -class RestaurantAgent(Agent): - ... -``` - -Move shared provider settings into the pool: - -```python -pool = AgentPool( - default_stt="deepgram/nova-3:multi", - default_llm="openai/gpt-4.1-mini", - default_tts="cartesia/sonic-3", -) -``` - ## Contributing Contributions are welcome. 
Please read [CONTRIBUTING.md](CONTRIBUTING.md) diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index cca1c68..be7f7ba 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -41,10 +41,10 @@ export default defineConfig({ ], }, socialLinks: [ - { icon: 'github', link: 'https://github.com/mahimairaja/openrtc-python' }, + { icon: 'github', link: 'https://github.com/mahimailabs/openrtc' }, ], editLink: { - pattern: 'https://github.com/mahimairaja/openrtc-python/edit/main/docs/:path', + pattern: 'https://github.com/mahimailabs/openrtc/edit/main/docs/:path', text: 'Edit this page on GitHub', }, search: { diff --git a/docs/api/pool.md b/docs/api/pool.md index 635e0c0..fc0c041 100644 --- a/docs/api/pool.md +++ b/docs/api/pool.md @@ -3,7 +3,7 @@ ## Imports ```python -from openrtc import AgentConfig, AgentPool +from openrtc import AgentConfig, AgentDiscoveryConfig, AgentPool, agent_config ``` ## `AgentConfig` @@ -17,19 +17,68 @@ class AgentConfig: llm: Any = None tts: Any = None greeting: str | None = None + session_kwargs: dict[str, Any] = field(default_factory=dict) ``` `AgentConfig` is returned from `AgentPool.add()` and represents a registered LiveKit agent configuration. -## `AgentPool()` +## `AgentDiscoveryConfig` + +```python +@dataclass(slots=True) +class AgentDiscoveryConfig: + name: str | None = None + stt: Any = None + llm: Any = None + tts: Any = None + greeting: str | None = None +``` + +`AgentDiscoveryConfig` stores optional metadata attached to an agent class with +`@agent_config(...)`. + +## `agent_config(...)` + +```python +@agent_config( + name="restaurant", + stt="deepgram/nova-3:multi", + llm="openai/gpt-4.1-mini", + tts="cartesia/sonic-3", + greeting="Welcome to reservations.", +) +class RestaurantAgent(Agent): + ... +``` + +Use `agent_config(...)` to attach discovery metadata to a standard LiveKit +`Agent` subclass. + +## `AgentPool(...)` Create a pool that manages multiple LiveKit agents in one worker process. 
```python -pool = AgentPool() +pool = AgentPool( + default_stt="deepgram/nova-3:multi", + default_llm="openai/gpt-4.1-mini", + default_tts="cartesia/sonic-3", + default_greeting="Hello from OpenRTC.", +) +``` + +Constructor defaults are used when an agent registration or discovered agent +module omits those values. + +## `server` + +```python +server = pool.server ``` +Returns the underlying LiveKit `AgentServer` instance. + ## `add()` ```python @@ -41,6 +90,8 @@ pool.add( llm=None, tts=None, greeting=None, + session_kwargs=None, + **session_options, ) ``` @@ -52,6 +103,12 @@ Registers a named LiveKit `Agent` subclass. - names must be unique - `agent_cls` must be a subclass of `livekit.agents.Agent` +### Session options + +- `session_kwargs` forwards a mapping of keyword arguments to `AgentSession` +- direct `**session_options` are also forwarded to `AgentSession` +- when the same key appears in both places, the direct keyword argument wins + ### Returns An `AgentConfig` instance for the registration. @@ -61,6 +118,30 @@ An `AgentConfig` instance for the registration. - `ValueError` for an empty or duplicate name - `TypeError` if `agent_cls` is not a LiveKit `Agent` subclass +## `discover()` + +```python +pool.discover("./agents") +``` + +Discovers Python modules in a directory, imports them, finds a local `Agent` +subclass, and registers it. + +Discovery behavior: + +- skips `__init__.py` +- skips files whose stem starts with `_` +- uses `@agent_config(...)` metadata when present +- otherwise uses the filename stem as the agent name +- falls back to pool defaults for omitted provider and greeting fields + +### Raises + +- `FileNotFoundError` if the directory does not exist +- `NotADirectoryError` if the path is not a directory +- `RuntimeError` if a module cannot be imported or defines no local `Agent` + subclass + ## `list_agents()` ```python @@ -69,6 +150,30 @@ pool.list_agents() Returns registered agent names in registration order. 
+## `get()` + +```python +pool.get("restaurant") +``` + +Returns a registered `AgentConfig`. + +### Raises + +- `KeyError` if the agent name is unknown + +## `remove()` + +```python +pool.remove("restaurant") +``` + +Removes and returns a registered `AgentConfig`. + +### Raises + +- `KeyError` if the agent name is unknown + ## `run()` ```python @@ -79,22 +184,33 @@ Starts the LiveKit worker application. ### Raises -`RuntimeError` if called before any agents are registered. +- `RuntimeError` if called before any agents are registered + +## Routing behavior + +`AgentPool` resolves the active agent in this order: + +1. `ctx.job.metadata["agent"]` +2. `ctx.job.metadata["demo"]` +3. `ctx.room.metadata["agent"]` +4. `ctx.room.metadata["demo"]` +5. room-name prefix matching such as `restaurant-call-123` +6. the first registered agent + +If metadata references an unknown agent, OpenRTC raises `ValueError`. ## Example ```python -from examples.agents.restaurant import RestaurantAgent +from pathlib import Path + from openrtc import AgentPool -pool = AgentPool() -pool.add( - "restaurant", - RestaurantAgent, - stt="deepgram/nova-3:multi", - llm="openai/gpt-5-mini", - tts="cartesia/sonic-3", +pool = AgentPool( + default_stt="deepgram/nova-3:multi", + default_llm="openai/gpt-4.1-mini", + default_tts="cartesia/sonic-3", ) - +pool.discover(Path("./agents")) pool.run() ``` diff --git a/docs/cli.md b/docs/cli.md index f8972dc..c3ec017 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,17 +1,58 @@ # CLI -OpenRTC exposes a console script named `openrtc`. +OpenRTC exposes a console script named `openrtc` for discovery-based workflows. -## Current status +## Commands -Today the CLI is a placeholder entrypoint that returns success and logs that -full discovery-based CLI commands are planned for a future milestone. +### `openrtc list` -## Run the CLI +Discovers agent modules and prints the resolved registration settings for each +agent. 
```bash -openrtc +openrtc list --agents-dir ./agents ``` -Use the programmatic API for production usage today. The CLI page exists so the -published docs accurately reflect the current package surface. +### `openrtc start` + +Discovers agent modules and starts the LiveKit worker in production mode. + +```bash +openrtc start --agents-dir ./agents +``` + +### `openrtc dev` + +Discovers agent modules and starts the LiveKit worker in development mode. + +```bash +openrtc dev --agents-dir ./agents +``` + +## Shared default options + +Each command accepts these optional defaults, which are applied when a +discovered agent does not override them via `@agent_config(...)`: + +- `--default-stt` +- `--default-llm` +- `--default-tts` +- `--default-greeting` + +Example: + +```bash +openrtc list \ + --agents-dir ./examples/agents \ + --default-stt deepgram/nova-3:multi \ + --default-llm openai/gpt-4.1-mini \ + --default-tts cartesia/sonic-3 \ + --default-greeting "Hello from OpenRTC." +``` + +## Notes + +- `--agents-dir` is required for every command. +- `list` returns a non-zero exit code when no discoverable agents are found. +- `start` and `dev` both discover agents before handing off to the underlying + LiveKit worker runtime. diff --git a/docs/concepts/architecture.md b/docs/concepts/architecture.md index a8b8653..447e0f1 100644 --- a/docs/concepts/architecture.md +++ b/docs/concepts/architecture.md @@ -11,7 +11,17 @@ OpenRTC keeps the public API intentionally narrow. 
- unique `name` - `agent_cls` subclass - optional `stt`, `llm`, and `tts` providers -- optional `greeting` placeholder for future use +- optional `greeting` generated after `ctx.connect()` +- optional `session_kwargs` forwarded to `AgentSession` + +### `AgentDiscoveryConfig` + +`AgentDiscoveryConfig` stores optional discovery metadata attached by +`@agent_config(...)`: + +- optional explicit `name` +- optional `stt`, `llm`, and `tts` overrides +- optional `greeting` override ### `AgentPool` @@ -25,12 +35,13 @@ are loaded once and reused across sessions. When a room is assigned to the worker: -1. OpenRTC resolves the target agent from job metadata, room metadata, or the - first registered agent. -2. Create an `AgentSession` using the selected agent configuration. +1. OpenRTC resolves the target agent from job metadata, room metadata, room-name + prefix matching, or the first registered agent. +2. It creates an `AgentSession` using the selected agent configuration. 3. Prewarmed VAD and turn detection models are injected from `proc.userdata`. -4. The resolved agent instance is then started and connected to the LiveKit job - context. +4. The resolved agent instance is started for the room. +5. OpenRTC connects the room context. +6. If a greeting is configured, it generates the greeting after connect. ## Shared runtime dependencies @@ -39,8 +50,9 @@ During prewarm, OpenRTC loads: - `livekit.plugins.silero` - `livekit.plugins.turn_detector.multilingual.MultilingualModel` -If those plugins are unavailable, OpenRTC raises a `RuntimeError` explaining -that the package should be installed with the required extras. +These plugins are expected to be available from the package installation. +If they are missing at runtime, OpenRTC raises a `RuntimeError` with install +instructions. ## Why this shape? 
@@ -48,5 +60,6 @@ This design keeps the package easy to reason about: - routing logic is explicit - worker-scoped dependencies are loaded once +- discovery metadata is opt-in and typed - agent registration stays stable and readable - the public API remains small enough for contributors to extend safely diff --git a/docs/getting-started.md b/docs/getting-started.md index 15418b0..cf02d50 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -11,6 +11,9 @@ OpenRTC currently supports Python `>=3.10,<3.14` and depends on pip install openrtc ``` +The base package includes the LiveKit Silero and turn-detector plugins used by +OpenRTC's shared prewarm path. + If you are contributing locally, install the package in editable mode: ```bash @@ -45,9 +48,12 @@ pool.run() `AgentPool` resolves an agent in this order: -1. `ctx.job.metadata` -2. `ctx.room.metadata` -3. the first registered agent +1. `ctx.job.metadata["agent"]` +2. `ctx.job.metadata["demo"]` +3. `ctx.room.metadata["agent"]` +4. `ctx.room.metadata["demo"]` +5. room name prefix matching, such as `support-call-123` +6. the first registered agent Use JSON metadata with an `agent` field, for example: @@ -57,3 +63,22 @@ Use JSON metadata with an `agent` field, for example: If metadata references an unknown agent name, OpenRTC raises a `ValueError` with a clear message instead of silently falling back. 
+ +## Discovery-based setup + +If you prefer one agent module per file, use discovery with optional +`@agent_config(...)` metadata: + +```python +from pathlib import Path + +from openrtc import AgentPool + +pool = AgentPool( + default_stt="deepgram/nova-3:multi", + default_llm="openai/gpt-4.1-mini", + default_tts="cartesia/sonic-3", +) +pool.discover(Path("./agents")) +pool.run() +``` diff --git a/pyproject.toml b/pyproject.toml index 265d681..adf9771 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,11 +22,6 @@ classifiers = [ license = "MIT" requires-python = ">=3.10,<3.14" dependencies = [ - "livekit-agents~=1.4", -] - -[project.optional-dependencies] -common = [ "livekit-agents[silero,turn-detector]~=1.4", ] diff --git a/src/openrtc/cli.py b/src/openrtc/cli.py index 5df9d66..e63d90c 100644 --- a/src/openrtc/cli.py +++ b/src/openrtc/cli.py @@ -29,19 +29,31 @@ def build_parser() -> argparse.ArgumentParser: ) command_parser.add_argument( "--default-stt", - help="Default STT provider used when an agent module omits AGENT_STT.", + help=( + "Default STT provider used when a discovered agent does not " + "override STT via @agent_config(...)." + ), ) command_parser.add_argument( "--default-llm", - help="Default LLM provider used when an agent module omits AGENT_LLM.", + help=( + "Default LLM provider used when a discovered agent does not " + "override LLM via @agent_config(...)." + ), ) command_parser.add_argument( "--default-tts", - help="Default TTS provider used when an agent module omits AGENT_TTS.", + help=( + "Default TTS provider used when a discovered agent does not " + "override TTS via @agent_config(...)." + ), ) command_parser.add_argument( "--default-greeting", - help=("Default greeting used when an agent module omits AGENT_GREETING."), + help=( + "Default greeting used when a discovered agent does not " + "override greeting via @agent_config(...)." 
+ ), ) return parser diff --git a/src/openrtc/pool.py b/src/openrtc/pool.py index 570a163..bfa44cc 100644 --- a/src/openrtc/pool.py +++ b/src/openrtc/pool.py @@ -484,7 +484,7 @@ def _load_shared_runtime_dependencies(self) -> tuple[Any, type[Any]]: except ModuleNotFoundError as exc: raise RuntimeError( "OpenRTC requires the LiveKit Silero and turn-detector plugins. " - "Install the package with livekit-agents[silero,turn-detector]." + "Reinstall openrtc, or install livekit-agents[silero,turn-detector]." ) from exc return silero, MultilingualModel