From 06d9c243bb096d7224919840ae023023a5608ffd Mon Sep 17 00:00:00 2001 From: Devon White Date: Fri, 26 Sep 2025 13:13:22 -0400 Subject: [PATCH 01/73] New stream guide --- .../realtime-streaming-to-openai/index.mdx | 1266 +++++++++++++++++ 1 file changed, 1266 insertions(+) create mode 100644 website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx new file mode 100644 index 000000000..d69571229 --- /dev/null +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -0,0 +1,1266 @@ +--- +title: Building Real-Time AI Voice Assistants +description: Learn how to create bidirectional audio streaming bridges between telephony platforms and AI real-time APIs +slug: /guides/real-time-ai-voice-assistants +sidebar_label: Real-Time AI Voice Assistants +sidebar_position: 1 +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import { Card, CardGroup } from '@site/src/components/Extras/Card'; +import { Accordion, AccordionGroup } from '@site/src/components/Extras/Accordion'; + +# Building Real-Time AI Voice Assistants + +Create bidirectional audio streaming bridges between telephony and AI + +Transform phone calls into intelligent conversations using WebSocket streaming and modern AI APIs. + +## What You'll Build + + + + + + + +--- + +## Getting Started + +### Prerequisites + +Before you begin, ensure you have: + +1. **SignalWire Account** - [Sign up free](https://signalwire.com) +2. **OpenAI API Key** - [Get access](https://platform.openai.com/api-keys) (requires paid account) +3. **Node.js 20+** - For running the TypeScript server +4. **ngrok** - For local development tunneling ([Install ngrok](https://ngrok.com/download)) +5. 
**Docker** (optional) - For containerized deployment + +### Required Dependencies + +The project uses these key packages: + +```json +{ + "@openai/agents": "^0.1.4", + "@openai/agents-realtime": "^0.1.4", + "@fastify/websocket": "^10.0.1", + "fastify": "^4.28.1", + "zod": "^3.23.8" +} +``` + +### Quick Overview + +Here's what happens when someone calls your AI assistant: + +
+ Architecture diagram showing call flow from phone through SignalWire to WebSocket server to OpenAI +
Real-time bidirectional audio streaming architecture
+
+ +1. **Call arrives** at SignalWire +2. **Webhook triggers** your server endpoint +3. **WebSocket streams** audio bidirectionally +4. **AI processes** speech in real-time +5. **Responses flow back** to the caller + +--- + +## Step 1: Create the Webhook Endpoint + +Your server needs to handle incoming call webhooks from SignalWire. + +### Set Up the HTTP Endpoint + + + + +```typescript +import Fastify from 'fastify'; + +const app = Fastify(); + +app.post('/incoming-call', async (req, res) => { + const host = req.headers.host; + const wsUrl = `wss://${host}/media-stream`; + + // Return cXML instructions to stream audio + const cxml = ` + + + `; + + res.type('text/xml').send(cxml); +}); + +app.listen({ port: 5050, host: '0.0.0.0' }); +``` + + + + +```javascript +const Fastify = require('fastify'); + +const app = Fastify(); + +app.post('/incoming-call', async (req, res) => { + const host = req.headers.host; + const wsUrl = `wss://${host}/media-stream`; + + // Return cXML instructions to stream audio + const cxml = ` + + + `; + + res.type('text/xml').send(cxml); +}); + +app.listen({ port: 5050, host: '0.0.0.0' }); +``` + + + + +:::tip Webhook URL Format +Your webhook URL must include `/incoming-call` at the end: +- Local: `https://your-ngrok-url.ngrok.io/incoming-call` +- Production: `https://your-domain.com/incoming-call` +::: + +--- + +## Step 2: Build the WebSocket Bridge + +Create a WebSocket server to handle bidirectional audio streaming. 
+ +### Initialize WebSocket Server + + + + +```typescript +import websocket from '@fastify/websocket'; +import { SignalWireRealtimeTransportLayer } from '../transports/SignalWireRealtimeTransportLayer.js'; +import { RealtimeSession, RealtimeAgent } from '@openai/agents/realtime'; +import { AGENT_CONFIG } from '../config.js'; + +interface SignalWireMessage { + event: 'start' | 'media' | 'stop' | 'mark'; + media?: { + payload: string; // Base64 encoded audio + track?: 'inbound' | 'outbound'; + }; + start?: { + streamSid: string; + callSid: string; + mediaFormat?: { + encoding: string; + sampleRate: number; + channels: number; + }; + }; +} + +app.register(websocket); + +app.get('/media-stream', { websocket: true }, async (connection) => { + console.log('πŸ“ž Client connected to WebSocket'); + + try { + // Create SignalWire transport layer with configured audio format + const signalWireTransportLayer = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket: connection, + audioFormat: AGENT_CONFIG.audioFormat + }); + + // Create AI agent and session + const realtimeAgent = new RealtimeAgent(agentConfig); + const session = new RealtimeSession(realtimeAgent, { + transport: signalWireTransportLayer, + model: 'gpt-4o-realtime-preview' + }); + + // Connect to OpenAI Realtime API + await session.connect({ + apiKey: process.env.OPENAI_API_KEY + }); + + // Handle session events + session.on('agent_tool_start', (context, agent, tool, details) => { + console.log('πŸ”§ Tool call started:', details); + }); + + } catch (error) { + console.error('❌ Transport initialization failed:', error); + } +}); +``` + + + + +```javascript +const websocket = require('@fastify/websocket'); +const { SignalWireRealtimeTransportLayer } = require('../transports/SignalWireRealtimeTransportLayer.js'); +const { RealtimeSession, RealtimeAgent } = require('@openai/agents/realtime'); +const { AGENT_CONFIG } = require('../config.js'); + +app.register(websocket); + +app.get('/media-stream', { websocket: 
true }, async (connection) => { + console.log('πŸ“ž Client connected to WebSocket'); + + try { + // Create SignalWire transport layer with configured audio format + const signalWireTransportLayer = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket: connection, + audioFormat: AGENT_CONFIG.audioFormat + }); + + // Create AI agent and session + const realtimeAgent = new RealtimeAgent(agentConfig); + const session = new RealtimeSession(realtimeAgent, { + transport: signalWireTransportLayer, + model: 'gpt-4o-realtime-preview' + }); + + // Connect to OpenAI Realtime API + await session.connect({ + apiKey: process.env.OPENAI_API_KEY + }); + + // Handle session events + session.on('agent_tool_start', (context, agent, tool, details) => { + console.log('πŸ”§ Tool call started:', details); + }); + + } catch (error) { + console.error('❌ Transport initialization failed:', error); + } +}); +``` + + + + + + +SignalWire sends several types of messages through the WebSocket: + +| Event | Purpose | Key Data | Handled By | +|-------|---------|----------|------------| +| `start` | Connection initialized | `streamSid`, `callSid`, `mediaFormat` | Transport Layer | +| `media` | Audio data packet (~20ms) | Base64 encoded `payload`, `track` | Transport Layer | +| `mark` | Audio playback confirmation | `name` (for timing) | Transport Layer | +| `stop` | Stream ending | None | Transport Layer | + +**Key Features of SignalWireRealtimeTransportLayer:** +- **Automatic audio format conversion** between SignalWire and OpenAI +- **Interruption handling** using `clear` events and mark tracking +- **Base64 encoding/decoding** for audio data +- **Session lifecycle management** with proper cleanup +- **Error recovery** and reconnection handling + +**Audio Format Support:** +- **Input**: G.711 ΞΌ-law (8kHz) or PCM16 (24kHz) from SignalWire +- **Output**: Matches input format automatically +- **OpenAI Integration**: Handles format negotiation transparently + + + +--- + +## Step 3: Integrate 
with OpenAI Realtime API + +Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. + +### Create the AI Session + + + + +```typescript +import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; +import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; +import { SignalWireRealtimeTransportLayer } from '../transports/SignalWireRealtimeTransportLayer.js'; +import { allTools } from '../tools/index.js'; + +// Configure the AI agent +const agentConfig: RealtimeAgentConfiguration = { + name: 'SignalWire Voice Assistant', + instructions: `You are a helpful and friendly voice assistant. + Always start every conversation by greeting the caller first. + You can help with weather information, time queries, and general conversation. + Be concise and friendly in your responses.`, + tools: allTools, // Weather, time, and other tools + voice: 'alloy' +}; + +async function createAISession(signalWireWebSocket: WebSocket): Promise { + // Create transport layer that bridges SignalWire and OpenAI + const transport = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket, + audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio + }); + + // Create agent and session + const agent = new RealtimeAgent(agentConfig); + const session = new RealtimeSession(agent, { + transport, + model: 'gpt-4o-realtime-preview' + }); + + // Connect to OpenAI + await session.connect({ + apiKey: process.env.OPENAI_API_KEY + }); + + return session; +} +``` + + + + +```javascript +const { RealtimeAgent, RealtimeSession } = require('@openai/agents/realtime'); +const { SignalWireRealtimeTransportLayer } = require('../transports/SignalWireRealtimeTransportLayer.js'); +const { allTools } = require('../tools/index.js'); + +// Configure the AI agent +const agentConfig = { + name: 'SignalWire Voice Assistant', + instructions: `You are a helpful and friendly voice assistant. + Always start every conversation by greeting the caller first. 
+ You can help with weather information, time queries, and general conversation. + Be concise and friendly in your responses.`, + tools: allTools, // Weather, time, and other tools + voice: 'alloy' +}; + +async function createAISession(signalWireWebSocket) { + // Create transport layer that bridges SignalWire and OpenAI + const transport = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket, + audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio + }); + + // Create agent and session + const agent = new RealtimeAgent(agentConfig); + const session = new RealtimeSession(agent, { + transport, + model: 'gpt-4o-realtime-preview' + }); + + // Connect to OpenAI + await session.connect({ + apiKey: process.env.OPENAI_API_KEY + }); + + return session; +} +``` + + + + +### Send Audio Back to Caller + +```typescript +// Audio is automatically handled by SignalWireRealtimeTransportLayer +// The transport layer manages: +// 1. Audio format conversion (g711_ulaw ↔ pcm16) +// 2. Base64 encoding/decoding +// 3. Chunk timing and interruption handling +// 4. 
Mark events for tracking audio playback + +// Example of session event handling: +session.on('agent_tool_start', (context, agent, tool, details) => { + console.log('πŸ”§ Tool call started:', details); +}); + +session.on('agent_tool_end', (context, agent, tool, result, details) => { + console.log('βœ… Tool call completed:', details); +}); + +session.on('error', (error) => { + console.error('❌ Session error:', error); +}); +``` + +### Environment Configuration + +Set up your environment variables for different deployment scenarios: + + + + +Create a `.env` file in your project root: + +```bash +# Required +OPENAI_API_KEY=sk-your-actual-api-key-here + +# Optional +PORT=5050 +AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio +``` + + + + +For production with Docker secrets: + +```bash +# Create secrets directory +mkdir -p secrets +echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt +``` + +Environment variables in `docker-compose.yml`: +```yaml +environment: + - PORT=5050 + - AUDIO_FORMAT=pcm16 +``` + + + + +:::note Audio Format Options +Choose the right audio format for your use case: +- **g711_ulaw (8kHz)**: Standard telephony quality (default) +- **pcm16 (24kHz)**: High definition audio for demos +::: + +--- + +## Step 4: Add Function Calling + +Enable your AI to execute server-side tools during conversations. 
+ +### Define Tools + + + + +```typescript +import { tool as realtimeTool } from '@openai/agents/realtime'; +import { z } from 'zod'; + +// Weather tool using real US National Weather Service API +const weatherTool = realtimeTool({ + name: 'get_weather', + description: 'Get current weather information for any US city', + parameters: z.object({ + location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)') + }), + execute: async ({ location }) => { + try { + // Step 1: Geocoding - Convert city name to coordinates + const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; + const geocodeResponse = await fetch(geocodeUrl, { + headers: { + 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0' + } + }); + + if (!geocodeResponse.ok) { + return 'Sorry, weather information is currently unavailable.'; + } + + const geocodeData = await geocodeResponse.json(); + if (!geocodeData || geocodeData.length === 0) { + return `Sorry, I couldn't find the location "${location}". 
Please try a different city name.`; + } + + const lat = parseFloat(geocodeData[0].lat); + const lon = parseFloat(geocodeData[0].lon); + + // Step 2: Get weather from weather.gov + const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; + const pointsResponse = await fetch(pointsUrl); + const pointsData = await pointsResponse.json(); + + const forecastUrl = pointsData.properties?.forecast; + if (!forecastUrl) { + return 'Sorry, weather information is currently unavailable.'; + } + + const forecastResponse = await fetch(forecastUrl); + const forecastData = await forecastResponse.json(); + + const currentPeriod = forecastData.properties?.periods?.[0]; + if (!currentPeriod) { + return 'Sorry, weather information is currently unavailable.'; + } + + // Format response for voice + const cityName = geocodeData[0].display_name.split(',')[0]; + return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; + + } catch (error) { + return 'Sorry, weather information is currently unavailable.'; + } + } +}); + +// Time tool example (no external API required) +const timeTool = realtimeTool({ + name: 'get_time', + description: 'Get the current time in Eastern Time', + parameters: z.object({}), // No parameters needed + execute: async () => { + try { + const now = new Date(); + const easternTime = now.toLocaleString('en-US', { + timeZone: 'America/New_York', + timeZoneName: 'short', + weekday: 'long', + year: 'numeric', + month: 'long', + day: 'numeric', + hour: 'numeric', + minute: '2-digit' + }); + return `The current time in Eastern Time is ${easternTime}.`; + } catch (error) { + return 'Sorry, time information is currently unavailable.'; + } + } +}); + +// Export all tools +export const allTools = [weatherTool, timeTool]; + +// Add to your AI agent configuration +const agentConfig = { + name: 'SignalWire Voice Assistant', + instructions: `You are a helpful and friendly voice assistant. 
+ Always start every conversation by greeting the caller first. + You can help with weather information, time queries, and general conversation. + Be concise and friendly in your responses.`, + tools: allTools, + voice: 'alloy' +}; +``` + + + + +```javascript +const { tool: realtimeTool } = require('@openai/agents/realtime'); +const { z } = require('zod'); + +// Weather tool using real US National Weather Service API +const weatherTool = realtimeTool({ + name: 'get_weather', + description: 'Get current weather information for any US city', + parameters: z.object({ + location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)') + }), + execute: async ({ location }) => { + try { + // Step 1: Geocoding - Convert city name to coordinates + const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; + const geocodeResponse = await fetch(geocodeUrl, { + headers: { + 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0' + } + }); + + if (!geocodeResponse.ok) { + return 'Sorry, weather information is currently unavailable.'; + } + + const geocodeData = await geocodeResponse.json(); + if (!geocodeData || geocodeData.length === 0) { + return `Sorry, I couldn't find the location "${location}". 
Please try a different city name.`; + } + + const lat = parseFloat(geocodeData[0].lat); + const lon = parseFloat(geocodeData[0].lon); + + // Step 2: Get weather from weather.gov + const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; + const pointsResponse = await fetch(pointsUrl); + const pointsData = await pointsResponse.json(); + + const forecastUrl = pointsData.properties?.forecast; + if (!forecastUrl) { + return 'Sorry, weather information is currently unavailable.'; + } + + const forecastResponse = await fetch(forecastUrl); + const forecastData = await forecastResponse.json(); + + const currentPeriod = forecastData.properties?.periods?.[0]; + if (!currentPeriod) { + return 'Sorry, weather information is currently unavailable.'; + } + + // Format response for voice + const cityName = geocodeData[0].display_name.split(',')[0]; + return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; + + } catch (error) { + return 'Sorry, weather information is currently unavailable.'; + } + } +}); + +// Time tool example (no external API required) +const timeTool = realtimeTool({ + name: 'get_time', + description: 'Get the current time in Eastern Time', + parameters: z.object({}), // No parameters needed + execute: async () => { + try { + const now = new Date(); + const easternTime = now.toLocaleString('en-US', { + timeZone: 'America/New_York', + timeZoneName: 'short', + weekday: 'long', + year: 'numeric', + month: 'long', + day: 'numeric', + hour: 'numeric', + minute: '2-digit' + }); + return `The current time in Eastern Time is ${easternTime}.`; + } catch (error) { + return 'Sorry, time information is currently unavailable.'; + } + } +}); + +// Export all tools +module.exports = { allTools: [weatherTool, timeTool] }; + +// Add to your AI agent configuration +const agentConfig = { + name: 'SignalWire Voice Assistant', + instructions: `You are a helpful and friendly voice assistant. 
+ Always start every conversation by greeting the caller first. + You can help with weather information, time queries, and general conversation. + Be concise and friendly in your responses.`, + tools: allTools, + voice: 'alloy' +}; +``` + + + + + + +1. **User asks**: "What's the weather in New York?" +2. **AI recognizes intent**: Needs weather information +3. **Function call triggered**: `get_weather({ location: "New York" })` +4. **Server executes**: Fetches from weather API +5. **Result returned**: AI incorporates into response +6. **User hears**: "The weather in New York is 72°F and sunny." + +All of this happens in real-time during the conversation. + + + +--- + +## Technical Deep Dive + +### The SignalWire Transport Layer + +The `SignalWireRealtimeTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: + +```typescript +// Key features of the transport layer: +const transport = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket: connection, + audioFormat: 'g711_ulaw' // or 'pcm16' +}); + +// Automatic handling of: +// 1. Audio format conversion +// 2. Base64 encoding/decoding +// 3. Interruption detection +// 4. Mark event tracking +// 5. Session cleanup +``` + +**Session Lifecycle:** +1. **WebSocket Connection** → SignalWire connects to `/media-stream` +2. **Transport Creation** → Bridge between SignalWire and OpenAI +3. **AI Session Start** → RealtimeSession connects to OpenAI +4. **Audio Streaming** → Bidirectional real-time audio +5. **Tool Execution** → Function calls processed server-side +6. 
**Session Cleanup** → Graceful disconnect and resource cleanup + +### Audio Processing Pipeline + +```mermaid +graph LR + A[Phone Call] --> B[SignalWire] + B --> C[WebSocket] + C --> D[Transport Layer] + D --> E[OpenAI Realtime] + E --> D + D --> C + C --> B + B --> A +``` + +**Audio Flow Details:** +- **Inbound**: Phone → SignalWire → Base64 → Transport → ArrayBuffer → OpenAI +- **Outbound**: OpenAI → ArrayBuffer → Transport → Base64 → SignalWire → Phone +- **Latency**: Typically 150-300ms end-to-end +- **Quality**: Depends on codec choice (G.711 vs PCM16) + +--- + +## Audio Processing + +### Codec Selection Guide + +Choose the right audio codec for your use case: + + + + + + +### Configure Audio Format + + + + +```xml + + + + + +``` + + + + +```bash +# In your .env file +AUDIO_FORMAT=pcm16 # or g711_ulaw +``` + + + + +### Advanced Configuration + + + + +The transport layer automatically handles interruptions: + +```typescript +// When user interrupts AI speech: +// 1. Transport detects voice activity +// 2. Sends 'clear' event to SignalWire +// 3. Truncates OpenAI audio at last played position +// 4. Resumes with new user input + +session.on('interruption', (event) => { + console.log('🛑 User interrupted AI speech'); +}); +``` + + + + +Mark events track audio playback timing: + +```typescript +// Transport sends mark events for each audio chunk +{ + "event": "mark", + "mark": { "name": "item123:45" }, // itemId:chunkNumber + "streamSid": "..." 
+} + +// Used for precise interruption timing +``` + + + + +Built-in error handling and recovery: + +```typescript +session.on('error', (error) => { + console.error('Session error:', error); + // Transport automatically attempts reconnection +}); + +transport.on('*', (event) => { + if (event.type === 'transport_error') { + // Handle transport-specific errors + console.error('Transport error:', event.error); + } +}); +``` + + + + +:::tip Performance Optimization +For production deployments: +- Use **G.711 ΞΌ-law** for standard phone calls (lower latency) +- Use **PCM16** for high-fidelity demos (better quality) +- Monitor WebSocket connection stability +- Implement connection pooling for high traffic +- Track audio latency metrics +::: + +--- + +## Deployment + +### Local Development + +1. **Install dependencies** + ```bash + npm install + ``` + +2. **Set up environment** + ```bash + cp .env.example .env + # Edit .env with your OpenAI API key + ``` + +3. **Start your server** + ```bash + npm run build + npm start + + # Or for development with hot reload: + npm run dev + ``` + +4. **Expose with ngrok** + ```bash + npx ngrok http 5050 + # Note the HTTPS URL (e.g., https://abc123.ngrok.io) + ``` + +5. **Configure SignalWire webhook** + - Use the ngrok HTTPS URL + `/incoming-call` + - Example: `https://abc123.ngrok.io/incoming-call` + +6. **Test your setup** + ```bash + # Check health endpoint + curl https://abc123.ngrok.io/health + + # Should return: {"status":"healthy","timestamp":"..."} + ``` + +### Production with Docker + + + + +```dockerfile +FROM node:20-alpine + +# Install system dependencies +RUN apk add --no-cache dumb-init + +WORKDIR /app + +# Copy package files +COPY package*.json ./ + +# Install dependencies +RUN npm ci --only=production && npm cache clean --force + +# Copy source code +COPY . . 
+ +# Build TypeScript +RUN npm run build + +# Create non-root user +RUN addgroup -g 1001 -S nodejs && \ + adduser -S nodeuser -u 1001 + +# Change ownership and switch to non-root user +RUN chown -R nodeuser:nodejs /app +USER nodeuser + +EXPOSE 5050 + +# Use dumb-init for proper signal handling +ENTRYPOINT ["dumb-init", "--"] +CMD ["node", "dist/index.js"] +``` + + + + +```yaml +services: + signalwire-assistant: + build: . + ports: + - "${PORT:-5050}:${PORT:-5050}" + environment: + - PORT=${PORT:-5050} + - AUDIO_FORMAT=pcm16 + secrets: + - openai_api_key + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5050/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + +secrets: + openai_api_key: + file: ./secrets/openai_api_key.txt +``` + + + + + + +**Security & Secrets:** +- Use Docker secrets or external secret management (AWS Secrets Manager, Azure Key Vault) +- Never commit API keys to version control +- Use non-root user in Docker containers +- Implement proper CORS and rate limiting + +**Monitoring & Observability:** +- Set up health checks (`/health` endpoint included) +- Implement structured logging with correlation IDs +- Monitor WebSocket connection metrics +- Track audio latency and quality metrics +- Set up alerting for failed calls + +**Scalability & Performance:** +- Use horizontal scaling with session affinity +- Implement connection pooling for high traffic +- Consider using Redis for session state if needed +- Monitor memory usage (audio buffers can accumulate) + +**Error Handling:** +- Graceful degradation when OpenAI API is unavailable +- Retry logic with exponential backoff +- Proper WebSocket reconnection handling +- Fallback responses when tools fail + +**Development Workflow:** +```bash +# Local development with hot reload +npm run dev + +# Type checking +npm run typecheck + +# 
Production build +npm run build && npm start + +# Debug logging +DEBUG=openai-agents:* npm run dev +``` + + + +--- + +## Testing Your Assistant + +:::tip Before Testing +Make sure your server is running and the health check passes: +```bash +curl http://localhost:5050/health +# Should return: {"status":"healthy"} +``` +::: + +### Quick Test with SIP + +1. **Create a SIP address** in your SignalWire project: + - Go to **Call Fabric** > **Addresses** + - Click **Create** > **SIP Address** + - Link it to your cXML resource + +2. **Test with a SIP client:** + ``` + sip:your-sip-address@yourproject.dapp.signalwire.com + ``` + + **Recommended SIP clients:** + - **Desktop:** [Zoiper](https://www.zoiper.com/) (free) + - **Mobile:** Zoiper or [Linphone](https://www.linphone.org/) + - **Web:** Use SignalWire's built-in browser calling + +3. **Expected call flow:** + - Call connects immediately + - AI greets you with "Hello! I'm your AI voice assistant..." + - Try saying: "What's the weather in New York?" + - Try saying: "What time is it?" + - Test interruptions by talking while AI is speaking + +### Alternative: Purchase a Phone Number + +1. **Buy a phone number** in SignalWire +2. **Link it** to your cXML resource +3. **Call normally** from any phone + +### Testing Checklist + +- [ ] **Connection**: Call connects and you hear AI greeting +- [ ] **Audio Quality**: Clear audio both directions +- [ ] **Weather Tool**: Ask "What's the weather in [city]?" +- [ ] **Time Tool**: Ask "What time is it?" 
+- [ ] **Interruption**: Talk while AI is speaking (should stop gracefully) +- [ ] **Conversation**: Have a natural back-and-forth conversation +- [ ] **Goodbye**: End call normally + +### Debugging Your Implementation + + + + + + + + +**Console Output to Look For:** +```bash +📡 Server running on http://0.0.0.0:5050 +🏥 Health check: http://0.0.0.0:5050/health +🔊 Audio format: g711_ulaw (8kHz telephony) +🎙️ Voice: alloy + +# When calls come in: +📞 Incoming call - Audio format: g711_ulaw, SignalWire codec: default +📱 Client connected to WebSocket +🔧 Tool call started: get_weather +✅ Tool call completed: get_weather +``` + +--- + +## Common Issues & Solutions + +### Troubleshooting Guide + +| Issue | Cause | Solution | +|-------|-------|----------| +| No audio from AI | Codec mismatch or transport error | Check `AUDIO_FORMAT` env var, verify SignalWire codec setting | +| High latency | Network or buffering issues | Use `g711_ulaw` for lower latency, check network | +| WebSocket disconnections | Network timeout or server overload | Implement reconnection logic, monitor server resources | +| Function calls fail | Network issues or API errors | Add retry logic, check API quotas and keys | +| "Missing OPENAI_API_KEY" | Configuration error | Verify .env file or Docker secrets setup | +| Calls not connecting | Webhook URL issues | Ensure URL is public and includes `/incoming-call` | +| Audio quality poor | Wrong codec configuration | Match audio format between SignalWire and application | +| Memory leaks | Audio buffer accumulation | Monitor memory usage, implement cleanup | +| Session errors | OpenAI API issues | Check API status, implement fallback responses | + +### Debug Checklist + +**Basic Setup:** +- [ ] Webhook URL includes `/incoming-call` endpoint +- [ ] ngrok is running and exposing port 5050 (for local dev) +- [ ] OpenAI API key is properly configured +- [ ] Node.js 20+ is installed +- [ ] All npm dependencies installed (`npm install`) + 
+**Configuration:** +- [ ] Audio format matches SignalWire codec setting +- [ ] Environment variables properly set +- [ ] Docker secrets configured (if using Docker) +- [ ] Port 5050 is available and not blocked + +**Runtime:** +- [ ] WebSocket connection establishes successfully +- [ ] Function tools are registered and accessible +- [ ] Health check endpoint responds (`/health`) +- [ ] Console logs show proper connection messages +- [ ] No error messages in server logs + +**SignalWire Integration:** +- [ ] cXML resource properly configured +- [ ] SIP address or phone number linked to resource +- [ ] Webhook URL is publicly accessible +- [ ] SignalWire project settings correct + +**Testing:** +- [ ] Can make test calls to SIP address +- [ ] Audio flows both directions +- [ ] AI responds appropriately +- [ ] Function calls (weather, time) work +- [ ] Interruptions handled gracefully + +:::warning Common Mistakes + +**Webhook Configuration:** +- Forgetting `/incoming-call` in your webhook URL (most common!) +- Using HTTP instead of HTTPS for webhook URL +- ngrok URL changing and not updating SignalWire configuration + +**Environment Setup:** +- Missing `.env` file or incorrect API key format +- Using both `.env` and Docker secrets (use only one method) +- Wrong audio format causing codec mismatches + +**Development Issues:** +- Not running `npm run build` after code changes +- Port 5050 already in use by another application +- Network firewall blocking WebSocket connections + +::: + +--- + +## Complete Example + +Ready to see it all in action? 
+ + + +This reference implementation includes: +- ✅ **Complete bidirectional streaming** - Real-time audio with zero buffering +- ✅ **OpenAI Realtime API integration** - Latest GPT-4 Realtime model +- ✅ **Function calling examples** - Weather and time tools included +- ✅ **Production deployment** - Docker, health checks, logging +- ✅ **Error handling** - Graceful fallbacks and recovery +- ✅ **Audio optimization** - Both HD (24kHz) and telephony (8kHz) support +- ✅ **TypeScript** - Full type safety and development experience + +### Key Files to Study + +``` +src/ +├── index.ts # Server setup and initialization +├── config.ts # Environment and agent configuration +├── constants.ts # Application constants and messages +├── routes/ +│ ├── webhook.ts # cXML webhook handler (/incoming-call) +│ ├── streaming.ts # WebSocket bridge (/media-stream) +│ └── health.ts # Health check endpoint (/health) +├── tools/ +│ ├── index.ts # Tool exports +│ ├── weather.tool.ts # Weather information tool +│ └── time.tool.ts # Current time tool +├── transports/ +│ └── SignalWireRealtimeTransportLayer.ts # Audio bridge +├── types/ +│ └── index.ts # TypeScript interfaces +└── utils/ + └── logger.ts # Logging utilities +``` + +--- + +## Next Steps + + + + + + + +--- + +## Resources + +### Documentation +- [SignalWire Call Fabric Documentation](/platform/call-fabric) +- [OpenAI Realtime API Guide](https://platform.openai.com/docs/guides/realtime) +- [cXML Reference](/compatibility-api/cxml) +- [@openai/agents SDK Documentation](https://www.npmjs.com/package/@openai/agents) + +### Code Repository +- [GitHub Repository](https://github.com/signalwire/cxml-realtime-agent-stream) - **Complete working example** +- [API Reference](https://github.com/signalwire/cxml-realtime-agent-stream/tree/main/src) - **Explore the source code** + +### Key Implementation Files +- 
[`src/index.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/index.ts) - Server setup +- [`src/routes/streaming.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/routes/streaming.ts) - WebSocket handler +- [`src/transports/SignalWireRealtimeTransportLayer.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/transports/SignalWireRealtimeTransportLayer.ts) - Audio bridge +- [`src/tools/weather.tool.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/tools/weather.tool.ts) - Function calling example + +--- + +*Need help? Join our [Community Slack](https://signalwire.community) or contact [support@signalwire.com](mailto:support@signalwire.com)* \ No newline at end of file From a16ea1bcb552661eae849ae3a4b484687889e52a Mon Sep 17 00:00:00 2001 From: august Date: Fri, 26 Sep 2025 15:50:11 -0400 Subject: [PATCH 02/73] introduction --- .../realtime-streaming-to-openai/index.mdx | 116 +++++++++--------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index d69571229..e1620666a 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1,47 +1,51 @@ --- -title: Building Real-Time AI Voice Assistants +title: Integrate OpenAI R description: Learn how to create bidirectional audio streaming bridges between telephony platforms and AI real-time APIs slug: /guides/real-time-ai-voice-assistants sidebar_label: Real-Time AI Voice Assistants sidebar_position: 1 --- -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; -import { Card, CardGroup } from '@site/src/components/Extras/Card'; -import { Accordion, 
AccordionGroup } from '@site/src/components/Extras/Accordion'; +# Stream an OpenAI Realtime API agent with cXML -# Building Real-Time AI Voice Assistants +Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML -Create bidirectional audio streaming bridges between telephony and AI +In this guide, we will build a Node.js application that serves a +[cXML Script][cxml] +that initiates a two-way (bidirectional) +[`<Stream>`][bidir-stream] +to the OpenAI Realtime API. +When a caller initiates a SIP or +PSTN +call to the assigned phone number, the SignalWire platform requests and runs the script. -Transform phone calls into intelligent conversations using WebSocket streaming and modern AI APIs. +The key architectural components involved are: -## What You'll Build +- **cXML server:** Our Fastify server serves dynamic cXML to the SignalWire platform. +This gives our application the ability to update the call instructions according to each request. +- **WebSocket bridge:** Enables real-time audio streaming between telephony and AI +- **AI integration:** Natural conversations with OpenAI's Realtime API +- **Function Calling:** Server-side tool execution during conversations - - - - - +Here's what happens when someone calls your application: ---- +```mermaid +flowchart LR + A(Phone call) --> B(SignalWire platform) + B --> C(Your server) + C --> D(WebSocket) + D <--> E(OpenAI API) + B <--> D + D --> F(Tools) +``` -## Getting Started +1. **Call arrives** at SignalWire +2. **Webhook triggers** your server endpoint +3. **WebSocket streams** audio bidirectionally +4. **AI processes** speech in real-time +5. **Responses flow back** to the caller -### Prerequisites +## Prerequisites Before you begin, ensure you have: @@ -51,7 +55,7 @@ Before you begin, ensure you have: 4. **ngrok** - For local development tunneling ([Install ngrok](https://ngrok.com/download)) 5. 
**Docker** (optional) - For containerized deployment -### Required Dependencies +### Dependencies The project uses these key packages: @@ -65,28 +69,17 @@ The project uses these key packages: } ``` -### Quick Overview - -Here's what happens when someone calls your AI assistant: +--- -
- Architecture diagram showing call flow from phone through SignalWire to WebSocket server to OpenAI -
Real-time bidirectional audio streaming architecture
-
+## Steps -1. **Call arrives** at SignalWire -2. **Webhook triggers** your server endpoint -3. **WebSocket streams** audio bidirectionally -4. **AI processes** speech in real-time -5. **Responses flow back** to the caller - ---- + -## Step 1: Create the Webhook Endpoint +### Create the Webhook Endpoint Your server needs to handle incoming call webhooks from SignalWire. -### Set Up the HTTP Endpoint +**Set Up the HTTP Endpoint** @@ -145,13 +138,11 @@ Your webhook URL must include `/incoming-call` at the end: - Production: `https://your-domain.com/incoming-call` ::: ---- - -## Step 2: Build the WebSocket Bridge +### Build the WebSocket Bridge Create a WebSocket server to handle bidirectional audio streaming. -### Initialize WebSocket Server +**Initialize WebSocket Server** @@ -286,13 +277,11 @@ SignalWire sends several types of messages through the WebSocket: ---- - -## Step 3: Integrate with OpenAI Realtime API +### Integrate with OpenAI Realtime API Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. -### Create the AI Session +**Create the AI Session** @@ -382,7 +371,7 @@ async function createAISession(signalWireWebSocket) { -### Send Audio Back to Caller +**Send Audio Back to Caller** ```typescript // Audio is automatically handled by SignalWireRealtimeTransportLayer @@ -406,7 +395,7 @@ session.on('error', (error) => { }); ``` -### Environment Configuration +**Environment Configuration** Set up your environment variables for different deployment scenarios: @@ -451,13 +440,11 @@ Choose the right audio format for your use case: - **pcm16 (24kHz)**: High definition audio for demos ::: ---- - -## Step 4: Add Function Calling +### Add Function Calling Enable your AI to execute server-side tools during conversations. -### Define Tools +**Define Tools** @@ -683,6 +670,8 @@ All of this happens in real-time during the conversation. + + --- ## Technical Deep Dive @@ -1263,4 +1252,9 @@ src/ --- -*Need help? 
Join our [Community Slack](https://signalwire.community) or contact [support@signalwire.com](mailto:support@signalwire.com)* \ No newline at end of file +*Need help? Join our [Community Slack](https://signalwire.community) or contact [support@signalwire.com](mailto:support@signalwire.com)* + + + +[cxml]: /compatibility-api/cxml "Documentation for cXML, or Compatibility XML." +[bidir-stream]: /compatibility-api/cxml/voice/stream#bidirectional-stream "Technical reference for creating a bidirectional Stream in cXML." From 5f653f6fc48fad3b4bc6aa6d61530af46760ca6b Mon Sep 17 00:00:00 2001 From: hey-august Date: Fri, 26 Sep 2025 16:19:36 -0400 Subject: [PATCH 03/73] remove nonexistent links --- .../realtime-streaming-to-openai/index.mdx | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index e1620666a..f93a68cb1 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -3,7 +3,7 @@ title: Integrate OpenAI R description: Learn how to create bidirectional audio streaming bridges between telephony platforms and AI real-time APIs slug: /guides/real-time-ai-voice-assistants sidebar_label: Real-Time AI Voice Assistants -sidebar_position: 1 +sidebar_position: 0 --- # Stream an OpenAI Realtime API agent with cXML @@ -1207,30 +1207,9 @@ src/ --- -## Next Steps +## Next steps - - - - - - ---- +todo ## Resources From 87309042199d29c3eabbccd9474a749b58589841 Mon Sep 17 00:00:00 2001 From: hey-august Date: Tue, 30 Sep 2025 15:22:46 -0400 Subject: [PATCH 04/73] begin incorporating readme steps --- .../realtime-streaming-to-openai/index.mdx | 177 ++++++++++++++++-- 1 file changed, 157 insertions(+), 20 
deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index f93a68cb1..a8b622217 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1,12 +1,21 @@ --- -title: Integrate OpenAI R -description: Learn how to create bidirectional audio streaming bridges between telephony platforms and AI real-time APIs +title: Integrate OpenAI Realtime API with cXML +description: Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML. slug: /guides/real-time-ai-voice-assistants -sidebar_label: Real-Time AI Voice Assistants +sidebar_label: Stream an OpenAI Realtime API agent with cXML sidebar_position: 0 +x-custom: + tags: + - product:ai + - product:voice + - language:nodejs + - language:javascript + - sdk:compatibility --- -# Stream an OpenAI Realtime API agent with cXML +import AddResource from '/docs/main/_common/dashboard/add-resource.mdx'; + +# Stream to OpenAI Realtime API agent with cXML Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML @@ -49,26 +58,153 @@ flowchart LR Before you begin, ensure you have: -1. **SignalWire Account** - [Sign up free](https://signalwire.com) -2. **OpenAI API Key** - [Get access](https://platform.openai.com/api-keys) (requires paid account) -3. **Node.js 20+** - For running the TypeScript server -4. **ngrok** - For local development tunneling ([Install ngrok](https://ngrok.com/download)) -5. 
**Docker** (optional) - For containerized deployment +- **SignalWire Space** - [Sign up free](https://signalwire.com) +- **OpenAI API Key** - [Get access](https://platform.openai.com/api-keys) (requires paid account) +- **Node.js 20+** - For running the TypeScript server ([Install Node](https://nodejs.org/en/download)) +- **ngrok** or other tunneling service - For local development tunneling ([Install ngrok](https://ngrok.com/download)) +- **Docker** (optional) - For containerized deployment -### Dependencies +--- -The project uses these key packages: + -```json -{ - "@openai/agents": "^0.1.4", - "@openai/agents-realtime": "^0.1.4", - "@fastify/websocket": "^10.0.1", - "fastify": "^4.28.1", - "zod": "^3.23.8" -} +### Clone and install + +Clone the SignalWire Solutions repository, navigate to this example, and install. + +```bash +git clone https://github.com/signalwire/solutions-architecture +cd code/cxml-realtime-agent-stream +npm install +``` + +### Add your API key + +Choose **ONE** method based on how you'll run the application. + + + + +When running the server on your local machine, store your credentials in a `.env` file. + +```bash +cp .env.example .env +``` + +Edit `.env` and add your OpenAI API key: + +```bash title=".env" +OPENAI_API_KEY=sk-your-actual-api-key-here +``` + + + + + +When running the server in production with the Docker container, store your credentials in a `secrets` folder. + +```bash +mkdir secrets +``` + +```bash +echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt +``` + + + + +### Run the application + + + + +```bash +npm run build +npm start ``` + + + + +```bash +docker-compose up --build signalwire-assistant +``` + + + + +Your AI assistant webhook is now running at `http://localhost:5050/incoming-call`. + +:::important +The `/incoming-call` endpoint is where SignalWire sends call data to trigger your AI assistant. +This is the URL you'll configure in your SignalWire cXML resource. 
+::: + +### Create a cXML Script Resource + +Next, we need to tell SignalWire to request cXML from your server when a call comes in. + +In your SignalWire Dashboard, navigate to [My Resources][resources]. +Click **Create Resource**, select **Script** as the resource type, and choose `cXML`. + +Under `Handle Using`, select `External Url`. +Set the `Primary Script URL` to your server's **webhook endpoint**: + + + +Use ngrok to expose port 5050 on your development machine: + +```bash +ngrok http 5050 +``` + +Append `/incoming-call` to the HTTPS URL returned by ngrok. +https://abc123.ngrok.io/incoming-call + + +For production environments, set your server URL + `/incoming-call`: + ``` + https://your-domain.com/incoming-call + ``` + + + + > **🚨 Critical:** You MUST include `/incoming-call` at the end of your URL. This is the specific webhook endpoint that handles incoming calls. +6. Give it a descriptive name (e.g., "AI Voice Assistant") +7. Create the resource + +> **📖 Learn More:** Follow the [SignalWire Call Fabric Resources Guide](https://developer.signalwire.com/platform/call-fabric/resources) for detailed instructions. + + +### 3. Create a SIP Address + +To test your AI assistant, create a SIP address that connects to your cXML resource: + +1. Now from the resource page of the resource you just created, click the `Addresses & Phone Numbers` tab +2. Click **Add** to create a new address +3. Select **SIP Address** as the address type +4. Fill out the address information +5. Save the configuration + +> **📖 Learn More:** Follow the [SignalWire Call Fabric Addresses Guide](https://developer.signalwire.com/platform/call-fabric/addresses) for detailed SIP address creation. + + +> **💡 Tip:** You can also purchase a regular [phone number](https://developer.signalwire.com/platform/dashboard/get-started/phone-numbers) and link it to your cXML resource if you prefer traditional phone number calling. + +### Step 4: Test It! 
+**Call the SIP address you created in [Step 3](#3-create-a-sip-address) to test your AI assistant:** +1. **Using a SIP Phone or Softphone:** + - Dial: `sip:your-sip-address@yourproject.dapp.signalwire.com` + - Replace with the actual SIP address you created in your SignalWire resource +2. **The call flow will be:** + - Your SIP call → SignalWire → Your webhook endpoint → AI assistant +> **📱 Alternative:** If you purchased a regular phone number and linked it to your cXML resource, you can also call that number directly. +> **🔧 Troubleshooting:** If you haven't set up ngrok yet, go back to [SignalWire Setup](#signalwire-setup) to expose your local server. + + + --- ## Steps @@ -399,7 +535,7 @@ session.on('error', (error) => { Set up your environment variables for different deployment scenarios: - + Create a `.env` file in your project root: @@ -1237,3 +1373,4 @@ todo [cxml]: /compatibility-api/cxml "Documentation for cXML, or Compatibility XML." [bidir-stream]: /compatibility-api/cxml/voice/stream#bidirectional-stream "Technical reference for creating a bidirectional Stream in cXML." +[resources]: https://my.signalwire.com?page=resources "The My Resources page of your SignalWire Dashboard." 
\ No newline at end of file From fc2832d773993b3697e6675b7ff5653f296fc124 Mon Sep 17 00:00:00 2001 From: hey-august Date: Fri, 3 Oct 2025 18:51:38 -0400 Subject: [PATCH 05/73] guide --- .../realtime-streaming-to-openai/index.mdx | 190 ++++++++---------- 1 file changed, 88 insertions(+), 102 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index a8b622217..15ae5f682 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -2,7 +2,7 @@ title: Integrate OpenAI Realtime API with cXML description: Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML. slug: /guides/real-time-ai-voice-assistants -sidebar_label: Stream an OpenAI Realtime API agent with cXML +sidebar_label: Stream an OpenAI Realtime API agent sidebar_position: 0 x-custom: tags: @@ -14,10 +14,12 @@ x-custom: --- import AddResource from '/docs/main/_common/dashboard/add-resource.mdx'; +import ResourcesFyi from '/docs/main/_common/call-fabric/resources-fyi-card.mdx'; +import { MdCode } from "react-icons/md"; # Stream to OpenAI Realtime API agent with cXML -Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML +Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming via a cXML Script In this guide, we will build a Node.js application that serves a [cXML Script][cxml] @@ -28,6 +30,8 @@ When a caller initiates a SIP or PSTN call to the assigned phone number, the SignalWire platform requests and runs the script. +{/* This architectural explainer is a DRAFT. It could be useful, but needs further refinement. 
+ The key architectural components involved are: - **cXML server:** Our Fastify server serves dynamic cXML to the SignalWire platform. @@ -39,13 +43,10 @@ This gives our application the ability to update the call instructions according Here's what happens when someone calls your application: ```mermaid -flowchart LR +flowchart TD A(Phone call) --> B(SignalWire platform) - B --> C(Your server) - C --> D(WebSocket) - D <--> E(OpenAI API) - B <--> D - D --> F(Tools) + B --> |Request cXML Script| C(Your server) + B --> |Bidirectional WebSocket connection| D(OpenAI API) ``` 1. **Call arrives** at SignalWire @@ -54,22 +55,28 @@ flowchart LR 4. **AI processes** speech in real-time 5. **Responses flow back** to the caller +*/} + ## Prerequisites Before you begin, ensure you have: -- **SignalWire Space** - [Sign up free](https://signalwire.com) +- **SignalWire Space** - [Sign up free](https://signalwire.com/signup) - **OpenAI API Key** - [Get access](https://platform.openai.com/api-keys) (requires paid account) - **Node.js 20+** - For running the TypeScript server ([Install Node](https://nodejs.org/en/download)) - **ngrok** or other tunneling service - For local development tunneling ([Install ngrok](https://ngrok.com/download)) - **Docker** (optional) - For containerized deployment ---- +## Steps ### Clone and install +
+ +
+ Clone the SignalWire Solutions repository, navigate to this example, and install. ```bash @@ -78,9 +85,25 @@ cd code/cxml-realtime-agent-stream npm install ``` -### Add your API key +
+ +
+ +} + > +The SignalWire Solutions repository + + +
+ +
-Choose **ONE** method based on how you'll run the application. +### Add OpenAI credentials + +Select **Local** or **Docker** @@ -114,7 +137,7 @@ echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt -### Run the application +### Run application @@ -137,20 +160,30 @@ docker-compose up --build signalwire-assistant Your AI assistant webhook is now running at `http://localhost:5050/incoming-call`. -:::important -The `/incoming-call` endpoint is where SignalWire sends call data to trigger your AI assistant. -This is the URL you'll configure in your SignalWire cXML resource. -::: - -### Create a cXML Script Resource +### Create a cXML Script Next, we need to tell SignalWire to request cXML from your server when a call comes in. -In your SignalWire Dashboard, navigate to [My Resources][resources]. -Click **Create Resource**, select **Script** as the resource type, and choose `cXML`. +
+ +
+ +- Navigate to [My Resources][resources] in your Dashboard. +- Click **Create Resource**, select **Script** as the resource type, and choose `cXML`. +- Under `Handle Using`, select `External Url`. +- Set the `Primary Script URL` to your server's **webhook endpoint**. + +Select the **Local** tab below if you ran the application locally, and the **Docker** tab if you're running it with Docker. + +
+ +
+ + + +
-Under `Handle Using`, select `External Url`. -Set the `Primary Script URL` to your server's **webhook endpoint**: +
@@ -171,43 +204,36 @@ For production environments, set your server URL + `/incoming-call`: - > **🚨 Critical:** You MUST include `/incoming-call` at the end of your URL. This is the specific webhook endpoint that handles incoming calls. -6. Give it a descriptive name (e.g., "AI Voice Assistant") -7. Create the resource - -> **📖 Learn More:** Follow the [SignalWire Call Fabric Resources Guide](https://developer.signalwire.com/platform/call-fabric/resources) for detailed instructions. - - -### 3. Create a SIP Address +:::important set routes +For this example, you **must** include `/incoming-call` at the end of your URL. This is the specific webhook endpoint that our application uses to handle incoming calls. +::: -To test your AI assistant, create a SIP address that connects to your cXML resource: +- Give the cXML Script a descriptive name, such as "AI Voice Assistant". +- Save your new Resource. -1. Now from the resource page of the resource you just created, click the `Addresses & Phone Numbers` tab -2. Click **Add** to create a new address -3. Select **SIP Address** as the address type -4. Fill out the address information -5. Save the configuration +### Assign SIP address or phone number -> **📖 Learn More:** Follow the [SignalWire Call Fabric Addresses Guide](https://developer.signalwire.com/platform/call-fabric/addresses) for detailed SIP address creation. +To test your AI assistant, create a SIP address or phone number and assign it as a handler for your cXML Script Resource. +- From the [My Resources][resources] tab, select your cXML Script +- Open the **Addresses & Phone Numbers** tab +- Click **Add** +- Select **SIP Address** or **Phone Number** +- Fill out any required details +- Save the configuration -> **💡 Tip:** You can also purchase a regular [phone number](https://developer.signalwire.com/platform/dashboard/get-started/phone-numbers) and link it to your cXML resource if you prefer traditional phone number calling. 
+### Test application -### Step 4: Test It! -**Call the SIP address you created in [Step 3](#3-create-a-sip-address) to test your AI assistant:** -1. **Using a SIP Phone or Softphone:** - - Dial: `sip:your-sip-address@yourproject.dapp.signalwire.com` - - Replace with the actual SIP address you created in your SignalWire resource -2. **The call flow will be:** - - Your SIP call → SignalWire → Your webhook endpoint → AI assistant -> **📱 Alternative:** If you purchased a regular phone number and linked it to your cXML resource, you can also call that number directly. -> **🔧 Troubleshooting:** If you haven't set up ngrok yet, go back to [SignalWire Setup](#signalwire-setup) to expose your local server. +Dial the SIP address or phone number assigned to your cXML Script. +You should now be speaking to your newly created agent! 
--- -## Steps +{/* draft + +## How it works @@ -1224,7 +1250,9 @@ curl http://localhost:5050/health βœ… Tool call completed: get_weather ``` ---- +*/} + +{/* Needs validation ## Common Issues & Solutions @@ -1277,69 +1305,27 @@ curl http://localhost:5050/health - [ ] Function calls (weather, time) work - [ ] Interruptions handled gracefully -:::warning Common Mistakes - -**Webhook Configuration:** -- Forgetting `/incoming-call` in your webhook URL (most common!) -- Using HTTP instead of HTTPS for webhook URL -- ngrok URL changing and not updating SignalWire configuration - -**Environment Setup:** -- Missing `.env` file or incorrect API key format -- Using both `.env` and Docker secrets (use only one method) -- Wrong audio format causing codec mismatches - -**Development Issues:** -- Not running `npm run build` after code changes -- Port 5050 already in use by another application -- Network firewall blocking WebSocket connections - -::: - ---- +*/} ## Complete Example Ready to see it all in action? 
} href="https://github.com/signalwire/cxml-realtime-agent-stream" /> This reference implementation includes: -- ✅ **Complete bidirectional streaming** - Real-time audio with zero buffering -- ✅ **OpenAI Realtime API integration** - Latest GPT-4 Realtime model -- ✅ **Function calling examples** - Weather and time tools included -- ✅ **Production deployment** - Docker, health checks, logging -- ✅ **Error handling** - Graceful fallbacks and recovery -- ✅ **Audio optimization** - Both HD (24kHz) and telephony (8kHz) support -- ✅ **TypeScript** - Full type safety and development experience - -### Key Files to Study - -``` -src/ -├── index.ts # Server setup and initialization -├── config.ts # Environment and agent configuration -├── constants.ts # Application constants and messages -├── routes/ -│ ├── webhook.ts # cXML webhook handler (/incoming-call) -│ ├── streaming.ts # WebSocket bridge (/media-stream) -│ └── health.ts # Health check endpoint (/health) -├── tools/ -│ ├── index.ts # Tool exports -│ ├── weather.tool.ts # Weather information tool -│ └── time.tool.ts # Current time tool -├── transports/ -│ └── SignalWireRealtimeTransportLayer.ts # Audio bridge -├── types/ -│ └── index.ts # TypeScript interfaces -└── utils/ - └── logger.ts # Logging utilities -``` +- **Complete bidirectional streaming** - Real-time audio with zero buffering +- **OpenAI Realtime API integration** - Latest GPT-4 Realtime model +- **Function calling examples** - Weather and time tools included +- **Production deployment** - Docker, health checks, logging +- **Error handling** - Graceful fallbacks and recovery +- **Audio optimization** - Both HD (24kHz) and telephony (8kHz) support +- **TypeScript** - Full type safety and development experience --- From 1b9b266cce0567e174e6ad35c18b1cabb6599bff Mon Sep 17 00:00:00 2001 From: hey-august Date: Mon, 6 Oct 2025 09:20:56 -0400 Subject: [PATCH 06/73] 
cleanup conclusion --- .../realtime-streaming-to-openai/index.mdx | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 15ae5f682..08990c7e5 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1315,7 +1315,7 @@ Ready to see it all in action? title="SignalWire + OpenAI Realtime" description="Production-ready implementation with all features" icon={} - href="https://github.com/signalwire/cxml-realtime-agent-stream" + href="https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream" /> This reference implementation includes: @@ -1329,10 +1329,6 @@ This reference implementation includes: --- -## Next steps - -todo - ## Resources ### Documentation @@ -1341,22 +1337,9 @@ todo - [cXML Reference](/compatibility-api/cxml) - [@openai/agents SDK Documentation](https://www.npmjs.com/package/@openai/agents) -### Code Repository -- [GitHub Repository](https://github.com/signalwire/cxml-realtime-agent-stream) - **Complete working example** -- [API Reference](https://github.com/signalwire/cxml-realtime-agent-stream/tree/main/src) - **Explore the source code** - -### Key Implementation Files -- [`src/index.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/index.ts) - Server setup -- [`src/routes/streaming.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/routes/streaming.ts) - WebSocket handler -- [`src/transports/SignalWireRealtimeTransportLayer.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/transports/SignalWireRealtimeTransportLayer.ts) - Audio bridge -- 
[`src/tools/weather.tool.ts`](https://github.com/signalwire/cxml-realtime-agent-stream/blob/main/src/tools/weather.tool.ts) - Function calling example - ---- - -*Need help? Join our [Community Slack](https://signalwire.community) or contact [support@signalwire.com](mailto:support@signalwire.com)* - [cxml]: /compatibility-api/cxml "Documentation for cXML, or Compatibility XML." [bidir-stream]: /compatibility-api/cxml/voice/stream#bidirectional-stream "Technical reference for creating a bidirectional Stream in cXML." -[resources]: https://my.signalwire.com?page=resources "The My Resources page of your SignalWire Dashboard." \ No newline at end of file +[resources]: https://my.signalwire.com?page=resources "The My Resources page of your SignalWire Dashboard." +[repo]: https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream "This project's GitHub repository." \ No newline at end of file From a667b02b9a9e259e9472ef2876aef513a2f10b90 Mon Sep 17 00:00:00 2001 From: hey-august Date: Mon, 6 Oct 2025 11:04:51 -0400 Subject: [PATCH 07/73] 'how it works' --- .../realtime-streaming-to-openai/index.mdx | 139 +++++++----------- 1 file changed, 57 insertions(+), 82 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 08990c7e5..3dbdff516 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -19,7 +19,7 @@ import { MdCode } from "react-icons/md"; # Stream to OpenAI Realtime API agent with cXML -Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming via a cXML Script +Put OpenAI Speech-to-Speech models on the phone with cXML `` In this guide, we will build a Node.js application that serves a [cXML 
Script][cxml] @@ -28,7 +28,8 @@ that initiates a two-way (bidirectional) to the OpenAI Realtime API. When a caller initiates a SIP or PSTN -call to the assigned phone number, the SignalWire platform requests and runs the script. +call to the assigned phone number, +the SignalWire platform requests and runs the script. {/* This architectural explainer is a DRAFT. It could be useful, but needs further refinement. @@ -38,14 +39,14 @@ The key architectural components involved are: This gives our application the ability to update the call instructions according to each request. - **WebSocket bridge:** Enables real-time audio streaming between telephony and AI - **AI integration:** Natural conversations with OpenAI's Realtime API -- **Function Calling:** Server-side tool execution during conversations +- **Function calling:** Server-side tool execution during conversations Here's what happens when someone calls your application: ```mermaid flowchart TD A(Phone call) --> B(SignalWire platform) - B --> |Request cXML Script| C(Your server) + B --> |Request cXML Script via webhook| C(Your server) B --> |Bidirectional WebSocket connection| D(OpenAI API) ``` @@ -57,6 +58,13 @@ flowchart TD */} +Wondering why this guide uses cXML to stream to OpenAI, instead of using +the [native SWML AI integration](/swml/ai)? +Since OpenAI's Realtime API is built for Speech-to-Speech (or "Voice-to-Voice") models, +the SignalWire platform must stream audio directly to and from OpenAI +instead of handling the STT, TTS, and LLM aspects with our integrated toolchain. +This guide showcases the flexibility of the SignalWire platform to integrate with emerging unified audio models. 
+ ## Prerequisites Before you begin, ensure you have: @@ -67,7 +75,7 @@ Before you begin, ensure you have: - **ngrok** or other tunneling service - For local development tunneling ([Install ngrok](https://ngrok.com/download)) - **Docker** (optional) - For containerized deployment -## Steps +## Quickstart @@ -160,6 +168,14 @@ docker-compose up --build signalwire-assistant Your AI assistant webhook is now running at `http://localhost:5050/incoming-call`. +:::tip Health check +Make sure your server is running and the health check passes: +```bash +curl http://localhost:5050/health +# Should return: {"status":"healthy"} +``` +::: + ### Create a cXML Script Next, we need to tell SignalWire to request cXML from your server when a call comes in. @@ -229,19 +245,21 @@ You should now be speaking to your newly created agent! + + --- -{/* draft + ## How it works - + -### Create the Webhook Endpoint + -Your server needs to handle incoming call webhooks from SignalWire. +First, your server needs to handle incoming call webhooks from SignalWire. -**Set Up the HTTP Endpoint** +**Set up the HTTP endpoint** @@ -300,9 +318,11 @@ Your webhook URL must include `/incoming-call` at the end: - Production: `https://your-domain.com/incoming-call` ::: -### Build the WebSocket Bridge + + + -Create a WebSocket server to handle bidirectional audio streaming. +Next, we will create a WebSocket server to handle bidirectional audio streaming. **Initialize WebSocket Server** @@ -414,6 +434,8 @@ app.get('/media-stream', { websocket: true }, async (connection) => {
+ + SignalWire sends several types of messages through the WebSocket: @@ -439,6 +461,8 @@ SignalWire sends several types of messages through the WebSocket: + + ### Integrate with OpenAI Realtime API Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. @@ -602,6 +626,10 @@ Choose the right audio format for your use case: - **pcm16 (24kHz)**: High definition audio for demos ::: + + + + ### Add Function Calling Enable your AI to execute server-side tools during conversations. @@ -832,8 +860,6 @@ All of this happens in real-time during the conversation. - - --- ## Technical Deep Dive @@ -1161,57 +1187,25 @@ DEBUG=openai-agents:* npm run dev --- -## Testing Your Assistant - -:::tip Before Testing -Make sure your server is running and the health check passes: +**Console Output to Look For:** ```bash -curl http://localhost:5050/health -# Should return: {"status":"healthy"} -``` -::: - -### Quick Test with SIP - -1. **Create a SIP address** in your SignalWire project: - - Go to **Call Fabric** > **Addresses** - - Click **Create** > **SIP Address** - - Link it to your cXML resource - -2. **Test with a SIP client:** - ``` - sip:your-sip-address@yourproject.dapp.signalwire.com - ``` - - **Recommended SIP clients:** - - **Desktop:** [Zoiper](https://www.zoiper.com/) (free) - - **Mobile:** Zoiper or [Linphone](https://www.linphone.org/) - - **Web:** Use SignalWire's built-in browser calling - -3. **Expected call flow:** - - Call connects immediately - - AI greets you with "Hello! I'm your AI voice assistant..." - - Try saying: "What's the weather in New York?" - - Try saying: "What time is it?" - - Test interruptions by talking while AI is speaking - -### Alternative: Purchase a Phone Number +πŸ“‘ Server running on http://0.0.0.0:5050 +πŸ₯ Health check: http://0.0.0.0:5050/health +πŸ”Š Audio format: g711_ulaw (8kHz telephony) +πŸŽ™οΈ Voice: alloy -1. **Buy a phone number** in SignalWire -2. **Link it** to your cXML resource -3. 
**Call normally** from any phone +# When calls come in: +πŸ“ž Incoming call - Audio format: g711_ulaw, SignalWire codec: default +πŸ“± Client connected to WebSocket +πŸ”§ Tool call started: get_weather +βœ… Tool call completed: get_weather +``` -### Testing Checklist +{/* Needs validation -- [ ] **Connection**: Call connects and you hear AI greeting -- [ ] **Audio Quality**: Clear audio both directions -- [ ] **Weather Tool**: Ask "What's the weather in [city]?" -- [ ] **Time Tool**: Ask "What time is it?" -- [ ] **Interruption**: Talk while AI is speaking (should stop gracefully) -- [ ] **Conversation**: Have a natural back-and-forth conversation -- [ ] **Goodbye**: End call normally +## Common Issues & Solutions -### Debugging Your Implementation +### Debugging -**Console Output to Look For:** -```bash -πŸ“‘ Server running on http://0.0.0.0:5050 -πŸ₯ Health check: http://0.0.0.0:5050/health -πŸ”Š Audio format: g711_ulaw (8kHz telephony) -πŸŽ™οΈ Voice: alloy - -# When calls come in: -πŸ“ž Incoming call - Audio format: g711_ulaw, SignalWire codec: default -πŸ“± Client connected to WebSocket -πŸ”§ Tool call started: get_weather -βœ… Tool call completed: get_weather -``` - -*/} - -{/* Needs validation - -## Common Issues & Solutions - ### Troubleshooting Guide | Issue | Cause | Solution | @@ -1307,7 +1281,7 @@ curl http://localhost:5050/health */} -## Complete Example +## Complete example Ready to see it all in action? @@ -1342,4 +1316,5 @@ This reference implementation includes: [cxml]: /compatibility-api/cxml "Documentation for cXML, or Compatibility XML." [bidir-stream]: /compatibility-api/cxml/voice/stream#bidirectional-stream "Technical reference for creating a bidirectional Stream in cXML." [resources]: https://my.signalwire.com?page=resources "The My Resources page of your SignalWire Dashboard." -[repo]: https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream "This project's GitHub repository." 
\ No newline at end of file +[repo]: https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream "This project's GitHub repository." +[openai-realtime-api]: https://platform.openai.com/docs/guides/realtime "The OpenAI Realtime API" \ No newline at end of file From d7686c75a2af26ee4e481430f2db96ecdfa6e0c8 Mon Sep 17 00:00:00 2001 From: hey-august Date: Mon, 6 Oct 2025 11:15:15 -0400 Subject: [PATCH 08/73] fix link --- .../guides/voice/nodejs/realtime-streaming-to-openai/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 3dbdff516..eefdaa4ac 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -59,7 +59,7 @@ flowchart TD */} Wondering why this guide uses cXML to stream to OpenAI, instead of using -the [native SWML AI integration](/swml/ai)? +the [native SWML AI integration](/swml/methods/ai)? Since OpenAI's Realtime API is built for Speech-to-Speech (or "Voice-to-Voice") models, the SignalWire platform must stream audio directly to and from OpenAI instead of handling the STT, TTS, and LLM aspects with our integrated toolchain. 
From 8110a342d4fb52fa7c9fb01e619cb5e3ff97fe9a Mon Sep 17 00:00:00 2001 From: hey-august Date: Mon, 6 Oct 2025 18:41:43 -0400 Subject: [PATCH 09/73] more reorganization, accordions --- .../realtime-streaming-to-openai/index.mdx | 108 ++++++++---------- 1 file changed, 47 insertions(+), 61 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index eefdaa4ac..c744bbcd7 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -232,11 +232,9 @@ For this example, you **must** include `/incoming-call` at the end of your URL. To test your AI assistant, create a SIP address or phone number and assign it as a handler for your cXML Script Resource. - From the [My Resources][resources] tab, select your cXML Script -- Open the **Addresses & Phone NUmbers** tab -- Click **Add** -- Select **SIP Address** or **Phone Number** -- Fill out any required details -- Save the configuration +- Open the **Addresses & Phone Numbers** tab +- Click **Add**, and select either **SIP Address** or **Phone Number** +- Fill out any required details, and save the configuration ### Test application @@ -255,7 +253,7 @@ You should now be speaking to your newly created agent! - + First, your server needs to handle incoming call webhooks from SignalWire. 
@@ -436,18 +434,43 @@ app.get('/media-stream', { websocket: true }, async (connection) => { - + + +The `SignalWireRealtimeTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: + +```typescript +// Key features of the transport layer: +const transport = new SignalWireRealtimeTransportLayer({ + signalWireWebSocket: connection, + audioFormat: 'g711_ulaw' // or 'pcm16' +}); + +// Automatic handling of: +// 1. Audio format conversion +// 2. Base64 encoding/decoding +// 3. Interruption detection +// 4. Mark event tracking +// 5. Session cleanup +``` + +**Session Lifecycle:** +1. **WebSocket connection** β†’ SignalWire connects to `/media-stream` +2. **Transport creation** β†’ Bridge between SignalWire and OpenAI +3. **AI session start** β†’ RealtimeSession connects to OpenAI +4. **Audio streaming** β†’ Bidirectional real-time audio +5. **Tool execution** β†’ Function calls processed server-side +6. **Session cleanup** β†’ Graceful disconnect and resource cleanup SignalWire sends several types of messages through the WebSocket: -| Event | Purpose | Key Data | Handled By | -|-------|---------|----------|------------| -| `start` | Connection initialized | `streamSid`, `callSid`, `mediaFormat` | Transport Layer | -| `media` | Audio data packet (~20ms) | Base64 encoded `payload`, `track` | Transport Layer | -| `mark` | Audio playback confirmation | `name` (for timing) | Transport Layer | -| `stop` | Stream ending | None | Transport Layer | +| Event | Purpose | Key data | +|-------|---------|----------| +| `start` | Connection initialized | `streamSid`, `callSid`, `mediaFormat` | +| `media` | Audio data packet (~20ms) | Base64 encoded `payload`, `track` | +| `mark` | Audio playback confirmation | `name` (for timing) | +| `stop` | Stream ending | None | -**Key Features of SignalWireRealtimeTransportLayer:** +**Key features** - **Automatic audio format conversion** between SignalWire and OpenAI - **Interruption 
handling** using `clear` events and mark tracking - **Base64 encoding/decoding** for audio data @@ -461,9 +484,7 @@ SignalWire sends several types of messages through the WebSocket: - - -### Integrate with OpenAI Realtime API + Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. @@ -586,7 +607,7 @@ session.on('error', (error) => { Set up your environment variables for different deployment scenarios: - + Create a `.env` file in your project root: @@ -628,9 +649,7 @@ Choose the right audio format for your use case: - - -### Add Function Calling + Enable your AI to execute server-side tools during conversations. @@ -847,8 +866,6 @@ const agentConfig = {
- - 1. **User asks**: "What's the weather in New York?" 2. **AI recognizes intent**: Needs weather information 3. **Function call triggered**: `get_weather({ location: "New York" })` @@ -860,37 +877,12 @@ All of this happens in real-time during the conversation. + + --- ## Technical Deep Dive -### The SignalWire Transport Layer - -The `SignalWireRealtimeTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: - -```typescript -// Key features of the transport layer: -const transport = new SignalWireRealtimeTransportLayer({ - signalWireWebSocket: connection, - audioFormat: 'g711_ulaw' // or 'pcm16' -}); - -// Automatic handling of: -// 1. Audio format conversion -// 2. Base64 encoding/decoding -// 3. Interruption detection -// 4. Mark event tracking -// 5. Session cleanup -``` - -**Session Lifecycle:** -1. **WebSocket Connection** β†’ SignalWire connects to `/media-stream` -2. **Transport Creation** β†’ Bridge between SignalWire and OpenAI -3. **AI Session Start** β†’ RealtimeSession connects to OpenAI -4. **Audio Streaming** β†’ Bidirectional real-time audio -5. **Tool Execution** β†’ Function calls processed server-side -6. **Session Cleanup** β†’ Graceful disconnect and resource cleanup - ### Audio Processing Pipeline ```mermaid @@ -1026,7 +1018,7 @@ For production deployments: ## Deployment -### Local Development +### Local development 1. **Install dependencies** ```bash @@ -1283,7 +1275,10 @@ DEBUG=openai-agents:* npm run dev ## Complete example -Ready to see it all in action? +See the GitHub repo for a complete working example, including +weather and time function examples, +error handling, +and a production Docker setup. 
-This reference implementation includes: -- **Complete bidirectional streaming** - Real-time audio with zero buffering -- **OpenAI Realtime API integration** - Latest GPT-4 Realtime model -- **Function calling examples** - Weather and time tools included -- **Production deployment** - Docker, health checks, logging -- **Error handling** - Graceful fallbacks and recovery -- **Audio optimization** - Both HD (24kHz) and telephony (8kHz) support -- **TypeScript** - Full type safety and development experience - --- ## Resources From 13941b745db3772e314050bed1527d7a34d03f75 Mon Sep 17 00:00:00 2001 From: hey-august Date: Mon, 6 Oct 2025 18:59:11 -0400 Subject: [PATCH 10/73] claude hallucinated emoji icons for ``s --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index c744bbcd7..bd9db9991 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1,7 +1,7 @@ --- title: Integrate OpenAI Realtime API with cXML description: Put OpenAI Speech-to-Speech models on the phone with bidirectional streaming and cXML. 
-slug: /guides/real-time-ai-voice-assistants +slug: /compatibility-api/cxml/stream-openai-realtime sidebar_label: Stream an OpenAI Realtime API agent sidebar_position: 0 x-custom: @@ -915,12 +915,10 @@ Choose the right audio codec for your use case: @@ -1203,22 +1201,18 @@ DEBUG=openai-agents:* npm run dev From eb8dafb0a0a980d195539b114ac6da65e38db8d8 Mon Sep 17 00:00:00 2001 From: manny-r31 Date: Thu, 9 Oct 2025 17:15:20 -0400 Subject: [PATCH 11/73] Light restructuring, wording changes, and cards in resources section for use unless other, specific resource types are added. --- .../realtime-streaming-to-openai/index.mdx | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index bd9db9991..db2326ff3 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -883,6 +883,12 @@ All of this happens in real-time during the conversation. ## Technical Deep Dive +{/* Section reserved for future architectural explanations */} + +--- + +## Audio Processing + ### Audio Processing Pipeline ```mermaid @@ -903,10 +909,6 @@ graph LR - **Latency**: Typically 150-300ms end-to-end - **Quality**: Depends on codec choice (G.711 vs PCM16) ---- - -## Audio Processing - ### Codec Selection Guide Choose the right audio codec for your use case: @@ -1270,7 +1272,7 @@ DEBUG=openai-agents:* npm run dev ## Complete example See the GitHub repo for a complete working example, including -weather and time function examples, +weather and time function usage, error handling, and a production Docker setup. @@ -1285,11 +1287,36 @@ and a production Docker setup. 
## Resources -### Documentation -- [SignalWire Call Fabric Documentation](/platform/call-fabric) -- [OpenAI Realtime API Guide](https://platform.openai.com/docs/guides/realtime) -- [cXML Reference](/compatibility-api/cxml) -- [@openai/agents SDK Documentation](https://www.npmjs.com/package/@openai/agents) + + } + > + Learn about SignalWire's Call Fabric platform + + } + > + Official documentation for the OpenAI Realtime API + + } + > + Complete reference for Compatibility XML + + } + > + NPM package documentation for the OpenAI Agents SDK + + From 701d447648ee146b25b05fdf178c3c6f6116a073 Mon Sep 17 00:00:00 2001 From: hey-august Date: Thu, 23 Oct 2025 13:13:45 -0400 Subject: [PATCH 12/73] fix tabs, move content, edits from draft readers --- .../realtime-streaming-to-openai/index.mdx | 75 ++++++++----------- 1 file changed, 31 insertions(+), 44 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index db2326ff3..00e90793a 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -25,14 +25,30 @@ In this guide, we will build a Node.js application that serves a [cXML Script][cxml] that initiates a two-way (bidirectional) [``][bidir-stream] -to the OpenAI Realtime API. -When a caller initiates a SIP or -PSTN -call to the assigned phone number, -the SignalWire platform requests and runs the script. +to a Speech-to-Speech model on the OpenAI Realtime API. +When a caller initiates a call to the assigned phone number, +the SignalWire platform requests and runs the cXML script. 
+ +```mermaid +graph LR + A[Phone call] --> B[SignalWire] + B --> C[WebSocket] + C --> D[Transport layer] + D --> E[OpenAI Realtime] + E --> D + D --> C + C --> B + B --> A +``` {/* This architectural explainer is a DRAFT. It could be useful, but needs further refinement. +**Audio Flow Details:** +- **Inbound**: Phone β†’ SignalWire β†’ Base64 β†’ Transport β†’ ArrayBuffer β†’ OpenAI +- **Outbound**: OpenAI β†’ ArrayBuffer β†’ Transport β†’ Base64 β†’ SignalWire β†’ Phone +- **Latency**: Typically 150-300ms end-to-end +- **Quality**: Depends on codec choice (G.711 vs PCM16) + The key architectural components involved are: - **cXML server:** Our Fastify server serves dynamic cXML to the SignalWire platform. @@ -58,13 +74,6 @@ flowchart TD */} -Wondering why this guide uses cXML to stream to OpenAI, instead of using -the [native SWML AI integration](/swml/methods/ai)? -Since OpenAI's Realtime API is built for Speech-to-Speech (or "Voice-to-Voice") models, -the SignalWire platform must stream audio directly to and from OpenAI -instead of handling the STT, TTS, and LLM aspects with our integrated toolchain. -This guide showcases the flexibility of the SignalWire platform to integrate with emerging unified audio models. - ## Prerequisites Before you begin, ensure you have: @@ -88,8 +97,8 @@ Before you begin, ensure you have: Clone the SignalWire Solutions repository, navigate to this example, and install. ```bash -git clone https://github.com/signalwire/solutions-architecture -cd code/cxml-realtime-agent-stream +git clone https://github.com/signalwire/cXML-realtime-agent-stream +cd cxml-realtime-agent-stream npm install ``` @@ -98,11 +107,11 @@ npm install
} > -The SignalWire Solutions repository +View the source code on GitHub
@@ -111,7 +120,7 @@ The SignalWire Solutions repository ### Add OpenAI credentials -Select **Local** or **Docker** +Select the **Local** or **Docker** tab below depending on where you plan to run the application. @@ -157,7 +166,7 @@ npm start - + ```bash docker-compose up --build signalwire-assistant @@ -202,7 +211,7 @@ Select the **Local** tab below if you ran the application locally, and the **Doc - + Use ngrok to expose port 5050 on your development machine: ```bash @@ -212,7 +221,7 @@ ngrok http 5050 Append `/incoming-call` to the HTTPS URL returned by ngrok. https://abc123.ngrok.io/incoming-call - + For production environments, set your server URL + `/incoming-call`: ``` https://your-domain.com/incoming-call @@ -227,7 +236,7 @@ For this example, you **must** include `/incoming-call` at the end of your URL. - Give the cXML Script a descriptive name, such as "AI Voice Assistant". - Save your new Resource. -### Assign SIP address or phone number +### Assign phone number or SIP address To test your AI assistant, create a SIP address or phone number and assign it as a handler for your cXML Script Resource. @@ -887,28 +896,6 @@ All of this happens in real-time during the conversation. 
--- -## Audio Processing - -### Audio Processing Pipeline - -```mermaid -graph LR - A[Phone Call] --> B[SignalWire] - B --> C[WebSocket] - C --> D[Transport Layer] - D --> E[OpenAI Realtime] - E --> D - D --> C - C --> B - B --> A -``` - -**Audio Flow Details:** -- **Inbound**: Phone β†’ SignalWire β†’ Base64 β†’ Transport β†’ ArrayBuffer β†’ OpenAI -- **Outbound**: OpenAI β†’ ArrayBuffer β†’ Transport β†’ Base64 β†’ SignalWire β†’ Phone -- **Latency**: Typically 150-300ms end-to-end -- **Quality**: Depends on codec choice (G.711 vs PCM16) - ### Codec Selection Guide Choose the right audio codec for your use case: From 0a80f1be82313ccd9d2d24b824c5046b8bb48798 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:17:24 +0000 Subject: [PATCH 13/73] sentence casing --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 00e90793a..d2b9afe35 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1182,7 +1182,7 @@ DEBUG=openai-agents:* npm run dev {/* Needs validation -## Common Issues & Solutions +## Common issues & solutions ### Debugging @@ -1205,7 +1205,7 @@ DEBUG=openai-agents:* npm run dev /> -### Troubleshooting Guide +### Troubleshooting guide | Issue | Cause | Solution | |-------|-------|----------| @@ -1219,7 +1219,7 @@ DEBUG=openai-agents:* npm run dev | Memory leaks | Audio buffer accumulation | Monitor memory usage, implement cleanup | | Session errors | OpenAI API issues | Check API status, implement fallback responses | -### Debug Checklist +### Debug checklist **Basic Setup:** - [ ] Webhook URL includes `/incoming-call` 
endpoint From a0f370d21c203528e754b3181f727ac3bd7fe4e0 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:17:48 +0000 Subject: [PATCH 14/73] codec comparison table --- .../nodejs/realtime-streaming-to-openai/index.mdx | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index d2b9afe35..cf8ccb48c 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -896,10 +896,21 @@ All of this happens in real-time during the conversation. --- -### Codec Selection Guide +### Audio format comparison Choose the right audio codec for your use case: +| Format | Sample Rate | Bandwidth | Quality | Latency | Best For | +|--------|-------------|-----------|---------|---------|----------| +| **G.711 μ-law** | 8 kHz | ~64 kbps | Standard telephony | Lower (~100-150ms) | Traditional phone calls, bandwidth-constrained | +| **PCM16 (L16@24000h)** | 24 kHz | ~384 kbps | High definition | Slightly higher (~150-300ms) | Demos, high-quality requirements | + +**Default:** G.711 μ-law (matches SignalWire's default codec) + +### Codec selection guide + +In your cXML Script or environment configuration: + Date: Fri, 31 Oct 2025 16:18:12 +0000 Subject: [PATCH 15/73] audio accordion --- .../realtime-streaming-to-openai/index.mdx | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index cf8ccb48c..c4a60e8e1 100644 --- 
a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -886,13 +886,34 @@ All of this happens in real-time during the conversation. - + ---- +### System components + +The voice assistant consists of four key components: + +1. **cXML Server** (Fastify): Receives incoming call webhooks and returns cXML instructions to SignalWire +2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format +3. **AI Integration** (RealtimeSession + RealtimeAgent): Processes speech and generates responses +4. **Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations + +### Audio path + +**Inbound (Caller → AI):** +- Phone → SignalWire → Base64 encoded payload → WebSocket → SignalWireCompatibilityTransportLayer → ArrayBuffer → OpenAI Realtime API -## Technical Deep Dive +**Outbound (AI → Caller):** +- OpenAI Realtime API → ArrayBuffer → SignalWireCompatibilityTransportLayer → Base64 encoding → WebSocket → SignalWire → Phone -{/* Section reserved for future architectural explanations */} +### Latency & quality + +- **End-to-End Latency:** 150-300ms typically, depending on codec and network +- **G.711 μ-law (8kHz):** Lower latency, standard telephony quality, smaller payloads +- **PCM16 (24kHz):** HD audio quality, larger payloads, slightly higher latency + + + + --- From fe64a7945fd1790dc8850ed31dbac9e805edd0bc Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:18:25 +0000 Subject: [PATCH 16/73] uncomment section --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx 
b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index c4a60e8e1..37820af9b 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1212,7 +1212,7 @@ DEBUG=openai-agents:* npm run dev βœ… Tool call completed: get_weather ``` -{/* Needs validation +--- ## Common issues & solutions @@ -1286,8 +1286,6 @@ DEBUG=openai-agents:* npm run dev - [ ] Function calls (weather, time) work - [ ] Interruptions handled gracefully -*/} - ## Complete example See the GitHub repo for a complete working example, including From b342d6803cc6dbc84e174aa43fd40adbe221b934 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:23:05 +0000 Subject: [PATCH 17/73] new mermaid diagram --- .../realtime-streaming-to-openai/index.mdx | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 37820af9b..c94f3b76a 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -30,15 +30,27 @@ When a caller initiates a call to the assigned phone number, the SignalWire platform requests and runs the cXML script. 
```mermaid -graph LR - A[Phone call] --> B[SignalWire] - B --> C[WebSocket] - C --> D[Transport layer] - D --> E[OpenAI Realtime] - E --> D - D --> C - C --> B - B --> A +sequenceDiagram + participant Caller + participant SignalWire + participant Server as Your Server + participant OpenAI + + Caller->>SignalWire: Call arrives + SignalWire->>Server: POST /incoming-call + Server->>SignalWire: cXML: Stream + SignalWire->>Server: WebSocket /media-stream + Server->>OpenAI: RealtimeSession.connect() + + rect rgba(100, 150, 255, 0.2) + Note over Caller,OpenAI: Audio streams bidirectionally + Caller->>SignalWire: Audio + SignalWire->>Server: Audio (base64) via WebSocket + Server->>OpenAI: Audio frames + OpenAI->>Server: Synthesized speech + Server->>SignalWire: Audio (base64) via WebSocket + SignalWire->>Caller: Audio + end ``` {/* This architectural explainer is a DRAFT. It could be useful, but needs further refinement. From 8e9b7cbbe7dce2fb89438b64888ececb882b3dc7 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:26:47 +0000 Subject: [PATCH 18/73] update `SignalWireRealtimeTransportLayer` to `SignalWireCompatibilityTransportLayer` --- .../nodejs/realtime-streaming-to-openai/index.mdx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index c94f3b76a..a49b12a77 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -411,7 +411,7 @@ app.get('/media-stream', { websocket: true }, async (connection) => { ```javascript const websocket = require('@fastify/websocket'); -const { SignalWireRealtimeTransportLayer } = require('../transports/SignalWireRealtimeTransportLayer.js'); +const { 
SignalWireCompatibilityTransportLayer } = require('../transports/SignalWireCompatibilityTransportLayer.js'); const { RealtimeSession, RealtimeAgent } = require('@openai/agents/realtime'); const { AGENT_CONFIG } = require('../config.js'); @@ -422,7 +422,7 @@ app.get('/media-stream', { websocket: true }, async (connection) => { try { // Create SignalWire transport layer with configured audio format - const signalWireTransportLayer = new SignalWireRealtimeTransportLayer({ + const signalWireTransportLayer = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket: connection, audioFormat: AGENT_CONFIG.audioFormat }); @@ -457,11 +457,11 @@ app.get('/media-stream', { websocket: true }, async (connection) => { -The `SignalWireRealtimeTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: +The `SignalWireCompatibilityTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: ```typescript // Key features of the transport layer: -const transport = new SignalWireRealtimeTransportLayer({ +const transport = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket: connection, audioFormat: 'g711_ulaw' // or 'pcm16' }); @@ -517,7 +517,7 @@ Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. 
```typescript import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; -import { SignalWireRealtimeTransportLayer } from '../transports/SignalWireRealtimeTransportLayer.js'; +import { SignalWireCompatibilityTransportLayer } from '../transports/SignalWireCompatibilityTransportLayer.js'; import { allTools } from '../tools/index.js'; // Configure the AI agent @@ -533,7 +533,7 @@ const agentConfig: RealtimeAgentConfiguration = { async function createAISession(signalWireWebSocket: WebSocket): Promise { // Create transport layer that bridges SignalWire and OpenAI - const transport = new SignalWireRealtimeTransportLayer({ + const transport = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket, audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio }); @@ -575,7 +575,7 @@ const agentConfig = { async function createAISession(signalWireWebSocket) { // Create transport layer that bridges SignalWire and OpenAI - const transport = new SignalWireRealtimeTransportLayer({ + const transport = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket, audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio }); From c1aa3f67f6b54fee507ee7221a60548b7ac4e030 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:26:55 +0000 Subject: [PATCH 19/73] correct repo url --- .../guides/voice/nodejs/realtime-streaming-to-openai/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index a49b12a77..cc24b5b89 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1309,7 +1309,7 @@ and a production Docker setup. 
title="SignalWire + OpenAI Realtime" description="Production-ready implementation with all features" icon={} - href="https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream" + href="https://github.com/signalwire/cXML-realtime-agent-stream" /> --- From 1351d09a9d28bde65391435dcb3fc846f0432ef5 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:29:35 +0000 Subject: [PATCH 20/73] fixes --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index cc24b5b89..727a70887 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -350,7 +350,7 @@ Next, we will create a WebSocket server to handle bidirectional audio streaming. 
```typescript import websocket from '@fastify/websocket'; -import { SignalWireRealtimeTransportLayer } from '../transports/SignalWireRealtimeTransportLayer.js'; +import { SignalWireCompatibilityTransportLayer } from '../transports/SignalWireCompatibilityTransportLayer.js'; import { RealtimeSession, RealtimeAgent } from '@openai/agents/realtime'; import { AGENT_CONFIG } from '../config.js'; @@ -378,7 +378,7 @@ app.get('/media-stream', { websocket: true }, async (connection) => { try { // Create SignalWire transport layer with configured audio format - const signalWireTransportLayer = new SignalWireRealtimeTransportLayer({ + const signalWireTransportLayer = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket: connection, audioFormat: AGENT_CONFIG.audioFormat }); From 21479254c8d358c5b01615969afa2b8f834a5496 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:33:11 +0000 Subject: [PATCH 21/73] mermaid diagram addendum --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 727a70887..0da274da8 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -53,6 +53,15 @@ sequenceDiagram end ``` +:::note Webhook payload +The HTTP POST to `/incoming-call` includes metadata about the call: +- `streamSid` - Unique identifier for this audio stream +- `callSid` - Unique identifier for this call +- `mediaFormat` - Audio codec and sample rate (e.g., G.711 ΞΌ-law at 8kHz) + +Your server uses this information to configure the transport layer with the correct audio format. +::: + {/* This architectural explainer is a DRAFT. 
It could be useful, but needs further refinement. **Audio Flow Details:** From 9a7861d466d2d0a500845b67f4358948cf002963 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 16:33:23 +0000 Subject: [PATCH 22/73] extra super fast quickstart --- .../realtime-streaming-to-openai/index.mdx | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 0da274da8..8e3d9839c 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -107,6 +107,32 @@ Before you begin, ensure you have: ## Quickstart +### 5-Minute Setup (TL;DR) + +```bash +# 1. Clone and install +git clone https://github.com/signalwire/cXML-realtime-agent-stream +cd cxml-realtime-agent-stream +npm install + +# 2. Configure +cp .env.example .env +# Edit .env: add your OPENAI_API_KEY + +# 3. Run +npm run build && npm start + +# 4. Expose (in another terminal) +ngrok http 5050 + +# 5. Create SignalWire resources +# - Create cXML Script pointing to https://your-ngrok-url.ngrok.io/incoming-call +# - Add SIP Address to the script +# - Call your SIP address! 
+``` + +**Detailed walkthrough below:** + ### Clone and install From 9901dbfc6905c6d9ca98262420e3966df187e703 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 18:37:16 +0000 Subject: [PATCH 23/73] remove unnecessary admonition and duplicate architectural explainer --- .../realtime-streaming-to-openai/index.mdx | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 8e3d9839c..ccb1a50b2 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -53,48 +53,6 @@ sequenceDiagram end ``` -:::note Webhook payload -The HTTP POST to `/incoming-call` includes metadata about the call: -- `streamSid` - Unique identifier for this audio stream -- `callSid` - Unique identifier for this call -- `mediaFormat` - Audio codec and sample rate (e.g., G.711 ΞΌ-law at 8kHz) - -Your server uses this information to configure the transport layer with the correct audio format. -::: - -{/* This architectural explainer is a DRAFT. It could be useful, but needs further refinement. - -**Audio Flow Details:** -- **Inbound**: Phone β†’ SignalWire β†’ Base64 β†’ Transport β†’ ArrayBuffer β†’ OpenAI -- **Outbound**: OpenAI β†’ ArrayBuffer β†’ Transport β†’ Base64 β†’ SignalWire β†’ Phone -- **Latency**: Typically 150-300ms end-to-end -- **Quality**: Depends on codec choice (G.711 vs PCM16) - -The key architectural components involved are: - -- **cXML server:** Our Fastify server serves dynamic cXML to the SignalWire platform. -This gives our application the ability to update the call instructions according to each request. 
-- **WebSocket bridge:** Enables real-time audio streaming between telephony and AI -- **AI integration:** Natural conversations with OpenAI's Realtime API -- **Function calling:** Server-side tool execution during conversations - -Here's what happens when someone calls your application: - -```mermaid -flowchart TD - A(Phone call) --> B(SignalWire platform) - B --> |Request cXML Script via webhook| C(Your server) - B --> |Bidirectional WebSocket connection| D(OpenAI API) -``` - -1. **Call arrives** at SignalWire -2. **Webhook triggers** your server endpoint -3. **WebSocket streams** audio bidirectionally -4. **AI processes** speech in real-time -5. **Responses flow back** to the caller - -*/} - ## Prerequisites Before you begin, ensure you have: From 52651f1bed2c6a64c54c9e06a260fc6ca6c0f0f0 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 19:03:07 +0000 Subject: [PATCH 24/73] remove JavaScript examples --- .../realtime-streaming-to-openai/index.mdx | 199 +----------------- 1 file changed, 2 insertions(+), 197 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index ccb1a50b2..2efcb1cbd 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -273,9 +273,6 @@ First, your server needs to handle incoming call webhooks from SignalWire. 
**Set up the HTTP endpoint** - - - ```typescript import Fastify from 'fastify'; @@ -321,9 +318,6 @@ app.post('/incoming-call', async (req, res) => { app.listen({ port: 5050, host: '0.0.0.0' }); ``` - - - :::tip Webhook URL Format Your webhook URL must include `/incoming-call` at the end: - Local: `https://your-ngrok-url.ngrok.io/incoming-call` @@ -399,42 +393,8 @@ app.get('/media-stream', { websocket: true }, async (connection) => { }); ``` - - - -```javascript -const websocket = require('@fastify/websocket'); -const { SignalWireCompatibilityTransportLayer } = require('../transports/SignalWireCompatibilityTransportLayer.js'); -const { RealtimeSession, RealtimeAgent } = require('@openai/agents/realtime'); -const { AGENT_CONFIG } = require('../config.js'); - -app.register(websocket); - -app.get('/media-stream', { websocket: true }, async (connection) => { - console.log('πŸ“ž Client connected to WebSocket'); - - try { - // Create SignalWire transport layer with configured audio format - const signalWireTransportLayer = new SignalWireCompatibilityTransportLayer({ - signalWireWebSocket: connection, - audioFormat: AGENT_CONFIG.audioFormat - }); - - // Create AI agent and session - const realtimeAgent = new RealtimeAgent(agentConfig); - const session = new RealtimeSession(realtimeAgent, { - transport: signalWireTransportLayer, - model: 'gpt-4o-realtime-preview' - }); - - // Connect to OpenAI Realtime API - await session.connect({ - apiKey: process.env.OPENAI_API_KEY - }); - - // Handle session events - session.on('agent_tool_start', (context, agent, tool, details) => { - console.log('πŸ”§ Tool call started:', details); + session.on('agent_tool_end', (context, agent, tool, result, details) => { + console.log('βœ… Tool call completed:', details); }); } catch (error) { @@ -443,9 +403,6 @@ app.get('/media-stream', { websocket: true }, async (connection) => { }); ``` - - - @@ -547,51 +504,6 @@ async function createAISession(signalWireWebSocket: WebSocket): Promise - - 
-```javascript -const { RealtimeAgent, RealtimeSession } = require('@openai/agents/realtime'); -const { SignalWireRealtimeTransportLayer } = require('../transports/SignalWireRealtimeTransportLayer.js'); -const { allTools } = require('../tools/index.js'); - -// Configure the AI agent -const agentConfig = { - name: 'SignalWire Voice Assistant', - instructions: `You are a helpful and friendly voice assistant. - Always start every conversation by greeting the caller first. - You can help with weather information, time queries, and general conversation. - Be concise and friendly in your responses.`, - tools: allTools, // Weather, time, and other tools - voice: 'alloy' -}; - -async function createAISession(signalWireWebSocket) { - // Create transport layer that bridges SignalWire and OpenAI - const transport = new SignalWireCompatibilityTransportLayer({ - signalWireWebSocket, - audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio - }); - - // Create agent and session - const agent = new RealtimeAgent(agentConfig); - const session = new RealtimeSession(agent, { - transport, - model: 'gpt-4o-realtime-preview' - }); - - // Connect to OpenAI - await session.connect({ - apiKey: process.env.OPENAI_API_KEY - }); - - return session; -} -``` - - - - **Send Audio Back to Caller** ```typescript @@ -773,113 +685,6 @@ const agentConfig = { }; ``` - - - -```javascript -const { tool: realtimeTool } = require('@openai/agents/realtime'); -const { z } = require('zod'); - -// Weather tool using real US National Weather Service API -const weatherTool = realtimeTool({ - name: 'get_weather', - description: 'Get current weather information for any US city', - parameters: z.object({ - location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)') - }), - execute: async ({ location }) => { - try { - // Step 1: Geocoding - Convert city name to coordinates - const geocodeUrl = 
`https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; - const geocodeResponse = await fetch(geocodeUrl, { - headers: { - 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0' - } - }); - - if (!geocodeResponse.ok) { - return 'Sorry, weather information is currently unavailable.'; - } - - const geocodeData = await geocodeResponse.json(); - if (!geocodeData || geocodeData.length === 0) { - return `Sorry, I couldn't find the location "${location}". Please try a different city name.`; - } - - const lat = parseFloat(geocodeData[0].lat); - const lon = parseFloat(geocodeData[0].lon); - - // Step 2: Get weather from weather.gov - const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; - const pointsResponse = await fetch(pointsUrl); - const pointsData = await pointsResponse.json(); - - const forecastUrl = pointsData.properties?.forecast; - if (!forecastUrl) { - return 'Sorry, weather information is currently unavailable.'; - } - - const forecastResponse = await fetch(forecastUrl); - const forecastData = await forecastResponse.json(); - - const currentPeriod = forecastData.properties?.periods?.[0]; - if (!currentPeriod) { - return 'Sorry, weather information is currently unavailable.'; - } - - // Format response for voice - const cityName = geocodeData[0].display_name.split(',')[0]; - return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; - - } catch (error) { - return 'Sorry, weather information is currently unavailable.'; - } - } -}); - -// Time tool example (no external API required) -const timeTool = realtimeTool({ - name: 'get_time', - description: 'Get the current time in Eastern Time', - parameters: z.object({}), // No parameters needed - execute: async () => { - try { - const now = new Date(); - const easternTime = now.toLocaleString('en-US', { - timeZone: 'America/New_York', - timeZoneName: 'short', - weekday: 'long', - year: 'numeric', - month: 'long', - 
day: 'numeric', - hour: 'numeric', - minute: '2-digit' - }); - return `The current time in Eastern Time is ${easternTime}.`; - } catch (error) { - return 'Sorry, time information is currently unavailable.'; - } - } -}); - -// Export all tools -module.exports = { allTools: [weatherTool, timeTool] }; - -// Add to your AI agent configuration -const agentConfig = { - name: 'SignalWire Voice Assistant', - instructions: `You are a helpful and friendly voice assistant. - Always start every conversation by greeting the caller first. - You can help with weather information, time queries, and general conversation. - Be concise and friendly in your responses.`, - tools: allTools, - voice: 'alloy' -}; -``` - - - - 1. **User asks**: "What's the weather in New York?" 2. **AI recognizes intent**: Needs weather information 3. **Function call triggered**: `get_weather({ location: "New York" })` From 1477d3f3510d528e71d5ec29874c1427b4656f1c Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 19:03:34 +0000 Subject: [PATCH 25/73] remove redundant 'local development' section --- .../realtime-streaming-to-openai/index.mdx | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 2efcb1cbd..74f0f1a92 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -847,46 +847,6 @@ For production deployments: ## Deployment -### Local development - -1. **Install dependencies** - ```bash - npm install - ``` - -2. **Set up environment** - ```bash - cp .env.example .env - # Edit .env with your OpenAI API key - ``` - -3. 
**Start your server** - ```bash - npm run build - npm start - - # Or for development with hot reload: - npm run dev - ``` - -4. **Expose with ngrok** - ```bash - npx ngrok http 5050 - # Note the HTTPS URL (e.g., https://abc123.ngrok.io) - ``` - -5. **Configure SignalWire webhook** - - Use the ngrok HTTPS URL + `/incoming-call` - - Example: `https://abc123.ngrok.io/incoming-call` - -6. **Test your setup** - ```bash - # Check health endpoint - curl https://abc123.ngrok.io/health - - # Should return: {"status":"healthy","timestamp":"..."} - ``` - ### Production with Docker From 02aedf4809178bf409c9cbfab6335f7c5700b631 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 19:10:35 +0000 Subject: [PATCH 26/73] rename quickstart --- .../realtime-streaming-to-openai/index.mdx | 30 ++++--------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 74f0f1a92..2fc0bcfda 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -63,9 +63,9 @@ Before you begin, ensure you have: - **ngrok** or other tunneling service - For local development tunneling ([Install ngrok](https://ngrok.com/download)) - **Docker** (optional) - For containerized deployment -## Quickstart +## Get started -### 5-Minute Setup (TL;DR) +### Quickstart ```bash # 1. 
Clone and install @@ -291,29 +291,9 @@ app.post('/incoming-call', async (req, res) => { res.type('text/xml').send(cxml); }); -app.listen({ port: 5050, host: '0.0.0.0' }); -``` - - - - -```javascript -const Fastify = require('fastify'); - -const app = Fastify(); - -app.post('/incoming-call', async (req, res) => { - const host = req.headers.host; - const wsUrl = `wss://${host}/media-stream`; - - // Return cXML instructions to stream audio - const cxml = ` - - - `; - - res.type('text/xml').send(cxml); -}); +// For production with proxies (ngrok, load balancers), detect protocol: +// const protocol = req.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws'; +// const wsUrl = `${protocol}://${host}/media-stream`; app.listen({ port: 5050, host: '0.0.0.0' }); ``` From b662394577b68a05ebfd3922ec385fd156af7506 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 31 Oct 2025 19:11:25 +0000 Subject: [PATCH 27/73] simplify code samples --- .../realtime-streaming-to-openai/index.mdx | 33 +++---------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 2fc0bcfda..ac8a13f01 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -316,38 +316,19 @@ Next, we will create a WebSocket server to handle bidirectional audio streaming. 
```typescript -import websocket from '@fastify/websocket'; +import type { WebSocket } from 'ws'; import { SignalWireCompatibilityTransportLayer } from '../transports/SignalWireCompatibilityTransportLayer.js'; import { RealtimeSession, RealtimeAgent } from '@openai/agents/realtime'; -import { AGENT_CONFIG } from '../config.js'; - -interface SignalWireMessage { - event: 'start' | 'media' | 'stop' | 'mark'; - media?: { - payload: string; // Base64 encoded audio - track?: 'inbound' | 'outbound'; - }; - start?: { - streamSid: string; - callSid: string; - mediaFormat?: { - encoding: string; - sampleRate: number; - channels: number; - }; - }; -} - -app.register(websocket); +import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; -app.get('/media-stream', { websocket: true }, async (connection) => { +app.get('/media-stream', { websocket: true }, async (connection: WebSocket) => { console.log('πŸ“ž Client connected to WebSocket'); try { // Create SignalWire transport layer with configured audio format const signalWireTransportLayer = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket: connection, - audioFormat: AGENT_CONFIG.audioFormat + audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio }); // Create AI agent and session @@ -367,12 +348,6 @@ app.get('/media-stream', { websocket: true }, async (connection) => { console.log('πŸ”§ Tool call started:', details); }); - } catch (error) { - console.error('❌ Transport initialization failed:', error); - } -}); -``` - session.on('agent_tool_end', (context, agent, tool, result, details) => { console.log('βœ… Tool call completed:', details); }); From 2f6a6d6d80ea6320790a0075938e85682e9892fa Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 13:43:54 +0000 Subject: [PATCH 28/73] claude.md --- .cursor/rules/docs.mdc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc index c75bb0ff4..ee687df4d 100644 --- a/.cursor/rules/docs.mdc +++ 
b/.cursor/rules/docs.mdc @@ -159,6 +159,15 @@ export const products = [ - Provide alt text for custom images - Ensure proper focus management for interactive elements +## Development and build commands + +### Package manager +**This project uses Yarn, not npm.** Always use `yarn` for all package management commands. +- Build: `yarn --cwd website build` +- Develop: `yarn --cwd website start` (for local preview) +- Install: `yarn install` +- Do NOT use: `npm install`, `npm run`, `npx` - use `yarn` equivalents instead + ## Quality assurance ### Content review checklist From d5e05050b8c7ad98f6e9fd79fb27130cc3094d97 Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 20:39:55 +0000 Subject: [PATCH 29/73] merge 'get started' and 'quickstart' --- .../realtime-streaming-to-openai/index.mdx | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index ac8a13f01..fca378818 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -63,33 +63,7 @@ Before you begin, ensure you have: - **ngrok** or other tunneling service - For local development tunneling ([Install ngrok](https://ngrok.com/download)) - **Docker** (optional) - For containerized deployment -## Get started - -### Quickstart - -```bash -# 1. Clone and install -git clone https://github.com/signalwire/cXML-realtime-agent-stream -cd cxml-realtime-agent-stream -npm install - -# 2. Configure -cp .env.example .env -# Edit .env: add your OPENAI_API_KEY - -# 3. Run -npm run build && npm start - -# 4. Expose (in another terminal) -ngrok http 5050 - -# 5. 
Create SignalWire resources -# - Create cXML Script pointing to https://your-ngrok-url.ngrok.io/incoming-call -# - Add SIP Address to the script -# - Call your SIP address! -``` - -**Detailed walkthrough below:** +## Quickstart From ba550f35aad5bc28cea4a1f66d56d5d68733d396 Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 20:41:40 +0000 Subject: [PATCH 30/73] codeblocks and capitalization --- .../realtime-streaming-to-openai/index.mdx | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index fca378818..24861f691 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -77,7 +77,7 @@ Clone the SignalWire Solutions repository, navigate to this example, and install ```bash git clone https://github.com/signalwire/cXML-realtime-agent-stream -cd cxml-realtime-agent-stream +cd cXML-realtime-agent-stream npm install ``` @@ -112,7 +112,7 @@ cp .env.example .env Edit `.env` and add your OpenAI API key: -```bash title=".env" +```bash OPENAI_API_KEY=sk-your-actual-api-key-here ``` @@ -122,11 +122,11 @@ OPENAI_API_KEY=sk-your-actual-api-key-here When running the server in production with the Docker container, store your credentials in a `secrets` folder. 
-```bash +```bash title="Create secrets directory" mkdir secrets ``` -```bash +```bash title="Store API key in secrets" echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt ``` @@ -138,7 +138,7 @@ echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt -```bash +```bash title="Local - Build and run" npm run build npm start ``` @@ -147,7 +147,7 @@ npm start -```bash +```bash title="Docker - Build and run" docker-compose up --build signalwire-assistant ``` @@ -164,7 +164,7 @@ curl http://localhost:5050/health ``` ::: -### Create a cXML Script +### Create a cXML script Next, we need to tell SignalWire to request cXML from your server when a call comes in. @@ -193,7 +193,7 @@ Select the **Local** tab below if you ran the application locally, and the **Doc Use ngrok to expose port 5050 on your development machine: -```bash +```bash title="Start ngrok tunnel" ngrok http 5050 ``` @@ -284,7 +284,7 @@ Your webhook URL must include `/incoming-call` at the end: Next, we will create a WebSocket server to handle bidirectional audio streaming. -**Initialize WebSocket Server** +**Initialize WebSocket server** @@ -338,7 +338,7 @@ app.get('/media-stream', { websocket: true }, async (connection: WebSocket) => { The `SignalWireCompatibilityTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: -```typescript +```typescript title="Transport layer instantiation" // Key features of the transport layer: const transport = new SignalWireCompatibilityTransportLayer({ signalWireWebSocket: connection, @@ -353,7 +353,7 @@ const transport = new SignalWireCompatibilityTransportLayer({ // 5. Session cleanup ``` -**Session Lifecycle:** +**Session lifecycle:** 1. **WebSocket connection** β†’ SignalWire connects to `/media-stream` 2. **Transport creation** β†’ Bridge between SignalWire and OpenAI 3. 
**AI session start** β†’ RealtimeSession connects to OpenAI @@ -377,7 +377,7 @@ SignalWire sends several types of messages through the WebSocket: - **Session lifecycle management** with proper cleanup - **Error recovery** and reconnection handling -**Audio Format Support:** +**Audio format support:** - **Input**: G.711 ΞΌ-law (8kHz) or PCM16 (24kHz) from SignalWire - **Output**: Matches input format automatically - **OpenAI Integration**: Handles format negotiation transparently @@ -481,7 +481,6 @@ AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio For production with Docker secrets: ```bash -# Create secrets directory mkdir -p secrets echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt ``` @@ -508,7 +507,9 @@ Choose the right audio format for your use case: Enable your AI to execute server-side tools during conversations. -**Define Tools** +**Define tools** + +Tools are functions the AI can call during a conversation. Here's the structureβ€”in production, you extract the implementation logic into separate files as shown in the repository. @@ -627,7 +628,7 @@ All of this happens in real-time during the conversation. -### System components +**System components** The voice assistant consists of four key components: @@ -636,12 +637,12 @@ The voice assistant consists of four key components: 3. **AI Integration** (RealtimeSession + RealtimeAgent): Processes speech and generates responses 4. 
**Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations -### Audio path +**Audio path** -**Inbound (Caller β†’ AI):** +*Inbound (Caller β†’ AI):* - Phone β†’ SignalWire β†’ Base64 encoded payload β†’ WebSocket β†’ SignalWireCompatibilityTransportLayer β†’ ArrayBuffer β†’ OpenAI Realtime API -**Outbound (AI β†’ Caller):** +*Outbound (AI β†’ Caller):* - OpenAI Realtime API β†’ ArrayBuffer β†’ SignalWireCompatibilityTransportLayer β†’ Base64 encoding β†’ WebSocket β†’ SignalWire β†’ Phone ### Latency & quality @@ -706,7 +707,7 @@ AUDIO_FORMAT=pcm16 # or g711_ulaw -### Advanced Configuration +### Advanced configuration @@ -872,13 +873,13 @@ secrets: - Consider using Redis for session state if needed - Monitor memory usage (audio buffers can accumulate) -**Error Handling:** +**Error handling:** - Graceful degradation when OpenAI API is unavailable - Retry logic with exponential backoff - Proper WebSocket reconnection handling - Fallback responses when tools fail -**Development Workflow:** +**Development workflow:** ```bash # Local development with hot reload npm run dev @@ -897,7 +898,7 @@ DEBUG=openai-agents:* npm run dev --- -**Console Output to Look For:** +**Console output to look for:** ```bash πŸ“‘ Server running on http://0.0.0.0:5050 πŸ₯ Health check: http://0.0.0.0:5050/health @@ -952,7 +953,7 @@ DEBUG=openai-agents:* npm run dev ### Debug checklist -**Basic Setup:** +**Basic setup:** - [ ] Webhook URL includes `/incoming-call` endpoint - [ ] ngrok is running and exposing port 5050 (for local dev) - [ ] OpenAI API key is properly configured From 8479f89faefcbbb45354c302ecde49389d2ffed8 Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 20:42:04 +0000 Subject: [PATCH 31/73] links --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx 
b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 24861f691..c18014e3a 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -1040,5 +1040,5 @@ and a production Docker setup. [cxml]: /compatibility-api/cxml "Documentation for cXML, or Compatibility XML." [bidir-stream]: /compatibility-api/cxml/voice/stream#bidirectional-stream "Technical reference for creating a bidirectional Stream in cXML." [resources]: https://my.signalwire.com?page=resources "The My Resources page of your SignalWire Dashboard." -[repo]: https://github.com/signalwire/solutions-architecture/tree/main/code/cxml-realtime-agent-stream "This project's GitHub repository." -[openai-realtime-api]: https://platform.openai.com/docs/guides/realtime "The OpenAI Realtime API" \ No newline at end of file +[repo]: https://github.com/signalwire/cXML-realtime-agent-stream "This project's GitHub repository." 
+[openai-realtime-api]: https://platform.openai.com/docs/guides/realtime "The OpenAI Realtime API" From 2eabff220a051913cab4dbff6eb277a3eed672de Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 20:44:38 +0000 Subject: [PATCH 32/73] simplify, consolidate audio sections --- .../realtime-streaming-to-openai/index.mdx | 43 +++++++------------ 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index c18014e3a..33e09e45d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -17,7 +17,7 @@ import AddResource from '/docs/main/_common/dashboard/add-resource.mdx'; import ResourcesFyi from '/docs/main/_common/call-fabric/resources-fyi-card.mdx'; import { MdCode } from "react-icons/md"; -# Stream to OpenAI Realtime API agent with cXML +# Stream an OpenAI Realtime API agent with a cXML script Put OpenAI Speech-to-Speech models on the phone with cXML `` @@ -495,12 +495,6 @@ environment: -:::note Audio Format Options -Choose the right audio format for your use case: -- **g711_ulaw (8kHz)**: Standard telephony quality (default) -- **pcm16 (24kHz)**: High definition audio for demos -::: - @@ -645,12 +639,6 @@ The voice assistant consists of four key components: *Outbound (AI β†’ Caller):* - OpenAI Realtime API β†’ ArrayBuffer β†’ SignalWireCompatibilityTransportLayer β†’ Base64 encoding β†’ WebSocket β†’ SignalWire β†’ Phone -### Latency & quality - -- **End-to-End Latency:** 150-300ms typically, depending on codec and network -- **G.711 ΞΌ-law (8kHz):** Lower latency, standard telephony quality, smaller payloads -- **PCM16 (24kHz):** HD audio quality, larger payloads, slightly higher latency - @@ -661,29 +649,30 @@ 
The voice assistant consists of four key components: Choose the right audio codec for your use case: -| Format | Sample Rate | Bandwidth | Quality | Latency | Best For | -|--------|-------------|-----------|---------|---------|----------| -| **G.711 μ-law** | 8 kHz | ~64 kbps | Standard telephony | Lower (~100-150ms) | Traditional phone calls, bandwidth-constrained | -| **PCM16 (L16@24000h)** | 24 kHz | ~384 kbps | High definition | Slightly higher (~150-300ms) | Demos, high-quality requirements | - -**Default:** G.711 μ-law (matches SignalWire's default codec) - -### Codec selection guide - -In your cXML Script or environment configuration: - + > +Sample rate: `24 kHz` +Bandwidth: `~384 kbps` +Quality: `High definition` +Latency: `150-300ms` + + > +Sample rate: `8 kHz` +Bandwidth: `~64 kbps` +Quality: `Standard telephony` +Latency: `100-150ms` + -### Configure Audio Format +**Default:** G.711 μ-law (matches SignalWire's default codec) + +### Configure audio format From 0897b3c9c0a5d8fcde07036dc02068bdb3568938 Mon Sep 17 00:00:00 2001 From: ALR Date: Mon, 3 Nov 2025 20:46:13 +0000 Subject: [PATCH 33/73] misc explanation --- .../realtime-streaming-to-openai/index.mdx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 33e09e45d..4d34e1b75 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -191,6 +191,16 @@ Select the **Local** tab below if you ran the application locally, and the **Doc + +SignalWire must be able to reach your webhook from the internet.
+For local development, use +[ngrok](https://ngrok.com/), +or another +[tunneling service](https://github.com/anderspitman/awesome-tunneling), +to create a secure public tunnel to your local server. + +**Why ngrok is needed:** SignalWire's servers make HTTPS calls to your webhook URL when incoming calls arrive. Your local computer is behind a firewall and not accessible from the internet, so ngrok creates a publicly accessible HTTPS proxy that forwards traffic to your `localhost:5050`. + Use ngrok to expose port 5050 on your development machine: ```bash title="Start ngrok tunnel" @@ -457,7 +467,7 @@ session.on('error', (error) => { }); ``` -**Environment Configuration** +**Environment configuration** Set up your environment variables for different deployment scenarios: @@ -478,7 +488,9 @@ AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio -For production with Docker secrets: +For production, store your API credentials securely using Docker secrets rather than environment variables. This keeps sensitive data out of version control and environment files. + +**Set up secrets:** ```bash mkdir -p secrets From 0dc8af0639cd3570327905e013ea8fabeca13298 Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 21:42:01 +0000 Subject: [PATCH 34/73] explain ngrok --- .../nodejs/realtime-streaming-to-openai/index.mdx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 4d34e1b75..11999d05d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -207,8 +207,16 @@ Use ngrok to expose port 5050 on your development machine: ngrok http 5050 ``` -Append `/incoming-call` to the HTTPS URL returned by ngrok.
-https://abc123.ngrok.io/incoming-call +The output will look like: +```bash title="ngrok output" +Forwarding https://abc123def456.ngrok.io -> http://localhost:5050 +``` + +Append `/incoming-call` to the HTTPS URL provided by ngrok: +```bash title="Webhook URL" +https://abc123def456.ngrok.io/incoming-call +``` + For production environments, set your server URL + `/incoming-call`: From 8400c1bb3945843843b9504282ac750fa9150265 Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 21:52:58 +0000 Subject: [PATCH 35/73] accordion rename --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 11999d05d..1d0be3dcd 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -402,9 +402,9 @@ SignalWire sends several types of messages through the WebSocket: - + -Connect your WebSocket bridge to OpenAI's Realtime API for AI processing. +The AI agent configuration defines how your assistant behaves. 
Import your tools and set instructions: **Create the AI Session** From 3e10e486fca13b0dc28cfd254a1f83f27d35f7e5 Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 21:53:51 +0000 Subject: [PATCH 36/73] tags --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 1d0be3dcd..7e017bba7 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -350,6 +350,9 @@ app.get('/media-stream', { websocket: true }, async (connection: WebSocket) => { }); ``` + + + @@ -638,6 +641,9 @@ const agentConfig = { All of this happens in real-time during the conversation. + + + From 2f4c067d85b7d74fd731d94f8d27047c6b4afec7 Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 21:54:33 +0000 Subject: [PATCH 37/73] env and secrets --- .../realtime-streaming-to-openai/index.mdx | 59 +++++++++++++++++-- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 7e017bba7..898c28a26 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -508,13 +508,62 @@ mkdir -p secrets echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt ``` -Environment variables in `docker-compose.yml`: -```yaml -environment: - - PORT=5050 - - AUDIO_FORMAT=pcm16 +**docker-compose.yml configuration:** + +The `docker-compose.yml` file references the 
secret and mounts it into the container: + +```yaml title="docker-compose.yml" +services: + signalwire-assistant: + # ... other config + secrets: + - openai_api_key + +secrets: + openai_api_key: + file: ./secrets/openai_api_key.txt ``` +**Reading secrets in your application:** + +Your application reads from the Docker secret at runtime, checking the secret file first and falling back to an environment variable: + +```typescript title="src/config.ts - Read Docker secrets" +import * as fs from 'fs'; + +function getOpenAIApiKey(): string { + // First try to read from Docker secret (for containerized deployments) + const secretPath = '/run/secrets/openai_api_key'; + try { + if (fs.existsSync(secretPath)) { + const apiKey = fs.readFileSync(secretPath, 'utf8').trim(); + if (apiKey) { + return apiKey; + } + } + } catch (error) { + // Fall back to environment variable if secret reading fails + // (logging omitted for simplicity) + } + + // Fallback to environment variable + const envApiKey = process.env.OPENAI_API_KEY; + if (envApiKey) { + return envApiKey; + } + + return ''; +} + +const OPENAI_API_KEY = getOpenAIApiKey(); +``` + +**Important reminders:** + +- Always add `secrets/` to your `.gitignore` to prevent accidental commits +- Docker secrets are mounted at `/run/secrets/` inside the container +- Keep credentials out of `.env` files and version control + From b7c411aa0806c78f77797381aa636dea4dc7b01c Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 21:54:53 +0000 Subject: [PATCH 38/73] code changes --- .../realtime-streaming-to-openai/index.mdx | 360 ++++++++---------- 1 file changed, 168 insertions(+), 192 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 898c28a26..a4f16ead3 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ 
b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -265,29 +265,29 @@ First, your server needs to handle incoming call webhooks from SignalWire. **Set up the HTTP endpoint** -```typescript -import Fastify from 'fastify'; - -const app = Fastify(); - -app.post('/incoming-call', async (req, res) => { - const host = req.headers.host; - const wsUrl = `wss://${host}/media-stream`; - - // Return cXML instructions to stream audio - const cxml = ` +```typescript title="src/routes/webhook.ts" +import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; + +export async function webhookRoute(fastify: FastifyInstance) { + fastify.all('/incoming-call', async (request: FastifyRequest, reply: FastifyReply) => { + // Dynamically construct WebSocket URL from request headers + const host = request.headers.host || 'localhost'; + const protocol = request.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws'; + const websocketUrl = `${protocol}://${host}/media-stream`; + + // Codec negotiation omitted for simplicity + // Generate cXML response to stream audio to our WebSocket + const cXMLResponse = ` - + Connecting to agent + + + `; - res.type('text/xml').send(cxml); -}); - -// For production with proxies (ngrok, load balancers), detect protocol: -// const protocol = req.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws'; -// const wsUrl = `${protocol}://${host}/media-stream`; - -app.listen({ port: 5050, host: '0.0.0.0' }); + reply.type('text/xml').send(cXMLResponse); + }); +} ``` :::tip Webhook URL Format @@ -307,46 +307,59 @@ Next, we will create a WebSocket server to handle bidirectional audio streaming. 
-```typescript +```typescript title="src/routes/streaming.ts" import type { WebSocket } from 'ws'; +import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; import { SignalWireCompatibilityTransportLayer } from '../transports/SignalWireCompatibilityTransportLayer.js'; -import { RealtimeSession, RealtimeAgent } from '@openai/agents/realtime'; -import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; -app.get('/media-stream', { websocket: true }, async (connection: WebSocket) => { - console.log('πŸ“ž Client connected to WebSocket'); +fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) => { + // Handle client disconnection + connection.on('close', () => { + console.log('Client disconnected'); + }); - try { - // Create SignalWire transport layer with configured audio format - const signalWireTransportLayer = new SignalWireCompatibilityTransportLayer({ - signalWireWebSocket: connection, - audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio - }); - - // Create AI agent and session - const realtimeAgent = new RealtimeAgent(agentConfig); - const session = new RealtimeSession(realtimeAgent, { - transport: signalWireTransportLayer, - model: 'gpt-4o-realtime-preview' - }); - - // Connect to OpenAI Realtime API - await session.connect({ - apiKey: process.env.OPENAI_API_KEY - }); - - // Handle session events - session.on('agent_tool_start', (context, agent, tool, details) => { - console.log('πŸ”§ Tool call started:', details); - }); - - session.on('agent_tool_end', (context, agent, tool, result, details) => { - console.log('βœ… Tool call completed:', details); - }); + // Handle connection errors + connection.on('error', (error) => { + console.error('Connection error:', error); + }); - } catch (error) { - console.error('❌ Transport initialization failed:', error); - } + try { + // Create SignalWire transport layer with configured audio format + const signalWireTransportLayer = new 
SignalWireCompatibilityTransportLayer({ + signalWireWebSocket: connection, + audioFormat: AGENT_CONFIG.audioFormat + }); + + // Create session with SignalWire transport + const session = new RealtimeSession(realtimeAgent, { + transport: signalWireTransportLayer, + model: model + }); + + // Listen to session events for tool call lifecycle + session.on('agent_tool_start', (context, agent, tool, details) => { + console.log('Tool call started:', details); + }); + + session.on('agent_tool_end', (context, agent, tool, result, details) => { + console.log('Tool call completed:', details); + }); + + // Handle errors gracefully + session.on('error', (error) => { + console.error('Session error:', error); + }); + + // Connect to OpenAI Realtime API via the transport layer + await session.connect({ + apiKey: process.env.OPENAI_API_KEY + }); + + // Transport event monitoring and response trigger omitted for simplicity + + } catch (error) { + console.error('Error initializing session:', error); + } }); ``` @@ -409,73 +422,28 @@ SignalWire sends several types of messages through the WebSocket: The AI agent configuration defines how your assistant behaves. Import your tools and set instructions: -**Create the AI Session** - - - - -```typescript -import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; +```typescript title="src/config.ts - Agent configuration" import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; -import { SignalWireCompatibilityTransportLayer } from '../transports/SignalWireCompatibilityTransportLayer.js'; import { allTools } from '../tools/index.js'; -// Configure the AI agent -const agentConfig: RealtimeAgentConfiguration = { - name: 'SignalWire Voice Assistant', - instructions: `You are a helpful and friendly voice assistant. - Always start every conversation by greeting the caller first. - You can help with weather information, time queries, and general conversation. 
- Be concise and friendly in your responses.`, - tools: allTools, // Weather, time, and other tools - voice: 'alloy' +export const AGENT_CONFIG: RealtimeAgentConfiguration = { + name: 'SignalWire Voice Assistant', + voice: 'alloy', + model: 'gpt-4o-realtime-preview', + audioFormat: process.env.AUDIO_FORMAT || 'g711_ulaw', + instructions: ` + You are a helpful and friendly voice assistant integrated with SignalWire. + + IMPORTANT: Always start every conversation by greeting the caller first. Begin with something like "Hello! I'm your AI voice assistant. How can I help you today?" + + You can help with weather information, time queries, and general conversation. + Be concise and friendly in your responses, remembering you're on a phone call. + When you first greet someone, briefly mention that you can help with weather, time, and answering questions. + Always confirm when you're about to use a tool. + ` }; -async function createAISession(signalWireWebSocket: WebSocket): Promise { - // Create transport layer that bridges SignalWire and OpenAI - const transport = new SignalWireCompatibilityTransportLayer({ - signalWireWebSocket, - audioFormat: 'g711_ulaw' // or 'pcm16' for HD audio - }); - - // Create agent and session - const agent = new RealtimeAgent(agentConfig); - const session = new RealtimeSession(agent, { - transport, - model: 'gpt-4o-realtime-preview' - }); - - // Connect to OpenAI - await session.connect({ - apiKey: process.env.OPENAI_API_KEY - }); - - return session; -} -``` - -**Send Audio Back to Caller** - -```typescript -// Audio is automatically handled by SignalWireRealtimeTransportLayer -// The transport layer manages: -// 1. Audio format conversion (g711_ulaw ↔ pcm16) -// 2. Base64 encoding/decoding -// 3. Chunk timing and interruption handling -// 4. 
Mark events for tracking audio playback - -// Example of session event handling: -session.on('agent_tool_start', (context, agent, tool, details) => { - console.log('πŸ”§ Tool call started:', details); -}); - -session.on('agent_tool_end', (context, agent, tool, result, details) => { - console.log('βœ… Tool call completed:', details); -}); - -session.on('error', (error) => { - console.error('❌ Session error:', error); -}); +// The agent is instantiated in the WebSocket handler with: new RealtimeAgent(AGENT_CONFIG) ``` **Environment configuration** @@ -580,90 +548,98 @@ Tools are functions the AI can call during a conversation. Here's the structure -```typescript -import { tool as realtimeTool } from '@openai/agents/realtime'; +```typescript title="src/tools/weather.tool.ts" import { z } from 'zod'; +import { tool as realtimeTool } from '@openai/agents/realtime'; -// Weather tool using real US National Weather Service API +// Weather tool that calls an external API const weatherTool = realtimeTool({ - name: 'get_weather', - description: 'Get current weather information for any US city', - parameters: z.object({ - location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)') - }), - execute: async ({ location }) => { - try { - // Step 1: Geocoding - Convert city name to coordinates - const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; - const geocodeResponse = await fetch(geocodeUrl, { - headers: { - 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0' - } - }); - - if (!geocodeResponse.ok) { - return 'Sorry, weather information is currently unavailable.'; - } - - const geocodeData = await geocodeResponse.json(); - if (!geocodeData || geocodeData.length === 0) { - return `Sorry, I couldn't find the location "${location}". 
Please try a different city name.`; - } - - const lat = parseFloat(geocodeData[0].lat); - const lon = parseFloat(geocodeData[0].lon); - - // Step 2: Get weather from weather.gov - const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; - const pointsResponse = await fetch(pointsUrl); - const pointsData = await pointsResponse.json(); - - const forecastUrl = pointsData.properties?.forecast; - if (!forecastUrl) { - return 'Sorry, weather information is currently unavailable.'; - } - - const forecastResponse = await fetch(forecastUrl); - const forecastData = await forecastResponse.json(); - - const currentPeriod = forecastData.properties?.periods?.[0]; - if (!currentPeriod) { - return 'Sorry, weather information is currently unavailable.'; - } - - // Format response for voice - const cityName = geocodeData[0].display_name.split(',')[0]; - return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; - - } catch (error) { - return 'Sorry, weather information is currently unavailable.'; + name: 'get_weather', + description: 'Get current weather information for any US city', + parameters: z.object({ + location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)'), + }), + execute: async ({ location }) => { + try { + // Step 1: Geocoding - Convert city name to coordinates + const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; + const geocodeResponse = await fetch(geocodeUrl, { + headers: { + 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0 (Contact: developer@example.com)' } + }); + + if (!geocodeResponse.ok) { + return 'Sorry, weather information is currently unavailable.'; + } + + const geocodeData = await geocodeResponse.json(); + if (!geocodeData || geocodeData.length === 0) { + return `Sorry, I couldn't find the location "${location}". 
Please try a different city name.`; + } + + const lat = parseFloat(geocodeData[0].lat); + const lon = parseFloat(geocodeData[0].lon); + + // Step 2: Get weather grid point from weather.gov + const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; + const pointsResponse = await fetch(pointsUrl); + const pointsData = await pointsResponse.json(); + const forecastUrl = pointsData.properties?.forecast; + + if (!forecastUrl) { + return 'Sorry, weather information is currently unavailable.'; + } + + // Step 3: Get the detailed forecast + const forecastResponse = await fetch(forecastUrl); + const forecastData = await forecastResponse.json(); + const currentPeriod = forecastData.properties?.periods?.[0]; + + if (!currentPeriod) { + return 'Sorry, weather information is currently unavailable.'; + } + + // Format response for voice + const cityName = geocodeData[0].display_name.split(',')[0]; + return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; + + } catch (error) { + return 'Sorry, weather information is currently unavailable.'; } + } }); +``` + +In production, the `fetchWeatherData` logic is extracted to a separate helper function. 
-// Time tool example (no external API required) +```typescript title="src/tools/time.tool.ts" const timeTool = realtimeTool({ - name: 'get_time', - description: 'Get the current time in Eastern Time', - parameters: z.object({}), // No parameters needed - execute: async () => { - try { - const now = new Date(); - const easternTime = now.toLocaleString('en-US', { - timeZone: 'America/New_York', - timeZoneName: 'short', - weekday: 'long', - year: 'numeric', - month: 'long', - day: 'numeric', - hour: 'numeric', - minute: '2-digit' - }); - return `The current time in Eastern Time is ${easternTime}.`; - } catch (error) { - return 'Sorry, time information is currently unavailable.'; - } + name: 'get_time', + description: 'Get the current time in Eastern Time', + parameters: z.object({}), // No parameters needed + execute: async () => { + try { + const now = new Date(); + + // Always format for Eastern Time + const easternTime = now.toLocaleString('en-US', { + timeZone: 'America/New_York', + timeZoneName: 'short', + weekday: 'long', + year: 'numeric', + month: 'long', + day: 'numeric', + hour: 'numeric', + minute: '2-digit' + }); + + return `The current time in Eastern Time is ${easternTime}.`; + } catch (error) { + // Error handling omitted for simplicity + return 'Time information unavailable'; } + }, }); // Export all tools From e8e1d0302fecbe1d44fa3ebfaf8f8faf06184c66 Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 22:36:48 +0000 Subject: [PATCH 39/73] add admonition to codec negotiation block --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index a4f16ead3..329719d5b 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ 
b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -275,7 +275,6 @@ export async function webhookRoute(fastify: FastifyInstance) { const protocol = request.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws'; const websocketUrl = `${protocol}://${host}/media-stream`; - // Codec negotiation omitted for simplicity // Generate cXML response to stream audio to our WebSocket const cXMLResponse = ` @@ -290,6 +289,10 @@ export async function webhookRoute(fastify: FastifyInstance) { } ``` +:::info Codec Negotiation +The example above uses the default codec (G.711 μ-law). For production deployments, you can enhance this by adding dynamic codec selection based on your configured audio format. The actual implementation supports both G.711 μ-law (standard telephony, 8kHz) and PCM16 (high quality, 24kHz). See the [Configure audio format](#configure-audio-format) section for details. +::: + :::tip Webhook URL Format Your webhook URL must include `/incoming-call` at the end: - Local: `https://your-ngrok-url.ngrok.io/incoming-call` @@ -723,7 +726,7 @@ Latency: `100-150ms` **Default:** G.711 μ-law (matches SignalWire's default codec) -### Configure audio format +### Configure audio format {#configure-audio-format} From b50a81092b8643eef519797c9baf56255b0a32eb Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 22:47:53 +0000 Subject: [PATCH 40/73] add logging --- .../realtime-streaming-to-openai/index.mdx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 329719d5b..1c0ccc590 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -339,6 +339,20
@@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) model: model }); + // Listen to raw transport events for debugging + session.transport.on('*', (event) => { + switch (event.type) { + case 'response.done': + console.log('AI response completed', event); + break; + case 'conversation.item.input_audio_transcription.completed': + console.log('User transcription completed', event); + break; + default: + console.debug('Raw transport event:', event); + } + }); + // Listen to session events for tool call lifecycle session.on('agent_tool_start', (context, agent, tool, details) => { console.log('Tool call started:', details); @@ -358,8 +372,6 @@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) apiKey: process.env.OPENAI_API_KEY }); - // Transport event monitoring and response trigger omitted for simplicity - } catch (error) { console.error('Error initializing session:', error); } From f575e1cbac787d49ec893b6f268c6b7b221f4daf Mon Sep 17 00:00:00 2001 From: ALR Date: Tue, 4 Nov 2025 22:54:25 +0000 Subject: [PATCH 41/73] align config section --- .../realtime-streaming-to-openai/index.mdx | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 1c0ccc590..60f2b58b3 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -541,6 +541,14 @@ function getOpenAIApiKey(): string { const OPENAI_API_KEY = getOpenAIApiKey(); ``` +:::info Configuration Validation +The actual implementation includes startup validation that checks: +- **API Key**: Throws an error if `OPENAI_API_KEY` is missing, with helpful instructions for both local and Docker setups 
+- **Audio Format**: Validates that `AUDIO_FORMAT` is either `g711_ulaw` or `pcm16`, rejecting invalid values + +This means configuration errors are caught immediately at startup, preventing runtime failures later. If you see configuration errors when starting the application, check the error message—it includes specific instructions for fixing the issue. +::: + **Important reminders:** - Always add `secrets/` to your `.gitignore` to prevent accidental commits @@ -651,7 +659,7 @@ const timeTool = realtimeTool({ return `The current time in Eastern Time is ${easternTime}.`; } catch (error) { - // Error handling omitted for simplicity + // Return fallback message if time formatting fails return 'Time information unavailable'; } }, @@ -872,6 +880,15 @@ ENTRYPOINT ["dumb-init", "--"] CMD ["node", "dist/index.js"] ``` +:::tip Multi-Stage Build in Production +The example above uses a single-stage build for simplicity. For production deployments with smaller image sizes, the actual implementation uses a **multi-stage build pattern**: + +1. **Builder stage**: Installs all dependencies (dev + production) and builds the TypeScript +2. **Production stage**: Copies only the compiled code and production dependencies, discarding build artifacts + +This approach reduces final image size by ~60% by excluding dev dependencies, build tools, and temporary files. Consider adopting the multi-stage pattern for production deployments to reduce deployment time and storage costs.
+::: + @@ -892,12 +909,7 @@ services: interval: 30s timeout: 10s retries: 3 - start_period: 10s - logging: - driver: "json-file" - options: - max-size: "10m" - max-file: "3" + start_period: 40s secrets: openai_api_key: From b28be3e842311cbdd19d25c0688bbdb2326de394 Mon Sep 17 00:00:00 2001 From: ALR Date: Wed, 5 Nov 2025 14:55:39 +0000 Subject: [PATCH 42/73] update dockerfile --- .../realtime-streaming-to-openai/index.mdx | 57 ++++++++++++------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 60f2b58b3..09f5096b1 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -846,48 +846,61 @@ For production deployments: ```dockerfile -FROM node:20-alpine +# Multi-stage Docker build for SignalWire + OpenAI Voice Assistant +# ============================================================================ -# Install system dependencies -RUN apk add --no-cache dumb-init +# Stage 1: Build stage +FROM node:20-alpine AS builder +# Set working directory WORKDIR /app # Copy package files COPY package*.json ./ -# Install dependencies -RUN npm ci --only=production && npm cache clean --force +# Install dependencies (including dev dependencies for build) +RUN npm ci --only=production=false # Copy source code COPY . . 
-# Build TypeScript +# Build the TypeScript application RUN npm run build -# Create non-root user +# Remove dev dependencies after build +RUN npm prune --production + +# Stage 2: Production stage +FROM node:20-alpine AS production + +# Create non-root user for security RUN addgroup -g 1001 -S nodejs && \ - adduser -S nodeuser -u 1001 + adduser -S signalwire -u 1001 -# Change ownership and switch to non-root user -RUN chown -R nodeuser:nodejs /app -USER nodeuser +# Set working directory +WORKDIR /app -EXPOSE 5050 +# Copy built application and production dependencies from builder stage +COPY --from=builder --chown=signalwire:nodejs /app/dist ./dist +COPY --from=builder --chown=signalwire:nodejs /app/node_modules ./node_modules +COPY --from=builder --chown=signalwire:nodejs /app/package*.json ./ -# Use dumb-init for proper signal handling -ENTRYPOINT ["dumb-init", "--"] -CMD ["node", "dist/index.js"] -``` +# Create directory for Docker secrets +RUN mkdir -p /run/secrets && chown signalwire:nodejs /run/secrets -:::tip Multi-Stage Build in Production -The example above uses a single-stage build for simplicity. For production deployments with smaller image sizes, the actual implementation uses a **multi-stage build pattern**: +# Switch to non-root user +USER signalwire -1. **Builder stage**: Installs all dependencies (dev + production) and builds the TypeScript -2. **Production stage**: Copies only the compiled code and production dependencies, discarding build artifacts +# Expose the application port +EXPOSE 5050 -This approach reduces final image size by ~60% by excluding dev dependencies, build tools, and temporary files. Consider adopting the multi-stage pattern for production deployments to reduce deployment time and storage costs. 
-::: +# Health check to ensure the service is running +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD node -e "const http = require('http'); const options = { hostname: 'localhost', port: 5050, path: '/health', timeout: 2000 }; const req = http.request(options, (res) => process.exit(res.statusCode === 200 ? 0 : 1)); req.on('error', () => process.exit(1)); req.end();" + +# Start the application +CMD ["npm", "start"] +``` From dd63525ace8769fc352b8a53b384c0581ef6a649 Mon Sep 17 00:00:00 2001 From: ALR Date: Wed, 5 Nov 2025 15:34:49 +0000 Subject: [PATCH 43/73] weather function --- .../realtime-streaming-to-openai/index.mdx | 121 +++++++++++------- 1 file changed, 74 insertions(+), 47 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 09f5096b1..026b3f284 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -574,68 +574,95 @@ Tools are functions the AI can call during a conversation. Here's the structure ```typescript title="src/tools/weather.tool.ts" import { z } from 'zod'; import { tool as realtimeTool } from '@openai/agents/realtime'; +import { ERROR_MESSAGES } from '../constants.js'; + +/** + * Fetches weather data using the free US National Weather Service API + * + * Flow: + * 1. Convert city name to coordinates (OpenStreetMap Nominatim) + * 2. Get weather grid point from coordinates (weather.gov) + * 3. 
Fetch detailed forecast for that grid point + */ +async function fetchWeatherData(location: string): Promise { + try { + // Step 1: Geocoding - Convert city name to coordinates + const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; -// Weather tool that calls an external API -const weatherTool = realtimeTool({ - name: 'get_weather', - description: 'Get current weather information for any US city', - parameters: z.object({ - location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)'), - }), - execute: async ({ location }) => { - try { - // Step 1: Geocoding - Convert city name to coordinates - const geocodeUrl = `https://nominatim.openstreetmap.org/search?format=json&q=${encodeURIComponent(location)}&countrycodes=us&limit=1`; - const geocodeResponse = await fetch(geocodeUrl, { - headers: { - 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0 (Contact: developer@example.com)' - } - }); - - if (!geocodeResponse.ok) { - return 'Sorry, weather information is currently unavailable.'; + const geocodeResponse = await fetch(geocodeUrl, { + headers: { + 'User-Agent': 'SignalWire-OpenAI-Voice-Assistant/1.0.0 (Contact: developer@example.com)' } + }); - const geocodeData = await geocodeResponse.json(); - if (!geocodeData || geocodeData.length === 0) { - return `Sorry, I couldn't find the location "${location}". 
Please try a different city name.`; - } + if (!geocodeResponse.ok) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; + } - const lat = parseFloat(geocodeData[0].lat); - const lon = parseFloat(geocodeData[0].lon); + const geocodeData = await geocodeResponse.json(); - // Step 2: Get weather grid point from weather.gov - const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; - const pointsResponse = await fetch(pointsUrl); - const pointsData = await pointsResponse.json(); - const forecastUrl = pointsData.properties?.forecast; + if (!geocodeData || geocodeData.length === 0) { + return ERROR_MESSAGES.CITY_NOT_FOUND(location); + } - if (!forecastUrl) { - return 'Sorry, weather information is currently unavailable.'; - } + const lat = parseFloat(geocodeData[0].lat); + const lon = parseFloat(geocodeData[0].lon); - // Step 3: Get the detailed forecast - const forecastResponse = await fetch(forecastUrl); - const forecastData = await forecastResponse.json(); - const currentPeriod = forecastData.properties?.periods?.[0]; + // Step 2: Get weather grid point from weather.gov + const pointsUrl = `https://api.weather.gov/points/${lat},${lon}`; - if (!currentPeriod) { - return 'Sorry, weather information is currently unavailable.'; - } + const pointsResponse = await fetch(pointsUrl); - // Format response for voice - const cityName = geocodeData[0].display_name.split(',')[0]; - return `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; + if (!pointsResponse.ok) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; + } - } catch (error) { - return 'Sorry, weather information is currently unavailable.'; + const pointsData = await pointsResponse.json(); + + // Step 3: Get the detailed forecast + const forecastUrl = pointsData.properties?.forecast; + + if (!forecastUrl) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; } + + const forecastResponse = await fetch(forecastUrl); + + if (!forecastResponse.ok) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; + } + + 
const forecastData = await forecastResponse.json(); + + const currentPeriod = forecastData.properties?.periods?.[0]; + if (!currentPeriod) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; + } + + // Format the response for voice + const cityName = geocodeData[0].display_name.split(',')[0]; + const weatherReport = `In ${cityName}, it's currently ${currentPeriod.detailedForecast.toLowerCase()}`; + + return weatherReport; + + } catch (error) { + return ERROR_MESSAGES.WEATHER_UNAVAILABLE; } +} + +export const weatherTool = realtimeTool({ + name: 'get_weather', + description: 'Get current weather information for any US city', + parameters: z.object({ + location: z.string().describe('The US city or location to get weather for (include state if needed for clarity)'), + }), + execute: async ({ location }) => { + const weatherData = await fetchWeatherData(location); + return weatherData; + }, }); ``` -In production, the `fetchWeatherData` logic is extracted to a separate helper function. - ```typescript title="src/tools/time.tool.ts" const timeTool = realtimeTool({ name: 'get_time', From 5f0a41186f7bf5eb0d2ad1b3bc596b97de7e18e6 Mon Sep 17 00:00:00 2001 From: ALR Date: Wed, 5 Nov 2025 15:39:27 +0000 Subject: [PATCH 44/73] align sample --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 026b3f284..2678a7cf3 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -372,6 +372,14 @@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) apiKey: process.env.OPENAI_API_KEY }); + // Trigger immediate AI response + try { + const responseEvent = { 
type: 'response.create' }; + signalWireTransportLayer.sendEvent(responseEvent); + } catch (error) { + // AI-first response trigger failed, but session continues + } + } catch (error) { console.error('Error initializing session:', error); } From 369138bc5ad82fcf3787a2e7857076876b288f1e Mon Sep 17 00:00:00 2001 From: ALR Date: Wed, 5 Nov 2025 15:40:00 +0000 Subject: [PATCH 45/73] update time tool, remove unnecessary comment --- .../realtime-streaming-to-openai/index.mdx | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 2678a7cf3..2fe9b8527 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -672,7 +672,11 @@ export const weatherTool = realtimeTool({ ``` ```typescript title="src/tools/time.tool.ts" -const timeTool = realtimeTool({ +import { z } from 'zod'; +import { tool as realtimeTool } from '@openai/agents/realtime'; +import { ERROR_MESSAGES } from '../constants.js'; + +export const timeTool = realtimeTool({ name: 'get_time', description: 'Get the current time in Eastern Time', parameters: z.object({}), // No parameters needed @@ -695,24 +699,10 @@ const timeTool = realtimeTool({ return `The current time in Eastern Time is ${easternTime}.`; } catch (error) { // Return fallback message if time formatting fails - return 'Time information unavailable'; + return ERROR_MESSAGES.TIME_UNAVAILABLE; } }, }); - -// Export all tools -export const allTools = [weatherTool, timeTool]; - -// Add to your AI agent configuration -const agentConfig = { - name: 'SignalWire Voice Assistant', - instructions: `You are a helpful and friendly voice assistant. 
- Always start every conversation by greeting the caller first. - You can help with weather information, time queries, and general conversation. - Be concise and friendly in your responses.`, - tools: allTools, - voice: 'alloy' -}; ``` 1. **User asks**: "What's the weather in New York?" From 83a5c556624b2f323ac2daa80ce49d9a64ae4ddf Mon Sep 17 00:00:00 2001 From: ALR Date: Wed, 5 Nov 2025 20:18:25 +0000 Subject: [PATCH 46/73] remove redundant section --- .../realtime-streaming-to-openai/index.mdx | 50 ------------------- 1 file changed, 50 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 2fe9b8527..9db3e4964 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -391,56 +391,6 @@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) - - -The `SignalWireCompatibilityTransportLayer` is the critical component that bridges SignalWire's WebSocket protocol with OpenAI's Realtime API: - -```typescript title="Transport layer instantiation" -// Key features of the transport layer: -const transport = new SignalWireCompatibilityTransportLayer({ - signalWireWebSocket: connection, - audioFormat: 'g711_ulaw' // or 'pcm16' -}); - -// Automatic handling of: -// 1. Audio format conversion -// 2. Base64 encoding/decoding -// 3. Interruption detection -// 4. Mark event tracking -// 5. Session cleanup -``` - -**Session lifecycle:** -1. **WebSocket connection** → SignalWire connects to `/media-stream` -2. **Transport creation** → Bridge between SignalWire and OpenAI -3. **AI session start** → RealtimeSession connects to OpenAI -4. **Audio streaming** → Bidirectional real-time audio -5.
**Tool execution** → Function calls processed server-side -6. **Session cleanup** → Graceful disconnect and resource cleanup - -SignalWire sends several types of messages through the WebSocket: - -| Event | Purpose | Key data | -|-------|---------|----------| -| `start` | Connection initialized | `streamSid`, `callSid`, `mediaFormat` | -| `media` | Audio data packet (~20ms) | Base64 encoded `payload`, `track` | -| `mark` | Audio playback confirmation | `name` (for timing) | -| `stop` | Stream ending | None | - -**Key features** -- **Automatic audio format conversion** between SignalWire and OpenAI -- **Interruption handling** using `clear` events and mark tracking -- **Base64 encoding/decoding** for audio data -- **Session lifecycle management** with proper cleanup -- **Error recovery** and reconnection handling - -**Audio format support:** -- **Input**: G.711 μ-law (8kHz) or PCM16 (24kHz) from SignalWire -- **Output**: Matches input format automatically -- **OpenAI Integration**: Handles format negotiation transparently - - - The AI agent configuration defines how your assistant behaves.
Import your tools and set instructions: From 155fc3750977933fa5f2339208b361a685b7f68f Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 16:52:35 +0000 Subject: [PATCH 47/73] update links, cards, icons --- .../realtime-streaming-to-openai/index.mdx | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 9db3e4964..e1d22c973 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -15,7 +15,8 @@ x-custom: import AddResource from '/docs/main/_common/dashboard/add-resource.mdx'; import ResourcesFyi from '/docs/main/_common/call-fabric/resources-fyi-card.mdx'; -import { MdCode } from "react-icons/md"; +import { MdCode, MdDescription, MdLibraryBooks } from "react-icons/md"; +import { SiGithub, SiOpenai, SiNpm } from "react-icons/si"; # Stream an OpenAI Realtime API agent with a cXML script @@ -1041,50 +1042,37 @@ DEBUG=openai-agents:* npm run dev - [ ] Function calls (weather, time) work - [ ] Interruptions handled gracefully -## Complete example - -See the GitHub repo for a complete working example, including -weather and time function usage, -error handling, -and a production Docker setup. 
- -} - href="https://github.com/signalwire/cXML-realtime-agent-stream" -/> - --- ## Resources } + title="SignalWire + OpenAI Realtime" + description="Production-ready implementation with all features" + icon={} + href="https://github.com/signalwire/cXML-realtime-agent-stream" > - Learn about SignalWire's Call Fabric platform + Complete working example with weather and time functions, error handling, and production Docker setup } + icon={} > Official documentation for the OpenAI Realtime API } + icon={} > Complete reference for Compatibility XML } + icon={} > NPM package documentation for the OpenAI Agents SDK From 2cd02270969dbc55696ba08d1f616b0d1723eeba Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 16:54:46 +0000 Subject: [PATCH 48/73] remove hallucinated Advanced Configuration section --- .../realtime-streaming-to-openai/index.mdx | 57 ------------------- 1 file changed, 57 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index e1d22c973..2a8835990 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -746,63 +746,6 @@ AUDIO_FORMAT=pcm16 # or g711_ulaw -### Advanced configuration - - - - -The transport layer automatically handles interruptions: - -```typescript -// When user interrupts AI speech: -// 1. Transport detects voice activity -// 2. Sends 'clear' event to SignalWire -// 3. Truncates OpenAI audio at last played position -// 4. 
Resumes with new user input - -session.on('interruption', (event) => { - console.log('πŸ›‘ User interrupted AI speech'); -}); -``` - - - - -Mark events track audio playback timing: - -```typescript -// Transport sends mark events for each audio chunk -{ - "event": "mark", - "mark": { "name": "item123:45" }, // itemId:chunkNumber - "streamSid": "..." -} - -// Used for precise interruption timing -``` - - - - -Built-in error handling and recovery: - -```typescript -session.on('error', (error) => { - console.error('Session error:', error); - // Transport automatically attempts reconnection -}); - -transport.on('*', (event) => { - if (event.type === 'transport_error') { - // Handle transport-specific errors - console.error('Transport error:', event.error); - } -}); -``` - - - - :::tip Performance Optimization For production deployments: - Use **G.711 ΞΌ-law** for standard phone calls (lower latency) From 36acabec15d899517e66fbe963f795236153a888 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 16:58:30 +0000 Subject: [PATCH 49/73] delete more hallucinations --- .../realtime-streaming-to-openai/index.mdx | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 2a8835990..5c10273c9 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -888,31 +888,12 @@ npm run typecheck # Production build npm run build && npm start - -# Debug logging -DEBUG=openai-agents:* npm run dev ``` --- -**Console output to look for:** -```bash -πŸ“‘ Server running on http://0.0.0.0:5050 -πŸ₯ Health check: http://0.0.0.0:5050/health -πŸ”Š Audio format: g711_ulaw (8kHz telephony) -πŸŽ™οΈ Voice: alloy - -# When calls come in: 
-📞 Incoming call - Audio format: g711_ulaw, SignalWire codec: default -📱 Client connected to WebSocket -🔧 Tool call started: get_weather -✅ Tool call completed: get_weather -``` - ---- - ## Common issues & solutions ### Debugging From 7b4e739baf1a5de407ad52124cb6921474427c97 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:12:48 +0000 Subject: [PATCH 50/73] update codec cards --- .../nodejs/realtime-streaming-to-openai/index.mdx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 5c10273c9..8b7cba6f8 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -704,19 +704,18 @@ Choose the right audio codec for your use case: title="PCM16 @ 24kHz" description="Crystal clear audio for demos and high-quality applications" > -Sample rate: `24 kHz` -Bandwidth: `~384 kbps` -Quality: `High definition` -Latency: `150-300ms` + **Sample rate:** `24 kHz` + **Bandwidth:** `~384 kbps` + **Quality:** `High definition`
-Sample rate: `8 kHz` -Bandwidth: `~64 kbps` -Quality: `Standard telephony` -Latency: `100-150ms` + **Sample rate:** `8 kHz` + **Bandwidth:** `~64 kbps` + **Quality:** `Standard telephony` + **Latency:** `100-150ms` From 3e5278d65b79567dba3c4ae960a6795c123e6a46 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:21:32 +0000 Subject: [PATCH 51/73] extraneous tabs --- .../nodejs/realtime-streaming-to-openai/index.mdx | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 8b7cba6f8..12944aac0 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -308,9 +308,6 @@ Next, we will create a WebSocket server to handle bidirectional audio streaming. **Initialize WebSocket server** - - - ```typescript title="src/routes/streaming.ts" import type { WebSocket } from 'ws'; import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; @@ -387,9 +384,6 @@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) }); ``` - - - @@ -527,9 +521,6 @@ Enable your AI to execute server-side tools during conversations. Tools are functions the AI can call during a conversation. Here's the structureβ€”in production, you extract the implementation logic into separate files as shown in the repository. - - - ```typescript title="src/tools/weather.tool.ts" import { z } from 'zod'; import { tool as realtimeTool } from '@openai/agents/realtime'; @@ -665,9 +656,6 @@ export const timeTool = realtimeTool({ All of this happens in real-time during the conversation. 
- - - From f49ecaa8e9fb147f07d6d9ff4c29347b963ca341 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:29:11 +0000 Subject: [PATCH 52/73] misc --- .../nodejs/realtime-streaming-to-openai/index.mdx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 12944aac0..3610d01a8 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -260,7 +260,7 @@ You should now be speaking to your newly created agent! - + First, your server needs to handle incoming call webhooks from SignalWire. @@ -519,7 +519,8 @@ Enable your AI to execute server-side tools during conversations. **Define tools** -Tools are functions the AI can call during a conversation. Here's the structure—in production, you extract the implementation logic into separate files as shown in the repository. +Tools are functions the AI can call during a conversation. +Here's the structure—in production, you extract the implementation logic into separate files as shown in the repository. ```typescript title="src/tools/weather.tool.ts" import { z } from 'zod'; @@ -658,7 +659,7 @@ All of this happens in real-time during the conversation. - + **System components** @@ -685,7 +686,8 @@ The voice assistant consists of four key components: ### Audio format comparison -Choose the right audio codec for your use case: +Choose the right audio codec for your use case. +The default is G.711 μ-law.
-**Default:** G.711 μ-law (matches SignalWire's default codec) - ### Configure audio format {#configure-audio-format} From ace3a3e87beacc09945c76e427371c5c32716806 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:30:02 +0000 Subject: [PATCH 53/73] remove 'production best practices' --- .../realtime-streaming-to-openai/index.mdx | 41 ------------------- 1 file changed, 41 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 3610d01a8..33282c85d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -838,47 +838,6 @@ secrets: - - -**Security & Secrets:** -- Use Docker secrets or external secret management (AWS Secrets Manager, Azure Key Vault) -- Never commit API keys to version control -- Use non-root user in Docker containers -- Implement proper CORS and rate limiting - -**Monitoring & Observability:** -- Set up health checks (`/health` endpoint included) -- Implement structured logging with correlation IDs -- Monitor WebSocket connection metrics -- Track audio latency and quality metrics -- Set up alerting for failed calls - -**Scalability & Performance:** -- Use horizontal scaling with session affinity -- Implement connection pooling for high traffic -- Consider using Redis for session state if needed -- Monitor memory usage (audio buffers can accumulate) - -**Error handling:** -- Graceful degradation when OpenAI API is unavailable -- Retry logic with exponential backoff -- Proper WebSocket reconnection handling -- Fallback responses when tools fail - -**Development workflow:** -```bash -# Local development with hot reload -npm run dev - -# Type checking -npm run typecheck - -# Production build -npm run build && npm start -
- - --- ## Common issues & solutions From b113c2b3b58b3309d7e59fef567a8ccc707be1f4 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:40:48 +0000 Subject: [PATCH 54/73] move environment configuration section --- .../realtime-streaming-to-openai/index.mdx | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 33282c85d..5819543f9 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -838,6 +838,103 @@ secrets: +### Environment configuration + +Set up your environment variables for different deployment scenarios: + + + + +Create a `.env` file in your project root: + +```bash +# Required +OPENAI_API_KEY=sk-your-actual-api-key-here + +# Optional +PORT=5050 +AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio +``` + + + + +For production, store your API credentials securely using Docker secrets rather than environment variables. This keeps sensitive data out of version control and environment files. + +**Set up secrets:** + +```bash +mkdir -p secrets +echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt +``` + +**docker-compose.yml configuration:** + +The `docker-compose.yml` file references the secret and mounts it into the container: + +```yaml title="docker-compose.yml" +services: + signalwire-assistant: + # ... 
other config + secrets: + - openai_api_key + +secrets: + openai_api_key: + file: ./secrets/openai_api_key.txt +``` + +**Reading secrets in your application:** + +Your application reads from the Docker secret at runtime, checking the secret file first and falling back to an environment variable: + +```typescript title="src/config.ts - Read Docker secrets" +import * as fs from 'fs'; + +function getOpenAIApiKey(): string { + // First try to read from Docker secret (for containerized deployments) + const secretPath = '/run/secrets/openai_api_key'; + try { + if (fs.existsSync(secretPath)) { + const apiKey = fs.readFileSync(secretPath, 'utf8').trim(); + if (apiKey) { + return apiKey; + } + } + } catch (error) { + // Fall back to environment variable if secret reading fails + // (logging omitted for simplicity) + } + + // Fallback to environment variable + const envApiKey = process.env.OPENAI_API_KEY; + if (envApiKey) { + return envApiKey; + } + + return ''; +} + +const OPENAI_API_KEY = getOpenAIApiKey(); +``` + +:::info Configuration Validation +The actual implementation includes startup validation that checks: +- **API Key**: Throws an error if `OPENAI_API_KEY` is missing, with helpful instructions for both local and Docker setups +- **Audio Format**: Validates that `AUDIO_FORMAT` is either `g711_ulaw` or `pcm16`, rejecting invalid values + +This means configuration errors are caught immediately at startup, preventing runtime failures later. If you see configuration errors when starting the application, check the error message—it includes specific instructions for fixing the issue.
+::: + +**Important reminders:** + +- Always add `secrets/` to your `.gitignore` to prevent accidental commits +- Docker secrets are mounted at `/run/secrets/` inside the container +- Keep credentials out of `.env` files and version control + + + + --- ## Common issues & solutions From 0f4daa1353e73eacd1fa89713d1eba0a041a471c Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:40:58 +0000 Subject: [PATCH 55/73] align tab ids --- .../realtime-streaming-to-openai/index.mdx | 101 +----------------- 1 file changed, 2 insertions(+), 99 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 5819543f9..486742164 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -414,103 +414,6 @@ export const AGENT_CONFIG: RealtimeAgentConfiguration = { // The agent is instantiated in the WebSocket handler with: new RealtimeAgent(AGENT_CONFIG) ``` -**Environment configuration** - -Set up your environment variables for different deployment scenarios: - - - - -Create a `.env` file in your project root: - -```bash -# Required -OPENAI_API_KEY=sk-your-actual-api-key-here - -# Optional -PORT=5050 -AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio -``` - - - - -For production, store your API credentials securely using Docker secrets rather than environment variables. This keeps sensitive data out of version control and environment files. - -**Set up secrets:** - -```bash -mkdir -p secrets -echo "sk-your-actual-api-key-here" > secrets/openai_api_key.txt -``` - -**docker-compose.yml configuration:** - -The `docker-compose.yml` file references the secret and mounts it into the container: - -```yaml title="docker-compose.yml" -services: - signalwire-assistant: - # ... 
other config - secrets: - - openai_api_key - -secrets: - openai_api_key: - file: ./secrets/openai_api_key.txt -``` - -**Reading secrets in your application:** - -Your application reads from the Docker secret at runtime, checking the secret file first and falling back to an environment variable: - -```typescript title="src/config.ts - Read Docker secrets" -import * as fs from 'fs'; - -function getOpenAIApiKey(): string { - // First try to read from Docker secret (for containerized deployments) - const secretPath = '/run/secrets/openai_api_key'; - try { - if (fs.existsSync(secretPath)) { - const apiKey = fs.readFileSync(secretPath, 'utf8').trim(); - if (apiKey) { - return apiKey; - } - } - } catch (error) { - // Fall back to environment variable if secret reading fails - // (logging omitted for simplicity) - } - - // Fallback to environment variable - const envApiKey = process.env.OPENAI_API_KEY; - if (envApiKey) { - return envApiKey; - } - - return ''; -} - -const OPENAI_API_KEY = getOpenAIApiKey(); -``` - -:::info Configuration Validation -The actual implementation includes startup validation that checks: -- **API Key**: Throws an error if `OPENAI_API_KEY` is missing, with helpful instructions for both local and Docker setups -- **Audio Format**: Validates that `AUDIO_FORMAT` is either `g711_ulaw` or `pcm16`, rejecting invalid values - -This means configuration errors are caught immediately at startup, preventing runtime failures later. If you see configuration errors when starting the application, check the error message—it includes specific instructions for fixing the issue.
-::: - -**Important reminders:** - -- Always add `secrets/` to your `.gitignore` to prevent accidental commits -- Docker secrets are mounted at `/run/secrets/` inside the container -- Keep credentials out of `.env` files and version control - - - - @@ -843,7 +746,7 @@ secrets: Set up your environment variables for different deployment scenarios: - + Create a `.env` file in your project root: @@ -857,7 +760,7 @@ AUDIO_FORMAT=g711_ulaw # or 'pcm16' for HD audio ``` - + For production, store your API credentials securely using Docker secrets rather than environment variables. This keeps sensitive data out of version control and environment files. From de523b434123ebe5f3d2e658e2b36e0eff0e9ae0 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:47:27 +0000 Subject: [PATCH 56/73] simplify ngrok section --- .../realtime-streaming-to-openai/index.mdx | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 486742164..b684ca97b 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -193,31 +193,26 @@ Select the **Local** tab below if you ran the application locally, and the **Doc -SignalWire must be able to reach your webhook from the internet. -For local development, use -[ngrok](https://ngrok.com/), -or other -[tunneling service](https://github.com/anderspitman/awesome-tunneling), -to create a secure public tunnel to your local server. - -**Why ngrok is needed:** SignalWire's servers make HTTPS calls to your webhook URL when incoming calls arrive. 
Your local computer is behind a firewall and not accessible from the internet, so ngrok creates a publicly accessible HTTPS proxy that forwards traffic to your `localhost:5050`. +SignalWire must be able to reach your webhook from the internet. For local development, use [ngrok](https://ngrok.com/) or another [tunneling service](https://github.com/anderspitman/awesome-tunneling) to expose your local server. Use ngrok to expose port 5050 on your development machine: -```bash title="Start ngrok tunnel" +```bash ngrok http 5050 ``` The output will look like: -```bash title="ngrok output" +```bash Forwarding https://abc123def456.ngrok.io -> http://localhost:5050 ``` Append `/incoming-call` to the HTTPS URL provided by ngrok: -```bash title="Webhook URL" +``` https://abc123def456.ngrok.io/incoming-call ``` +Use this as the **Primary Script URL** when creating your cXML script in the SignalWire Dashboard. + For production environments, set your server URL + `/incoming-call`: From 49f8172d9f99efb1eee1da045a06d1869eacf173 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:47:35 +0000 Subject: [PATCH 57/73] rm hr --- .../guides/voice/nodejs/realtime-streaming-to-openai/index.mdx | 2 -- 1 file changed, 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index b684ca97b..34dfebe41 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -580,8 +580,6 @@ The voice assistant consists of four key components: ---- - ### Audio format comparison Choose the right audio codec for your use case. 
From f0297aaa36a58306bcf65a99f323edf69f68d8bc Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:48:22 +0000 Subject: [PATCH 58/73] codec --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 34dfebe41..994fd595d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -598,10 +598,9 @@ The default is G.711 ΞΌ-law. title="G.711 ΞΌ-law @ 8kHz" description="Standard telephony quality, lower bandwidth usage" > - **Sample rate:** `8 kHz` - **Bandwidth:** `~64 kbps` - **Quality:** `Standard telephony` - **Latency:** `100-150ms` + **Sample rate:** `8 kHz` + **Bandwidth:** `~64 kbps` + **Quality:** `Standard telephony`
From 4305de0f92ff7dc8746563e3ae295dc91584ef6b Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:57:47 +0000 Subject: [PATCH 59/73] remove code blocks that just represent the dockerfile and docker-compose.yml --- .../realtime-streaming-to-openai/index.mdx | 95 +------------------ 1 file changed, 2 insertions(+), 93 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 994fd595d..ea6a55235 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -639,99 +639,7 @@ For production deployments: --- -## Deployment - -### Production with Docker - - - - -```dockerfile -# Multi-stage Docker build for SignalWire + OpenAI Voice Assistant -# ============================================================================ - -# Stage 1: Build stage -FROM node:20-alpine AS builder - -# Set working directory -WORKDIR /app - -# Copy package files -COPY package*.json ./ - -# Install dependencies (including dev dependencies for build) -RUN npm ci --only=production=false - -# Copy source code -COPY . . 
- -# Build the TypeScript application -RUN npm run build - -# Remove dev dependencies after build -RUN npm prune --production - -# Stage 2: Production stage -FROM node:20-alpine AS production - -# Create non-root user for security -RUN addgroup -g 1001 -S nodejs && \ - adduser -S signalwire -u 1001 - -# Set working directory -WORKDIR /app - -# Copy built application and production dependencies from builder stage -COPY --from=builder --chown=signalwire:nodejs /app/dist ./dist -COPY --from=builder --chown=signalwire:nodejs /app/node_modules ./node_modules -COPY --from=builder --chown=signalwire:nodejs /app/package*.json ./ - -# Create directory for Docker secrets -RUN mkdir -p /run/secrets && chown signalwire:nodejs /run/secrets - -# Switch to non-root user -USER signalwire - -# Expose the application port -EXPOSE 5050 - -# Health check to ensure the service is running -HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD node -e "const http = require('http'); const options = { hostname: 'localhost', port: 5050, path: '/health', timeout: 2000 }; const req = http.request(options, (res) => process.exit(res.statusCode === 200 ? 0 : 1)); req.on('error', () => process.exit(1)); req.end();" - -# Start the application -CMD ["npm", "start"] -``` - - - - -```yaml -services: - signalwire-assistant: - build: . 
- ports: - - "${PORT:-5050}:${PORT:-5050}" - environment: - - PORT=${PORT:-5050} - - AUDIO_FORMAT=pcm16 - secrets: - - openai_api_key - restart: unless-stopped - healthcheck: - test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5050/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - -secrets: - openai_api_key: - file: ./secrets/openai_api_key.txt -``` - - - +## Setup & Configuration ### Environment configuration @@ -830,6 +738,7 @@ This means configuration errors are caught immediately at startup, preventing ru +### Audio format configuration {#configure-audio-format} --- ## Common issues & solutions From d451f0f2b980726cc6ef6f341190d4f0a093b491 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:58:15 +0000 Subject: [PATCH 60/73] reorg --- .../realtime-streaming-to-openai/index.mdx | 103 ++++++++---------- 1 file changed, 46 insertions(+), 57 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index ea6a55235..800dd8fe9 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -580,63 +580,6 @@ The voice assistant consists of four key components: -### Audio format comparison - -Choose the right audio codec for your use case. -The default is G.711 ΞΌ-law. 
- - - - **Sample rate:** `24 kHz` - **Bandwidth:** `~384 kbps` - **Quality:** `High definition` - - - **Sample rate:** `8 kHz` - **Bandwidth:** `~64 kbps` - **Quality:** `Standard telephony` - - - -### Configure audio format {#configure-audio-format} - - - - -```xml - - - - - -``` - - - - -```bash -# In your .env file -AUDIO_FORMAT=pcm16 # or g711_ulaw -``` - - - - -:::tip Performance Optimization -For production deployments: -- Use **G.711 ΞΌ-law** for standard phone calls (lower latency) -- Use **PCM16** for high-fidelity demos (better quality) -- Monitor WebSocket connection stability -- Implement connection pooling for high traffic -- Track audio latency metrics -::: - --- ## Setup & Configuration @@ -739,6 +682,52 @@ This means configuration errors are caught immediately at startup, preventing ru ### Audio format configuration {#configure-audio-format} + +Choose the right audio codec for your use case. The default is G.711 ΞΌ-law. + + + + **Sample rate:** `24 kHz` + **Bandwidth:** `~384 kbps` + **Quality:** `High definition` + + + **Sample rate:** `8 kHz` + **Bandwidth:** `~64 kbps` + **Quality:** `Standard telephony` + + + +**To configure your audio format:** + + + + +```xml + + + + + +``` + + + + +```bash +# In your .env file +AUDIO_FORMAT=pcm16 # or g711_ulaw +``` + + + + --- ## Common issues & solutions From 979a53704a240d34df2c3a59ccc9aa7d1a6e7cba Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 17:59:41 +0000 Subject: [PATCH 61/73] rm hr --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 800dd8fe9..75a9dcd99 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ 
b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -580,8 +580,6 @@ The voice assistant consists of four key components: ---- - ## Setup & Configuration ### Environment configuration @@ -728,8 +726,6 @@ AUDIO_FORMAT=pcm16 # or g711_ulaw ---- - ## Common issues & solutions ### Debugging @@ -802,8 +798,6 @@ AUDIO_FORMAT=pcm16 # or g711_ulaw - [ ] Function calls (weather, time) work - [ ] Interruptions handled gracefully ---- - ## Resources From cdd3193707bc9ae26868dc98089b8697a02b22d0 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:00:31 +0000 Subject: [PATCH 62/73] headers --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 75a9dcd99..20839f5a8 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -580,7 +580,7 @@ The voice assistant consists of four key components: -## Setup & Configuration +## Configuration ### Environment configuration @@ -679,7 +679,7 @@ This means configuration errors are caught immediately at startup, preventing ru -### Audio format configuration {#configure-audio-format} +### Audio codec {#configure-audio-format} Choose the right audio codec for your use case. The default is G.711 ΞΌ-law. 
From c1e3a872d344d945fa4cea63f83e7b1173674e8a Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:13:21 +0000 Subject: [PATCH 63/73] titles, and streamline troubleshooting sections --- .../realtime-streaming-to-openai/index.mdx | 75 ++----------------- 1 file changed, 7 insertions(+), 68 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 20839f5a8..7ec9e989b 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -582,7 +582,7 @@ The voice assistant consists of four key components: ## Configuration -### Environment configuration +### Environment variables Set up your environment variables for different deployment scenarios: @@ -726,77 +726,16 @@ AUDIO_FORMAT=pcm16 # or g711_ulaw -## Common issues & solutions +## Troubleshooting -### Debugging - - - - - - - - -### Troubleshooting guide +Refer to this table if you encounter issues running the application. 
| Issue | Cause | Solution | |-------|-------|----------| -| No audio from AI | Codec mismatch or transport error | Check `AUDIO_FORMAT` env var, verify SignalWire codec setting | -| High latency | Network or buffering issues | Use `g711_ulaw` for lower latency, check network | -| WebSocket disconnections | Network timeout or server overload | Implement reconnection logic, monitor server resources | -| Function calls fail | Network issues or API errors | Add retry logic, check API quotas and keys | -| "Missing OPENAI_API_KEY" | Configuration error | Verify .env file or Docker secrets setup | -| Calls not connecting | Webhook URL issues | Ensure URL is public and includes `/incoming-call` | -| Audio quality poor | Wrong codec configuration | Match audio format between SignalWire and application | -| Memory leaks | Audio buffer accumulation | Monitor memory usage, implement cleanup | -| Session errors | OpenAI API issues | Check API status, implement fallback responses | - -### Debug checklist - -**Basic setup:** -- [ ] Webhook URL includes `/incoming-call` endpoint -- [ ] ngrok is running and exposing port 5050 (for local dev) -- [ ] OpenAI API key is properly configured -- [ ] Node.js 20+ is installed -- [ ] All npm dependencies installed (`npm install`) - -**Configuration:** -- [ ] Audio format matches SignalWire codec setting -- [ ] Environment variables properly set -- [ ] Docker secrets configured (if using Docker) -- [ ] Port 5050 is available and not blocked - -**Runtime:** -- [ ] WebSocket connection establishes successfully -- [ ] Function tools are registered and accessible -- [ ] Health check endpoint responds (`/health`) -- [ ] Console logs show proper connection messages -- [ ] No error messages in server logs - -**SignalWire Integration:** -- [ ] cXML resource properly configured -- [ ] SIP address or phone number linked to resource -- [ ] Webhook URL is publicly accessible -- [ ] SignalWire project settings correct - -**Testing:** -- [ ] Can make test 
calls to SIP address -- [ ] Audio flows both directions -- [ ] AI responds appropriately -- [ ] Function calls (weather, time) work -- [ ] Interruptions handled gracefully +| No audio from AI | Codec mismatch or incorrect codec configuration | β€’ Check `AUDIO_FORMAT` environment variable
• Verify SignalWire and application codec match | +| Missing OPENAI_API_KEY | Configuration error | • Verify `OPENAI_API_KEY` in `.env` file (local)
• Verify Docker secrets are configured (Docker) | +| Calls not connecting | Webhook URL misconfiguration | • Verify URL is publicly accessible in SignalWire Dashboard
• Ensure URL includes `/incoming-call` | +| Memory leaks | Audio buffer accumulation | • Monitor memory usage
• Add buffer cleanup logic | ## Resources From bb7649b03193e853b74c7f230e4608a5ede81560 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:21:18 +0000 Subject: [PATCH 64/73] more troubleshooting solutions --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 7ec9e989b..1246c81ce 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -733,9 +733,11 @@ Refer to this table if you encounter issues running the application. | Issue | Cause | Solution | |-------|-------|----------| | No audio from AI | Codec mismatch or incorrect codec configuration | • Check `AUDIO_FORMAT` environment variable
• Verify SignalWire and application codec match | +| Invalid AUDIO_FORMAT error | Invalid environment variable value | • Verify `AUDIO_FORMAT` is either `g711_ulaw` or `pcm16`
• Check for typos or extra whitespace
• Remove the variable to use default (`g711_ulaw`) | +| Server fails to start | Port 5050 already in use | • Check what's running on port 5050: `lsof -i :5050`
• Stop the conflicting application or use a different port with `PORT=5051 npm start` | +| Health check failing | Server crashed or not responding | • Check server logs for error messages
• Verify all configuration is correct
• Try accessing `/health` endpoint directly in browser | | Missing OPENAI_API_KEY | Configuration error | • Verify `OPENAI_API_KEY` in `.env` file (local)
• Verify Docker secrets are configured (Docker) | -| Calls not connecting | Webhook URL misconfiguration | • Verify URL is publicly accessible in SignalWire Dashboard
• Ensure URL includes `/incoming-call` | -| Memory leaks | Audio buffer accumulation | • Monitor memory usage
• Add buffer cleanup logic | +| Calls not connecting after ngrok restart | ngrok URL changed | • ngrok generates a new URL each time you restart
• Update the webhook URL in SignalWire Dashboard with the new ngrok URL
β€’ Restart ngrok and update SignalWire before testing | ## Resources From 568e963e85e0efa81a256eef64cc6486cefd27ed Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:25:27 +0000 Subject: [PATCH 65/73] streamline codec --- .../realtime-streaming-to-openai/index.mdx | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 1246c81ce..f58e999ce 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -702,29 +702,14 @@ Choose the right audio codec for your use case. The default is G.711 ΞΌ-law.
-**To configure your audio format:** - - - - -```xml - - - - - -``` - - - +The application automatically sets the correct codec in your cXML response based on the `AUDIO_FORMAT` environment variable. Just configure the environment variable: ```bash # In your .env file AUDIO_FORMAT=pcm16 # or g711_ulaw ``` - - +The application will use `pcm16` (24kHz HD audio) when set, or default to `g711_ulaw` (8kHz standard telephony) if not set. ## Troubleshooting From cb79be44932223e20c965e335a87637d291bc404 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:38:13 +0000 Subject: [PATCH 66/73] intro and new 'system architecture' accordion for How it Works --- .../realtime-streaming-to-openai/index.mdx | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index f58e999ce..051b46e23 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -253,8 +253,31 @@ You should now be speaking to your newly created agent! ## How it works +This section walks through the key components of the integration. Start with the system architecture to understand the full picture, then explore each component in detail. + + + +**System components** + +The voice assistant consists of four key components: + +1. **cXML Server** (Fastify): Receives incoming call webhooks and returns cXML instructions to SignalWire +2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format +3. **AI Integration** (RealtimeSession + RealtimeAgent): Processes speech and generates responses +4. 
**Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations + +**Audio path** + +*Inbound (Caller β†’ AI):* +- Phone β†’ SignalWire β†’ Base64 encoded payload β†’ WebSocket β†’ SignalWireCompatibilityTransportLayer β†’ ArrayBuffer β†’ OpenAI Realtime API + +*Outbound (AI β†’ Caller):* +- OpenAI Realtime API β†’ ArrayBuffer β†’ SignalWireCompatibilityTransportLayer β†’ Base64 encoding β†’ WebSocket β†’ SignalWire β†’ Phone + + + First, your server needs to handle incoming call webhooks from SignalWire. @@ -557,27 +580,6 @@ All of this happens in real-time during the conversation. - - -**System components** - -The voice assistant consists of four key components: - -1. **cXML Server** (Fastify): Receives incoming call webhooks and returns cXML instructions to SignalWire -2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format -3. **AI Integration** (RealtimeSession + RealtimeAgent): Processes speech and generates responses -4. 
**Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations - -**Audio path** - -*Inbound (Caller β†’ AI):* -- Phone β†’ SignalWire β†’ Base64 encoded payload β†’ WebSocket β†’ SignalWireCompatibilityTransportLayer β†’ ArrayBuffer β†’ OpenAI Realtime API - -*Outbound (AI β†’ Caller):* -- OpenAI Realtime API β†’ ArrayBuffer β†’ SignalWireCompatibilityTransportLayer β†’ Base64 encoding β†’ WebSocket β†’ SignalWire β†’ Phone - - - ## Configuration From bd6df4f8507d4d50bce26e79a6b6d34a056fe514 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:41:11 +0000 Subject: [PATCH 67/73] update 'how it works' accordions --- .../realtime-streaming-to-openai/index.mdx | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 051b46e23..efd9a996d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -286,6 +286,8 @@ First, your server needs to handle incoming call webhooks from SignalWire. ```typescript title="src/routes/webhook.ts" import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; +import { WEBHOOK_MESSAGES, AUDIO_FORMAT, SIGNALWIRE_CODECS } from '../constants.js'; +import { AGENT_CONFIG } from '../config.js'; export async function webhookRoute(fastify: FastifyInstance) { fastify.all('/incoming-call', async (request: FastifyRequest, reply: FastifyReply) => { @@ -294,12 +296,21 @@ export async function webhookRoute(fastify: FastifyInstance) { const protocol = request.headers['x-forwarded-proto'] === 'https' ? 
'wss' : 'ws'; const websocketUrl = `${protocol}://${host}/media-stream`; + // Get codec attribute based on configured audio format + const codec = AGENT_CONFIG.audioFormat === AUDIO_FORMAT.PCM16 + ? SIGNALWIRE_CODECS.PCM16 + : SIGNALWIRE_CODECS.G711_ULAW; + const codecAttribute = codec ? ` codec="${codec}"` : ''; + + // Log codec selection for debugging + console.log(`πŸ“ž Incoming call - Audio format: ${AGENT_CONFIG.audioFormat}, SignalWire codec: ${codec || 'default (G.711 ΞΌ-law)'}`); + // Generate cXML response to stream audio to our WebSocket const cXMLResponse = ` - Connecting to agent + ${WEBHOOK_MESSAGES.CONNECTING} - + `; @@ -308,9 +319,7 @@ export async function webhookRoute(fastify: FastifyInstance) { } ``` -:::info Codec Negotiation -The example above uses the default codec (G.711 ΞΌ-law). For production deployments, you can enhance this by adding dynamic codec selection based on your configured audio format. The actual implementation supports both G.711 ΞΌ-law (standard telephony, 8kHz) and PCM16 (high quality, 24kHz). See [Configure audio format](#configure-audio-format) section for details. -::: +The webhook automatically sets the correct codec based on your `AUDIO_FORMAT` configuration. No manual codec configuration needed hereβ€”see [Configure audio format](#configure-audio-format) for how to set your desired audio quality. :::tip Webhook URL Format Your webhook URL must include `/incoming-call` at the end: @@ -320,9 +329,9 @@ Your webhook URL must include `/incoming-call` at the end: - + -Next, we will create a WebSocket server to handle bidirectional audio streaming. +This is the core of the integration. The WebSocket server receives audio from SignalWire and forwards it to OpenAI's Realtime API, then sends AI responses back. The `SignalWireCompatibilityTransportLayer` handles all the protocol translation and audio format conversions automatically. 
**Initialize WebSocket server** @@ -404,9 +413,9 @@ fastify.get('/media-stream', { websocket: true }, async (connection: WebSocket) - + -The AI agent configuration defines how your assistant behaves. Import your tools and set instructions: +The agent configuration controls how your assistant behaves during conversations. You define its personality through instructions, select its voice, specify which tools it can use, and configure the audio format: ```typescript title="src/config.ts - Agent configuration" import type { RealtimeAgentConfiguration } from '@openai/agents/realtime'; From f22a3cd98ea824b5a1eab17875a7bc3a6e2cfc78 Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 18:54:19 +0000 Subject: [PATCH 68/73] architecture accordion --- .../realtime-streaming-to-openai/index.mdx | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index efd9a996d..270efcf63 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -259,23 +259,13 @@ This section walks through the key components of the integration. Start with the -**System components** - -The voice assistant consists of four key components: +The application consists of four key components: 1. **cXML Server** (Fastify): Receives incoming call webhooks and returns cXML instructions to SignalWire 2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format -3. **AI Integration** (RealtimeSession + RealtimeAgent): Processes speech and generates responses +3. 
**OpenAI Connection**: Manages the realtime connection to OpenAI for speech processing and AI responses, using the RealtimeSession class from OpenAI's Realtime SDK 4. **Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations -**Audio path** - -*Inbound (Caller β†’ AI):* -- Phone β†’ SignalWire β†’ Base64 encoded payload β†’ WebSocket β†’ SignalWireCompatibilityTransportLayer β†’ ArrayBuffer β†’ OpenAI Realtime API - -*Outbound (AI β†’ Caller):* -- OpenAI Realtime API β†’ ArrayBuffer β†’ SignalWireCompatibilityTransportLayer β†’ Base64 encoding β†’ WebSocket β†’ SignalWire β†’ Phone - @@ -319,7 +309,7 @@ export async function webhookRoute(fastify: FastifyInstance) { } ``` -The webhook automatically sets the correct codec based on your `AUDIO_FORMAT` configuration. No manual codec configuration needed hereβ€”see [Configure audio format](#configure-audio-format) for how to set your desired audio quality. +The webhook automatically sets the correct codec based on your `AUDIO_FORMAT` configuration. See [Configure audio format](#configure-audio-format) for how to set your desired audio quality. 
:::tip Webhook URL Format Your webhook URL must include `/incoming-call` at the end: From ebf0c69e37af3d1e285b756d57b7e13fd541c7cc Mon Sep 17 00:00:00 2001 From: ALR Date: Thu, 6 Nov 2025 23:37:44 +0000 Subject: [PATCH 69/73] misc edits --- .../realtime-streaming-to-openai/index.mdx | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 270efcf63..f43141310 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -190,7 +190,7 @@ Select the **Local** tab below if you ran the application locally, and the **Doc - + SignalWire must be able to reach your webhook from the internet. For local development, use [ngrok](https://ngrok.com/) or another [tunneling service](https://github.com/anderspitman/awesome-tunneling) to expose your local server. @@ -215,10 +215,13 @@ Use this as the **Primary Script URL** when creating your cXML script in the Sig + For production environments, set your server URL + `/incoming-call`: - ``` - https://your-domain.com/incoming-call - ``` + +``` +https://your-domain.com/incoming-call +``` + @@ -261,7 +264,7 @@ This section walks through the key components of the integration. Start with the The application consists of four key components: -1. **cXML Server** (Fastify): Receives incoming call webhooks and returns cXML instructions to SignalWire +1. **cXML Server** (Fastify): Receives incoming call webhooks and returns instructions, in the form of a [cXML Script][cxml], to SignalWire 2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format 3. 
**OpenAI Connection**: Manages the realtime connection to OpenAI for speech processing and AI responses, using the RealtimeSession class from OpenAI's Realtime SDK 4. **Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations @@ -270,9 +273,10 @@ The application consists of four key components: -First, your server needs to handle incoming call webhooks from SignalWire. - -**Set up the HTTP endpoint** +When SignalWire receives an incoming call, it sends a webhook to your server. +Your server responds with instructions, in the form of a +[cXML Script][cxml], +that tell SignalWire to stream the audio to your WebSocket endpoint, which is connected to OpenAI. ```typescript title="src/routes/webhook.ts" import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; @@ -321,9 +325,9 @@ Your webhook URL must include `/incoming-call` at the end: -This is the core of the integration. The WebSocket server receives audio from SignalWire and forwards it to OpenAI's Realtime API, then sends AI responses back. The `SignalWireCompatibilityTransportLayer` handles all the protocol translation and audio format conversions automatically. +This is the core of the integration. When a WebSocket connection is established at `/media-stream`, you create a transport layer that bridges SignalWire's audio stream to OpenAI's Realtime API. The transport layer automatically handles protocol translation and audio format conversions, so audio flows bidirectionally without any manual conversion. -**Initialize WebSocket server** +In the code below, we handle WebSocket lifecycle events (connection, disconnection, errors), create the transport layer with the configured audio format, connect to OpenAI's Realtime API while logging key events (AI responses, transcriptions, tool calls), and trigger an immediate AI response to greet the caller. 
```typescript title="src/routes/streaming.ts" import type { WebSocket } from 'ws'; From 58936216f10760f566d3a4b1705fcd33a0e2f233 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 7 Nov 2025 18:18:48 +0000 Subject: [PATCH 70/73] codec cards fixes --- .../nodejs/realtime-streaming-to-openai/index.mdx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index f43141310..cfd5563c8 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -693,17 +693,17 @@ Choose the right audio codec for your use case. The default is G.711 ΞΌ-law. title="PCM16 @ 24kHz" description="Crystal clear audio for demos and high-quality applications" > - **Sample rate:** `24 kHz` - **Bandwidth:** `~384 kbps` - **Quality:** `High definition` + **Sample rate:** `24 kHz` + **Bandwidth:** `~384 kbps` + **Quality:** `High definition`
- **Sample rate:** `8 kHz` - **Bandwidth:** `~64 kbps` - **Quality:** `Standard telephony` + **Sample rate:** `8 kHz` + **Bandwidth:** `~64 kbps` + **Quality:** `Standard telephony` From 2fc85fc82324cde3884f7f7828be0187a491b19a Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 7 Nov 2025 18:27:51 +0000 Subject: [PATCH 71/73] update tools accordion --- .../realtime-streaming-to-openai/index.mdx | 96 +++++++++++-------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index cfd5563c8..5614474e4 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -439,12 +439,63 @@ export const AGENT_CONFIG: RealtimeAgentConfiguration = { -Enable your AI to execute server-side tools during conversations. +Tools are server-side functions that the AI can call during a conversation. In production, extract the implementation logic into separate files as shown in the repository. -**Define tools** + + -Tools are functions the AI can call during a conversation. -Here's the structure—in production, you extract the implementation logic into separate files as shown in the repository. +Get the current time in Eastern Time. Uses JavaScript's built-in `toLocaleString` method—no external APIs required.
+ +```typescript title="src/tools/time.tool.ts" +import { z } from 'zod'; +import { tool as realtimeTool } from '@openai/agents/realtime'; +import { ERROR_MESSAGES } from '../constants.js'; + +export const timeTool = realtimeTool({ + name: 'get_time', + description: 'Get the current time in Eastern Time', + parameters: z.object({}), // No parameters needed + execute: async () => { + try { + const now = new Date(); + + // Always format for Eastern Time + const easternTime = now.toLocaleString('en-US', { + timeZone: 'America/New_York', + timeZoneName: 'short', + weekday: 'long', + year: 'numeric', + month: 'long', + day: 'numeric', + hour: 'numeric', + minute: '2-digit' + }); + + return `The current time in Eastern Time is ${easternTime}.`; + } catch (error) { + // Return fallback message if time formatting fails + return ERROR_MESSAGES.TIME_UNAVAILABLE; + } + }, +}); +``` + +1. **User asks**: "What time is it?" +2. **AI recognizes intent**: Needs time information +3. **Function call triggered**: `get_time()` +4. **Server executes**: Gets current Eastern Time +5. **Result returned**: AI incorporates into response +6. **User hears**: "The current time is 3:45 PM Eastern Time." + + + + +Get current weather information for a specified US location. +Uses the +[OpenStreetMap Nominatim API](https://nominatim.org/) +for geocoding and the +[US National Weather Service API](https://www.weather.gov/documentation/services-web-api) +for forecast data. 
```typescript title="src/tools/weather.tool.ts" import { z } from 'zod'; @@ -538,40 +589,6 @@ export const weatherTool = realtimeTool({ }); ``` -```typescript title="src/tools/time.tool.ts" -import { z } from 'zod'; -import { tool as realtimeTool } from '@openai/agents/realtime'; -import { ERROR_MESSAGES } from '../constants.js'; - -export const timeTool = realtimeTool({ - name: 'get_time', - description: 'Get the current time in Eastern Time', - parameters: z.object({}), // No parameters needed - execute: async () => { - try { - const now = new Date(); - - // Always format for Eastern Time - const easternTime = now.toLocaleString('en-US', { - timeZone: 'America/New_York', - timeZoneName: 'short', - weekday: 'long', - year: 'numeric', - month: 'long', - day: 'numeric', - hour: 'numeric', - minute: '2-digit' - }); - - return `The current time in Eastern Time is ${easternTime}.`; - } catch (error) { - // Return fallback message if time formatting fails - return ERROR_MESSAGES.TIME_UNAVAILABLE; - } - }, -}); -``` - 1. **User asks**: "What's the weather in New York?" 2. **AI recognizes intent**: Needs weather information 3. **Function call triggered**: `get_weather({ location: "New York" })` @@ -579,7 +596,8 @@ export const timeTool = realtimeTool({ 5. **Result returned**: AI incorporates into response 6. **User hears**: "The weather in New York is 72°F and sunny." -All of this happens in real-time during the conversation.
+ + From 3242835e4f96e99a7c6b7b1876006dcd2b3b81c0 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 7 Nov 2025 18:28:07 +0000 Subject: [PATCH 72/73] update claude.md --- .cursor/rules/docs.mdc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc index ee687df4d..efdbbd309 100644 --- a/.cursor/rules/docs.mdc +++ b/.cursor/rules/docs.mdc @@ -61,7 +61,7 @@ description: "Brief page description" # for SEO - **Titles**: Avoid gerunds in titles when practical ('Install package' rather than 'Installing the package') - **Sentence case**: Apply sentence case to card titles, tab titles, and pseudo-headers - **Pseudo-headers**: Titles made with simple bolded text (either on their own line or in lists with colons) should use sentence case -- **Semantic line breaks**: Follow the SemBr specification by putting line breaks +- **Semantic line breaks**: Follow the SemBr specification by putting line breaks before and after major clauses. Links should occupy their own complete line. ### Header formatting - Use sentence case for all headers (capitalize only the first word and proper nouns) From 521d889aa39239c0dfefe5d2cd286b61d1aacf28 Mon Sep 17 00:00:00 2001 From: ALR Date: Fri, 7 Nov 2025 18:42:30 +0000 Subject: [PATCH 73/73] clarify architecture --- .../voice/nodejs/realtime-streaming-to-openai/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx index 5614474e4..409b4310d 100644 --- a/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx +++ b/website/docs/main/compatibility-api/guides/voice/nodejs/realtime-streaming-to-openai/index.mdx @@ -266,8 +266,8 @@ The application consists of four key components: 1. 
**cXML Server** (Fastify): Receives incoming call webhooks and returns instructions, in the form of a [cXML Script][cxml], to SignalWire 2. **WebSocket Bridge** (SignalWireCompatibilityTransportLayer): Translates between SignalWire's media stream protocol and OpenAI's Realtime API format -3. **OpenAI Connection**: Manages the realtime connection to OpenAI for speech processing and AI responses, using the RealtimeSession class from OpenAI's Realtime SDK -4. **Function Calling**: Server-side tool execution (weather, time, custom functions) during conversations +3. **Realtime agent and session**: Creates a RealtimeAgent with behavior instructions and attaches it to a RealtimeSession, which manages the bidirectional connection to OpenAI's Realtime API using the transport layer for speech processing and AI responses +4. **Function calling**: Server-side tool execution (weather, time, custom functions) during conversations