# Building an Interruptible Customer Support Agent with OpenAI Realtime API

This notebook demonstrates how to build a voice-based customer support agent using the **OpenAI Realtime API (Beta)**.

## Key Features Demonstrated
1. **WebSocket Connection**: Persistent, low-latency stateful connection.
2. **Tool Use**: The agent can look up mock data (Order Status).
3. **Interruption Handling**: The client handles `input_audio_buffer.speech_started` to cancel the AI's response when the user interrupts.

## Prerequisites
- OpenAI API Key with access to `gpt-4o-realtime-preview`.
- Python 3.10+
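- `websockets` (third-party; version 14 or later, because the code passes `additional_headers` to `websockets.connect`), `asyncio` (part of the Python standard library), `numpy` (listed as a prerequisite but not imported by the cells shown here)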


In [None]:
import asyncio
import json
import logging
import websockets
import traceback

# Configure logging so events are visible live: emit INFO-and-above records,
# each prefixed with a timestamp and its level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Named logger used by the agent code in this notebook.
logger = logging.getLogger("RealtimeAgent")

## 1. Define Tools

We define a simple tool `get_order_status` to look up orders in a mock database.

In [None]:
# Mock Data: an in-memory stand-in for an order database, keyed by order ID.
MOCK_DB = {
    "order_123": {"status": "shipped", "delivery_date": "2024-12-20"},
    "order_456": {"status": "processing", "delivery_date": "2024-12-25"},
}

def get_order_status(order_id: str):
    """
    Return the mock record for ``order_id`` serialized as a JSON string.

    An ID that is not present in ``MOCK_DB`` produces
    ``{"status": "not_found"}``. A progress line is printed on every call.
    """
    print(f"\n[Tool] Looking up order {order_id}...")
    try:
        record = MOCK_DB[order_id]
    except KeyError:
        # Unknown order: report it in-band rather than raising.
        record = {"status": "not_found"}
    return json.dumps(record)

# Tool definitions offered to the model. RealtimeAgent.initialize_session sends
# this list as the "tools" field of its session.update event.
tools_schema = [
    {
        "type": "function",
        # Must match the tool name that RealtimeAgent dispatches on.
        "name": "get_order_status",
        "description": "Get the status of a customer order",
        # JSON Schema describing the arguments object the model must produce.
        "parameters": {
            "type": "object",
            "properties": {
                "order_id": {
                    "type": "string",
                    "description": "The order ID, e.g. order_123"
                }
            },
            # The model must always supply an order_id.
            "required": ["order_id"]
        }
    }
]

## 2. The Realtime Agent Class

This class handles the WebSocket connection, event parsing, and tool calls. Audio playback is left as a stub: `response.audio.delta` events are received but not played.

In [None]:
class RealtimeAgent:
    """Client for one OpenAI Realtime API session over a WebSocket.

    The agent configures the session for server-side voice activity
    detection (VAD), runs the ``get_order_status`` tool when the model calls
    it, and cancels an in-flight response when the user starts speaking.

    Args:
        url: Base WebSocket endpoint of the Realtime API.
        api_key: API key sent as a bearer token when connecting.
        model: Model name appended to ``url`` as the ``model`` query parameter.
    """

    def __init__(self, url="wss://api.openai.com/v1/realtime", api_key=None, model="gpt-4o-realtime-preview-2024-10-01"):
        self.url = f"{url}?model={model}"
        self.api_key = api_key
        self.ws = None
        # Set to True by the caller to make run_loop exit on the next message.
        self.should_stop = False
        # True between a ``response.created`` and its ``response.done`` event.
        # Used so that ``response.cancel`` is only sent when there is
        # actually a response to cancel.
        self._response_active = False

    async def connect(self):
        """Open the WebSocket and send the initial session configuration.

        Raises:
            Exception: Whatever the connection attempt or the initial send
                raises; it is logged and re-raised unchanged.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "OpenAI-Beta": "realtime=v1"
        }
        try:
            logger.info("Connecting to %s...", self.url)
            self.ws = await websockets.connect(self.url, additional_headers=headers)
            logger.info("Connected!")
            await self.initialize_session()
        except Exception as e:
            logger.error("Connection failed: %s", e)
            raise

    async def initialize_session(self):
        """Send initial session configuration."""
        event = {
            "type": "session.update",
            "session": {
                "modalities": ["text", "audio"],
                "instructions": "You are a helpful customer support agent. Check order status when asked.",
                "voice": "alloy",
                "turn_detection": {"type": "server_vad"},
                "tools": tools_schema,
                "tool_choice": "auto",
            }
        }
        await self.send_event(event)

    async def send_event(self, event):
        """Serialize ``event`` to JSON and send it; a no-op when not connected."""
        if self.ws:
            await self.ws.send(json.dumps(event))

    async def run_loop(self):
        """Main loop to receive messages.

        Runs until the socket closes or ``should_stop`` is set. A frame that
        is not valid JSON is logged and skipped so that one bad message does
        not end the session; any other error is logged and ends the loop.
        """
        try:
            async for message in self.ws:
                if self.should_stop:
                    break
                try:
                    data = json.loads(message)
                except ValueError as e:
                    logger.error("Skipping undecodable message: %s", e)
                    continue
                await self.handle_message(data)
        except Exception as e:
            # logger.exception keeps the traceback, which a bare message loses.
            logger.exception("Loop error: %s", e)

    async def handle_message(self, data):
        """Dispatch one decoded server event by its ``type`` field.

        Args:
            data: The server event as a dict.
        """
        event_type = data.get("type")

        if event_type == "response.created":
            self._response_active = True

        elif event_type == "response.done":
            self._response_active = False

        elif event_type == "input_audio_buffer.speech_started":
            # Only cancel when a response is in flight: cancelling with
            # nothing to cancel makes the server reply with an error event.
            if self._response_active:
                logger.warning("[INTERRUPTION] User started speaking! Canceling current response.")
                await self.send_event({"type": "response.cancel"})
                # Mark inactive right away so repeated speech_started events
                # arriving before response.done do not send a second cancel.
                self._response_active = False

        elif event_type == "response.function_call_arguments.done":
            await self._handle_function_call(data)

        elif event_type == "response.audio.delta":
            # Here you would play audio bytes
            pass

        elif event_type == "error":
            logger.error("Error: %s", data.get("error"))

    async def _handle_function_call(self, data):
        """Run the tool named in a completed function call and reply to the model.

        A ``function_call_output`` item is always sent back, even when the
        tool name is unknown or the arguments are malformed, so the model
        receives an answer for every call it makes.
        """
        call_id = data.get("call_id")
        name = data.get("name")

        try:
            # A missing or empty arguments field is treated as an empty object.
            args = json.loads(data.get("arguments") or "{}")
        except (TypeError, ValueError) as e:
            logger.error("Invalid arguments for tool %s: %s", name, e)
            args = None

        if name != "get_order_status":
            logger.warning("Model requested unknown tool: %s", name)
            result = json.dumps({"error": f"unknown tool: {name}"})
        elif not isinstance(args, dict):
            result = json.dumps({"error": "arguments must be a JSON object"})
        else:
            result = get_order_status(args.get("order_id"))

        # Send output back
        await self.send_event({
            "type": "conversation.item.create",
            "item": {
                "type": "function_call_output",
                "call_id": call_id,
                "output": result
            }
        })
        # Trigger model to read the result
        await self.send_event({"type": "response.create"})

## 3. Running the Agent

Replace `YOUR_API_KEY_HERE` below with your own key. `main()` keeps the agent connected for 10 seconds to demonstrate the connection; uncomment `await main()` at the end of the cell to run it.

In [None]:
# REPLACE WITH YOUR KEY
API_KEY = "YOUR_API_KEY_HERE"

async def main():
    """Connect an agent, keep it listening for 10 seconds, then shut down.

    Returns immediately, after printing a reminder, while ``API_KEY`` still
    contains the placeholder text.
    """
    key_is_placeholder = "YOUR_API_KEY" in API_KEY
    if key_is_placeholder:
        print("Please set a valid API Key first!")
        return

    agent = RealtimeAgent(api_key=API_KEY)
    await agent.connect()

    # Receive server events in the background while this coroutine waits.
    receiver = asyncio.create_task(agent.run_loop())

    try:
        # No conversation starter is sent; server VAD reacts to microphone
        # input, so the demo simply stays connected for 10 seconds.
        for line in (
            "Agent is listening (server VAD)... Speak now if you have a mic setup!",
            "(Or manually trigger response.create if using mock)",
        ):
            print(line)
        await asyncio.sleep(10)
    finally:
        # Ask the loop to stop, close the socket, then wait for the
        # background task to finish.
        agent.should_stop = True
        await agent.ws.close()
        await receiver

# In Jupyter, we can await main directly
# await main()