diff --git a/examples/realtime-twilio/README.md b/examples/realtime-twilio/README.md index cb1de554..ae0054e7 100644 --- a/examples/realtime-twilio/README.md +++ b/examples/realtime-twilio/README.md @@ -5,6 +5,10 @@ The script in `index.ts` starts a Fastify server that serves TwiML for incoming endpoint for streaming audio. When a call connects, the audio stream is forwarded through a `TwilioRealtimeTransportLayer` to a `RealtimeSession` so the `RealtimeAgent` can respond in real time. +The demo agent mirrors the [realtime-next](../realtime-next) example. It includes the same MCP integrations +(`dnd` and `deepwiki`) as well as local sample tools for weather lookups and a secret number helper. Ask the +agent to "look that up in Deep Wiki" or "roll a Dungeons and Dragons character" to try the hosted MCP tools. + To try it out you must have a Twilio phone number. Expose your localhost with a tunneling service such as ngrok and set the phone number's incoming call URL to `https://<your-tunnel-domain>/incoming-call`. 
@@ -13,4 +17,3 @@ Start the server with: ```bash pnpm -F realtime-twilio start ``` - diff --git a/examples/realtime-twilio/index.ts b/examples/realtime-twilio/index.ts index e7898d96..0335806e 100644 --- a/examples/realtime-twilio/index.ts +++ b/examples/realtime-twilio/index.ts @@ -1,9 +1,18 @@ import Fastify from 'fastify'; +import type { FastifyInstance, FastifyReply, FastifyRequest } from 'fastify'; import dotenv from 'dotenv'; import fastifyFormBody from '@fastify/formbody'; import fastifyWs from '@fastify/websocket'; -import { RealtimeAgent, RealtimeSession } from '@openai/agents/realtime'; +import { + RealtimeAgent, + RealtimeSession, + backgroundResult, + tool, +} from '@openai/agents/realtime'; import { TwilioRealtimeTransportLayer } from '@openai/agents-extensions'; +import { hostedMcpTool } from '@openai/agents'; +import { z } from 'zod'; +import process from 'node:process'; // Load environment variables from .env file dotenv.config(); @@ -21,21 +30,60 @@ const fastify = Fastify(); fastify.register(fastifyFormBody); fastify.register(fastifyWs); +const weatherTool = tool({ + name: 'weather', + description: 'Get the weather in a given location.', + parameters: z.object({ + location: z.string(), + }), + execute: async ({ location }: { location: string }) => { + return backgroundResult(`The weather in ${location} is sunny.`); + }, +}); + +const secretTool = tool({ + name: 'secret', + description: 'A secret tool to tell the special number.', + parameters: z.object({ + question: z + .string() + .describe( + 'The question to ask the secret tool; mainly about the special number.', + ), + }), + execute: async ({ question }: { question: string }) => { + return `The answer to ${question} is 42.`; + }, + needsApproval: true, +}); + const agent = new RealtimeAgent({ - name: 'Triage Agent', + name: 'Greeter', instructions: - 'You are a helpful assistant that starts every conversation with a creative greeting.', + 'You are a friendly assistant. 
When you use a tool always first say what you are about to do.', + tools: [ + hostedMcpTool({ + serverLabel: 'dnd', + }), + hostedMcpTool({ + serverLabel: 'deepwiki', + }), + secretTool, + weatherTool, + ], }); // Root Route -fastify.get('/', async (request, reply) => { +fastify.get('/', async (_request: FastifyRequest, reply: FastifyReply) => { reply.send({ message: 'Twilio Media Stream Server is running!' }); }); // Route for Twilio to handle incoming and outgoing calls // punctuation to improve text-to-speech translation -fastify.all('/incoming-call', async (request, reply) => { - const twimlResponse = ` +fastify.all( + '/incoming-call', + async (request: FastifyRequest, reply: FastifyReply) => { + const twimlResponse = ` O.K. you can start talking! @@ -43,28 +91,67 @@ fastify.all('/incoming-call', async (request, reply) => { `.trim(); - reply.type('text/xml').send(twimlResponse); -}); + reply.type('text/xml').send(twimlResponse); + }, +); // WebSocket route for media-stream -fastify.register(async (fastify) => { - fastify.get('/media-stream', { websocket: true }, async (connection) => { - const twilioTransportLayer = new TwilioRealtimeTransportLayer({ - twilioWebSocket: connection, - }); - - const session = new RealtimeSession(agent, { - transport: twilioTransportLayer, - }); - - await session.connect({ - apiKey: OPENAI_API_KEY, - }); - console.log('Connected to the OpenAI Realtime API'); - }); +fastify.register(async (scopedFastify: FastifyInstance) => { + scopedFastify.get( + '/media-stream', + { websocket: true }, + async (connection: any) => { + const twilioTransportLayer = new TwilioRealtimeTransportLayer({ + twilioWebSocket: connection, + }); + + const session = new RealtimeSession(agent, { + transport: twilioTransportLayer, + model: 'gpt-realtime', + config: { + audio: { + output: { + voice: 'verse', + }, + }, + }, + }); + + session.on('mcp_tools_changed', (tools: { name: string }[]) => { + const toolNames = tools.map((tool) => tool.name).join(', '); + 
console.log(`Available MCP tools: ${toolNames || 'None'}`); + }); + + session.on( + 'tool_approval_requested', + (_context: unknown, _agent: unknown, approvalRequest: any) => { + console.log( + `Approving tool call for ${approvalRequest.approvalItem.rawItem.name}.`, + ); + session + .approve(approvalRequest.approvalItem) + .catch((error: unknown) => + console.error('Failed to approve tool call.', error), + ); + }, + ); + + session.on( + 'mcp_tool_call_completed', + (_context: unknown, _agent: unknown, toolCall: unknown) => { + console.log('MCP tool call completed.', toolCall); + }, + ); + + await session.connect({ + apiKey: OPENAI_API_KEY, + }); + console.log('Connected to the OpenAI Realtime API'); + }, + ); }); -fastify.listen({ port: PORT }, (err) => { +fastify.listen({ port: PORT }, (err: Error | null) => { if (err) { console.error(err); process.exit(1);