Skip to content

Commit c9f3085

Browse files
authored
1 parent 0f89a8f commit c9f3085

File tree

1 file changed

+285
-28
lines changed

1 file changed

+285
-28
lines changed

openai-webrtc.html

+285-28
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
}
1818

1919
.container {
20-
max-width: 800px;
20+
max-width: 1200px;
2121
margin: 0 auto;
2222
}
2323

@@ -94,6 +94,84 @@
9494
background: #efe;
9595
color: #0a0;
9696
}
97+
98+
.stats-container {
99+
display: grid;
100+
grid-template-columns: 1fr 1fr;
101+
gap: 20px;
102+
margin: 20px 0;
103+
}
104+
105+
@media (max-width: 1000px) {
106+
.stats-container {
107+
grid-template-columns: 1fr;
108+
}
109+
}
110+
111+
.stats-box {
112+
background: #fff;
113+
border-radius: 4px;
114+
padding: 20px;
115+
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
116+
}
117+
118+
.stats-box h2 {
119+
margin: 0 0 15px 0;
120+
padding-bottom: 10px;
121+
border-bottom: 1px solid #eee;
122+
}
123+
124+
.stats-grid {
125+
display: grid;
126+
grid-template-columns: repeat(3, 1fr);
127+
gap: 20px;
128+
}
129+
130+
.stats-section {
131+
padding: 10px;
132+
background: #f8f9fa;
133+
border-radius: 4px;
134+
}
135+
136+
.stats-section h3 {
137+
margin: 0 0 10px 0;
138+
font-size: 1.1em;
139+
color: #495057;
140+
}
141+
142+
.stats-section div {
143+
margin: 5px 0;
144+
font-size: 0.95em;
145+
}
146+
147+
.stats-section span {
148+
font-weight: bold;
149+
color: #007bff;
150+
}
151+
152+
.events-container {
153+
margin-top: 20px;
154+
background: #fff;
155+
border-radius: 4px;
156+
padding: 15px;
157+
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
158+
}
159+
160+
.event-entry {
161+
font-family: monospace;
162+
white-space: pre;
163+
padding: 10px;
164+
margin: 10px 0;
165+
background: #f8f9fa;
166+
border-radius: 4px;
167+
border-left: 4px solid #007bff;
168+
}
169+
170+
.event-timestamp {
171+
color: #666;
172+
font-size: 0.9em;
173+
margin-bottom: 5px;
174+
}
97175
</style>
98176
</head>
99177
<body>
@@ -122,44 +200,223 @@ <h1>
122200
</div>
123201

124202
<div id="status" class="status"></div>
203+
204+
<div class="stats-container">
205+
<div class="stats-box">
206+
<h2>Most recent interaction</h2>
207+
<div class="stats-grid">
208+
<div class="stats-section">
209+
<h3>Input tokens</h3>
210+
<div>Audio: <span id="currentAudioInputTokens">0</span></div>
211+
<div>Text: <span id="currentTextInputTokens">0</span></div>
212+
<div>Cached: <span id="currentCachedTokens">0</span></div>
213+
</div>
214+
<div class="stats-section">
215+
<h3>Output tokens</h3>
216+
<div>Audio: <span id="currentAudioOutputTokens">0</span></div>
217+
<div>Text: <span id="currentTextOutputTokens">0</span></div>
218+
</div>
219+
<div class="stats-section">
220+
<h3>Costs</h3>
221+
<div>Input cost: <span id="currentInputCost">$0.00</span></div>
222+
<div>Output cost: <span id="currentOutputCost">$0.00</span></div>
223+
<div>Total cost: <span id="currentTotalCost">$0.00</span></div>
224+
</div>
225+
</div>
226+
</div>
227+
228+
<div class="stats-box">
229+
<h2>Session total</h2>
230+
<div class="stats-grid">
231+
<div class="stats-section">
232+
<h3>Input tokens</h3>
233+
<div>Audio: <span id="totalAudioInputTokens">0</span></div>
234+
<div>Text: <span id="totalTextInputTokens">0</span></div>
235+
<div>Cached: <span id="totalCachedTokens">0</span></div>
236+
</div>
237+
<div class="stats-section">
238+
<h3>Output tokens</h3>
239+
<div>Audio: <span id="totalAudioOutputTokens">0</span></div>
240+
<div>Text: <span id="totalTextOutputTokens">0</span></div>
241+
</div>
242+
<div class="stats-section">
243+
<h3>Costs</h3>
244+
<div>Input cost: <span id="totalInputCost">$0.00</span></div>
245+
<div>Output cost: <span id="totalOutputCost">$0.00</span></div>
246+
<div>Total cost: <span id="totalTotalCost">$0.00</span></div>
247+
</div>
248+
</div>
249+
</div>
250+
</div>
251+
252+
<p>I'm not 100% confident that these cost calculations are correct.</p>
253+
254+
<div class="events-container">
255+
<h2>Session Events</h2>
256+
<div id="events"></div>
257+
</div>
125258
</div>
126259

127260
<script type="module">
128-
async function createRealtimeSession(inStream, token, voice) {
129-
const pc = new RTCPeerConnection()
261+
// Cumulative token counts and costs for the whole session. Every field starts
// at zero; updateSessionTotals() adds each completed response's numbers here
// and then re-renders the "Session total" panel from these values.
let sessionTotals = {
  audioInputTokens: 0,
  textInputTokens: 0,
  cachedInputTokens: 0,
  audioOutputTokens: 0,
  textOutputTokens: 0,
  inputCost: 0,
  outputCost: 0,
  totalCost: 0,
};
272+
273+
/**
 * Convert the `usage` object of a `response.done` event into the flat token
 * stats consumed by calculateCosts() and the stats panels.
 *
 * Non-cached input counts are the reported audio/text input tokens minus the
 * matching entry of `cached_tokens_details`. Any detail object or counter that
 * is absent from the payload is treated as 0 instead of throwing.
 *
 * @param {object} usage - `eventData.response.usage` from a `response.done` event.
 * @returns {{audioInputTokens: number, textInputTokens: number,
 *   cachedInputTokens: number, audioOutputTokens: number,
 *   textOutputTokens: number}}
 */
function usageToTokenStats(usage) {
  const inputDetails = usage.input_token_details ?? {};
  const outputDetails = usage.output_token_details ?? {};
  const cachedDetails = inputDetails.cached_tokens_details ?? {};

  return {
    audioInputTokens: (inputDetails.audio_tokens ?? 0) - (cachedDetails.audio_tokens ?? 0),
    textInputTokens: (inputDetails.text_tokens ?? 0) - (cachedDetails.text_tokens ?? 0),
    cachedInputTokens: inputDetails.cached_tokens ?? 0,
    audioOutputTokens: outputDetails.audio_tokens ?? 0,
    textOutputTokens: outputDetails.text_tokens ?? 0,
  };
}

/**
 * Open a WebRTC peer connection to the OpenAI Realtime endpoint.
 *
 * Sends the first track of `inStream` to the peer, plays any remote track it
 * receives, and listens on the "oai-events" data channel. Every message that
 * parses as JSON is appended to the event log; `response.done` messages that
 * carry usage data also update the current-interaction and session-total
 * panels.
 *
 * @param {MediaStream} inStream - Stream whose first track is sent to the peer.
 * @param {string} token - Sent as `Authorization: Bearer <token>`.
 * @param {string} voice - Sent as the `voice` query parameter.
 * @returns {Promise<RTCPeerConnection>} The connection, after the remote
 *   answer has been applied.
 * @throws {Error} If the SDP exchange request returns a non-2xx status.
 */
async function createRealtimeSession(inStream, token, voice) {
  const pc = new RTCPeerConnection();

  // Play whatever audio the remote side sends back.
  pc.ontrack = (e) => {
    const audio = new Audio();
    audio.srcObject = e.streams[0];
    // play() returns a promise; report a rejection instead of leaving it
    // unhandled.
    audio.play().catch((err) => {
      console.error('Audio playback failed:', err);
    });
  };

  pc.addTrack(inStream.getTracks()[0]);

  const dc = pc.createDataChannel('oai-events');
  dc.addEventListener('message', (e) => {
    let eventData;
    try {
      eventData = JSON.parse(e.data);
    } catch (err) {
      console.error('Error parsing event data:', err);
      return;
    }

    // Stats handling gets its own try/catch so that a failure there is not
    // misreported as a parse error and never prevents the event being logged.
    try {
      const usage = eventData?.type === 'response.done' ? eventData.response?.usage : undefined;
      if (usage) {
        const currentStats = usageToTokenStats(usage);
        const costs = calculateCosts(currentStats);
        updateCurrentStats(currentStats, costs);
        updateSessionTotals(currentStats, costs);
      }
    } catch (err) {
      console.error('Error updating usage stats:', err);
    }

    addEventToLog(eventData);
  });

  const offer = await pc.createOffer();
  await pc.setLocalDescription(offer);

  const opts = {
    method: 'POST',
    body: offer.sdp,
    headers: {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/sdp',
    },
  };

  // Build the query string through URLSearchParams so `voice` is encoded.
  const url = new URL('https://api.openai.com/v1/realtime');
  url.searchParams.set('model', 'gpt-4o-realtime-preview-2024-12-17');
  url.searchParams.set('voice', voice);

  const resp = await fetch(url, opts);
  if (!resp.ok) {
    // Without this check an error body would be handed to
    // setRemoteDescription() as if it were an SDP answer.
    throw new Error(`Realtime session request failed: ${resp.status} ${resp.statusText}`);
  }

  await pc.setRemoteDescription({
    type: 'answer',
    sdp: await resp.text(),
  });

  return pc;
}
344+
345+
/**
 * Estimate the cost of one interaction from its token counts.
 *
 * Each count is multiplied by a hard-coded per-token rate. All cached input
 * tokens are charged at the single CACHED_AUDIO_COST rate, whether they were
 * audio or text. Any count that is omitted is treated as 0 so a partial stats
 * object cannot turn the results into NaN.
 *
 * NOTE(review): the rates are hard-coded and the page itself states that the
 * cost calculations may not be correct — verify against current pricing.
 *
 * @param {object} [stats]
 * @param {number} [stats.audioInputTokens=0] - Non-cached audio input tokens.
 * @param {number} [stats.textInputTokens=0] - Non-cached text input tokens.
 * @param {number} [stats.cachedInputTokens=0] - Cached input tokens.
 * @param {number} [stats.audioOutputTokens=0] - Audio output tokens.
 * @param {number} [stats.textOutputTokens=0] - Text output tokens.
 * @returns {{inputCost: number, outputCost: number, totalCost: number}}
 */
function calculateCosts({
  audioInputTokens = 0,
  textInputTokens = 0,
  cachedInputTokens = 0,
  audioOutputTokens = 0,
  textOutputTokens = 0,
} = {}) {
  // Per-token rates.
  const AUDIO_INPUT_COST = 0.00004;
  const AUDIO_OUTPUT_COST = 0.00008;
  const CACHED_AUDIO_COST = 0.0000025;
  const TEXT_INPUT_COST = 0.0000025;
  const TEXT_OUTPUT_COST = 0.00001;

  const audioInputCost = audioInputTokens * AUDIO_INPUT_COST;
  const cachedInputCost = cachedInputTokens * CACHED_AUDIO_COST;
  const textInputCost = textInputTokens * TEXT_INPUT_COST;
  const audioOutputCost = audioOutputTokens * AUDIO_OUTPUT_COST;
  const textOutputCost = textOutputTokens * TEXT_OUTPUT_COST;

  // Summation order is kept left-to-right so the floating-point results match
  // the straightforward "add every component" formula exactly.
  const inputCost = audioInputCost + cachedInputCost + textInputCost;
  const outputCost = audioOutputCost + textOutputCost;

  return {
    inputCost,
    outputCost,
    totalCost: inputCost + audioOutputCost + textOutputCost,
  };
}
364+
365+
/**
 * Render the token counts and costs of the latest interaction into the
 * "Most recent interaction" panel.
 *
 * Token counts are written with `toLocaleString()`; costs are written as a
 * `$` followed by the amount fixed to four decimals.
 *
 * @param {object} stats - Token counts: audioInputTokens, textInputTokens,
 *   cachedInputTokens, audioOutputTokens, textOutputTokens.
 * @param {object} costs - inputCost, outputCost, totalCost.
 */
function updateCurrentStats(stats, costs) {
  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };

  // Element id -> token count shown in that element.
  const tokenFields = [
    ['currentAudioInputTokens', stats.audioInputTokens],
    ['currentTextInputTokens', stats.textInputTokens],
    ['currentCachedTokens', stats.cachedInputTokens],
    ['currentAudioOutputTokens', stats.audioOutputTokens],
    ['currentTextOutputTokens', stats.textOutputTokens],
  ];
  for (const [id, count] of tokenFields) {
    setText(id, count.toLocaleString());
  }

  // Element id -> cost shown in that element.
  const costFields = [
    ['currentInputCost', costs.inputCost],
    ['currentOutputCost', costs.outputCost],
    ['currentTotalCost', costs.totalCost],
  ];
  for (const [id, amount] of costFields) {
    setText(id, `$${amount.toFixed(4)}`);
  }
}
375+
376+
/**
 * Add one interaction's token counts and costs to the running `sessionTotals`
 * and re-render the "Session total" panel from the updated totals.
 *
 * Token totals are written with `toLocaleString()`; cost totals are written as
 * a `$` followed by the amount fixed to four decimals.
 *
 * @param {object} currentStats - Token counts for the interaction:
 *   audioInputTokens, textInputTokens, cachedInputTokens, audioOutputTokens,
 *   textOutputTokens.
 * @param {object} costs - inputCost, outputCost, totalCost for the interaction.
 */
function updateSessionTotals(currentStats, costs) {
  // sessionTotals key -> id of the element that displays it.
  const tokenDisplays = [
    ['audioInputTokens', 'totalAudioInputTokens'],
    ['textInputTokens', 'totalTextInputTokens'],
    ['cachedInputTokens', 'totalCachedTokens'],
    ['audioOutputTokens', 'totalAudioOutputTokens'],
    ['textOutputTokens', 'totalTextOutputTokens'],
  ];
  const costDisplays = [
    ['inputCost', 'totalInputCost'],
    ['outputCost', 'totalOutputCost'],
    ['totalCost', 'totalTotalCost'],
  ];

  // Accumulate everything first, then render, so the panel is only written
  // from fully updated totals.
  for (const [key] of tokenDisplays) {
    sessionTotals[key] += currentStats[key];
  }
  for (const [key] of costDisplays) {
    sessionTotals[key] += costs[key];
  }

  for (const [key, id] of tokenDisplays) {
    document.getElementById(id).textContent = sessionTotals[key].toLocaleString();
  }
  for (const [key, id] of costDisplays) {
    document.getElementById(id).textContent = `$${sessionTotals[key].toFixed(4)}`;
  }
}
397+
398+
/**
 * Prepend one event to the "Session Events" log.
 *
 * Builds an `.event-entry` element holding an ISO timestamp (taken when the
 * entry is added) and the event pretty-printed as JSON, inserts it at the top
 * of the `#events` container, then drops entries from the bottom until at most
 * `maxEvents` remain.
 *
 * @param {*} eventData - Event to display; serialised with JSON.stringify.
 * @param {number} [maxEvents=50] - Maximum number of entries kept in the log,
 *   to prevent unbounded DOM growth.
 */
function addEventToLog(eventData, maxEvents = 50) {
  const eventsContainer = document.getElementById('events');

  const timestamp = document.createElement('div');
  timestamp.className = 'event-timestamp';
  timestamp.textContent = new Date().toISOString();

  const content = document.createElement('div');
  // textContent (not innerHTML) so event payloads are never parsed as markup.
  content.textContent = JSON.stringify(eventData, null, 2);

  const eventEntry = document.createElement('div');
  eventEntry.className = 'event-entry';
  eventEntry.appendChild(timestamp);
  eventEntry.appendChild(content);

  // Newest events go at the top.
  eventsContainer.insertBefore(eventEntry, eventsContainer.firstChild);

  // Trim the oldest entries from the bottom.
  while (eventsContainer.children.length > maxEvents) {
    eventsContainer.removeChild(eventsContainer.lastChild);
  }
}
164421

165422
const startButton = document.getElementById('startButton')

0 commit comments

Comments
 (0)