Commit 55c754b

1 parent 8bb09dc commit 55c754b

1 file changed: openai-webrtc.html (+152 -149)

@@ -56,7 +56,7 @@
       font-weight: bold;
     }
 
-    input {
+    input, select {
       width: 100%;
       padding: 8px;
       font-size: 16px;
@@ -108,170 +108,173 @@ <h1>
         <label for="tokenInput">OpenAI API Token</label>
         <input type="password" id="tokenInput">
       </div>
+      <div class="form-group">
+        <label for="voiceSelect">Voice</label>
+        <select id="voiceSelect">
+          <option value="ash">Ash</option>
+          <option value="ballad">Ballad</option>
+          <option value="coral">Coral</option>
+          <option value="sage">Sage</option>
+          <option value="verse">Verse</option>
+        </select>
+      </div>
       <button id="startButton">Start Session</button>
     </div>
 
     <div id="status" class="status"></div>
   </div>
 
   <script type="module">
-    // Text decoder for incoming messages
-    const decoder = new TextDecoder()
-
-    async function createRealtimeSession(inStream, token) {
-      const pc = new RTCPeerConnection()
-
-      // Handle incoming audio
-      pc.ontrack = e => {
-        const audio = new Audio()
-        audio.srcObject = e.streams[0]
-        audio.play()
-      }
-
-      // Handle incoming text messages via data channel
-      pc.ondatachannel = event => {
-        const channel = event.channel
-        channel.onmessage = msg => {
-          console.log(decoder.decode(msg.data))
+    async function createRealtimeSession(inStream, token, voice) {
+      const pc = new RTCPeerConnection()
+
+      // Handle incoming audio
+      pc.ontrack = e => {
+        const audio = new Audio()
+        audio.srcObject = e.streams[0]
+        audio.play()
+      }
+
+      pc.addTrack(inStream.getTracks()[0])
+
+      const offer = await pc.createOffer()
+      await pc.setLocalDescription(offer)
+
+      const headers = {
+        Authorization: `Bearer ${token}`,
+        'Content-Type': 'application/sdp'
+      }
+
+      const opts = {
+        method: 'POST',
+        body: offer.sdp,
+        headers
+      }
+
+      const model = 'gpt-4o-realtime-preview-2024-12-17'
+      const resp = await fetch(`https://api.openai.com/v1/realtime?model=${model}&voice=${voice}`, opts)
+
+      await pc.setRemoteDescription({
+        type: 'answer',
+        sdp: await resp.text()
+      })
+
+      return pc
     }
-      }
-
-      pc.addTrack(inStream.getTracks()[0])
-
-      const offer = await pc.createOffer()
-      await pc.setLocalDescription(offer)
-
-      const headers = {
-        Authorization: `Bearer ${token}`,
-        'Content-Type': 'application/sdp'
-      }
-
-      const opts = {
-        method: 'POST',
-        body: offer.sdp,
-        headers
-      }
-
-      const resp = await fetch('https://api.openai.com/v1/realtime', opts)
-      await pc.setRemoteDescription({
-        type: 'answer',
-        sdp: await resp.text()
-      })
-
-      return pc
-    }
-
-    const startButton = document.getElementById('startButton')
-    const tokenInput = document.getElementById('tokenInput')
-    const status = document.getElementById('status')
-    const audioIndicator = document.getElementById('audioIndicator')
 
-    let peerConnection = null
-    let audioContext = null
-    let audioStream = null
-
-    // Load saved API key on page load
-    document.addEventListener('DOMContentLoaded', () => {
-      const savedToken = localStorage.getItem('openai_api_key')
-      if (savedToken) {
-        tokenInput.value = savedToken
-      }
-    })
+    const startButton = document.getElementById('startButton')
+    const tokenInput = document.getElementById('tokenInput')
+    const voiceSelect = document.getElementById('voiceSelect')
+    const status = document.getElementById('status')
+    const audioIndicator = document.getElementById('audioIndicator')
 
-    // Audio visualization
-    function setupAudioVisualization(stream) {
-      audioContext = new AudioContext()
-      const source = audioContext.createMediaStreamSource(stream)
-      const analyzer = audioContext.createAnalyser()
-      analyzer.fftSize = 256
-
-      source.connect(analyzer)
-
-      const bufferLength = analyzer.frequencyBinCount
-      const dataArray = new Uint8Array(bufferLength)
-
-      function updateIndicator() {
-        if (!audioContext) return
-
-        analyzer.getByteFrequencyData(dataArray)
-        const average = dataArray.reduce((a, b) => a + b) / bufferLength
-
-        audioIndicator.classList.toggle('active', average > 30)
-        requestAnimationFrame(updateIndicator)
-      }
-
-      updateIndicator()
-    }
+    let peerConnection = null
+    let audioContext = null
+    let audioStream = null
 
-    async function startSession() {
-      try {
-        // Save API key to localStorage
-        localStorage.setItem('openai_api_key', tokenInput.value)
-
-        status.className = 'status'
-        status.textContent = 'Requesting microphone access...'
-
-        audioStream = await navigator.mediaDevices.getUserMedia({
-          audio: true,
-          video: false
+    // Load saved API key on page load
+    document.addEventListener('DOMContentLoaded', () => {
+      const savedToken = localStorage.getItem('openai_api_key')
+      if (savedToken) {
+        tokenInput.value = savedToken
+      }
     })
-
-        setupAudioVisualization(audioStream)
-
-        status.textContent = 'Establishing connection...'
-
-        peerConnection = await createRealtimeSession(
-          audioStream,
-          tokenInput.value
-        )
-
-        status.className = 'status success'
-        status.textContent = 'Session established successfully!'
-        startButton.textContent = 'Stop Session'
-
-      } catch (err) {
-        status.className = 'status error'
-        status.textContent = `Error: ${err.message}`
-        console.error('Session error:', err)
-        stopSession()
-      }
-    }
 
-    function stopSession() {
-      if (peerConnection) {
-        peerConnection.close()
-        peerConnection = null
-      }
-
-      if (audioContext) {
-        audioContext.close()
-        audioContext = null
-      }
-
-      if (audioStream) {
-        audioStream.getTracks().forEach(track => track.stop())
-        audioStream = null
-      }
-
-      audioIndicator.classList.remove('active')
-      startButton.textContent = 'Start Session'
-    }
+    // Audio visualization
+    function setupAudioVisualization(stream) {
+      audioContext = new AudioContext()
+      const source = audioContext.createMediaStreamSource(stream)
+      const analyzer = audioContext.createAnalyser()
+      analyzer.fftSize = 256
+
+      source.connect(analyzer)
+
+      const bufferLength = analyzer.frequencyBinCount
+      const dataArray = new Uint8Array(bufferLength)
+
+      function updateIndicator() {
+        if (!audioContext) return
+
+        analyzer.getByteFrequencyData(dataArray)
+        const average = dataArray.reduce((a, b) => a + b) / bufferLength
+
+        audioIndicator.classList.toggle('active', average > 30)
+        requestAnimationFrame(updateIndicator)
+      }
+
+      updateIndicator()
+    }
 
-    startButton.addEventListener('click', () => {
-      if (peerConnection) {
-        stopSession()
-      } else {
-        if (!tokenInput.value) {
-          status.className = 'status error'
-          status.textContent = 'Please enter an API token'
-          return
+    async function startSession() {
+      try {
+        // Save API key to localStorage
+        localStorage.setItem('openai_api_key', tokenInput.value)
+
+        status.className = 'status'
+        status.textContent = 'Requesting microphone access...'
+
+        audioStream = await navigator.mediaDevices.getUserMedia({
+          audio: true,
+          video: false
+        })
+
+        setupAudioVisualization(audioStream)
+
+        status.textContent = 'Establishing connection...'
+
+        peerConnection = await createRealtimeSession(
+          audioStream,
+          tokenInput.value,
+          voiceSelect.value
+        )
+
+        status.className = 'status success'
+        status.textContent = 'Session established successfully!'
+        startButton.textContent = 'Stop Session'
+
+      } catch (err) {
+        status.className = 'status error'
+        status.textContent = `Error: ${err.message}`
+        console.error('Session error:', err)
+        stopSession()
+      }
     }
-        startSession()
-      }
-    })
 
-    // Cleanup on page unload
-    window.addEventListener('beforeunload', stopSession)
+    function stopSession() {
+      if (peerConnection) {
+        peerConnection.close()
+        peerConnection = null
+      }
+
+      if (audioContext) {
+        audioContext.close()
+        audioContext = null
+      }
+
+      if (audioStream) {
+        audioStream.getTracks().forEach(track => track.stop())
+        audioStream = null
+      }
+
+      audioIndicator.classList.remove('active')
+      startButton.textContent = 'Start Session'
+    }
+
+    startButton.addEventListener('click', () => {
+      if (peerConnection) {
+        stopSession()
+      } else {
+        if (!tokenInput.value) {
+          status.className = 'status error'
+          status.textContent = 'Please enter an API token'
+          return
+        }
+        startSession()
+      }
+    })
+
+    // Cleanup on page unload
+    window.addEventListener('beforeunload', stopSession)
   </script>
 </body>
 </html>
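For reference, with the new voice dropdown wired through, the SDP offer that createRealtimeSession posts now targets a URL like the one below (shown for the coral option; this is only an illustration of the query string the updated fetch call builds, not additional behaviour in the commit):

// Illustration only: endpoint and model string are the ones used in the diff above,
// and 'coral' is one of the values offered by the new voiceSelect dropdown.
const model = 'gpt-4o-realtime-preview-2024-12-17'
const voice = 'coral'
const url = `https://api.openai.com/v1/realtime?model=${model}&voice=${voice}`
console.log(url)
// -> https://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-12-17&voice=coral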
