|
56 | 56 | font-weight: bold; |
57 | 57 | } |
58 | 58 |
|
59 | | - input { |
| 59 | + input, select { |
60 | 60 | width: 100%; |
61 | 61 | padding: 8px; |
62 | 62 | font-size: 16px; |
@@ -108,170 +108,173 @@ <h1> |
108 | 108 | <label for="tokenInput">OpenAI API Token</label> |
109 | 109 | <input type="password" id="tokenInput"> |
110 | 110 | </div> |
| 111 | + <div class="form-group"> |
| 112 | + <label for="voiceSelect">Voice</label> |
| 113 | + <select id="voiceSelect"> |
| 114 | + <option value="ash">Ash</option> |
| 115 | + <option value="ballad">Ballad</option> |
| 116 | + <option value="coral">Coral</option> |
| 117 | + <option value="sage">Sage</option> |
| 118 | + <option value="verse">Verse</option> |
| 119 | + </select> |
| 120 | + </div> |
111 | 121 | <button id="startButton">Start Session</button> |
112 | 122 | </div> |
113 | 123 |
|
114 | 124 | <div id="status" class="status"></div> |
115 | 125 | </div> |
116 | 126 |
|
117 | 127 | <script type="module"> |
// Create a WebRTC session with the OpenAI Realtime API.
// inStream: MediaStream whose first audio track is sent to the model.
// token: OpenAI API key, sent as a Bearer credential.
// voice: voice preset name from the UI (e.g. 'ash', 'verse').
// Returns the connected RTCPeerConnection; throws if the SDP exchange fails.
async function createRealtimeSession(inStream, token, voice) {
  const pc = new RTCPeerConnection()

  // Play the model's audio as soon as its remote track arrives.
  pc.ontrack = e => {
    const audio = new Audio()
    audio.srcObject = e.streams[0]
    audio.play()
  }

  pc.addTrack(inStream.getTracks()[0])

  const offer = await pc.createOffer()
  await pc.setLocalDescription(offer)

  const headers = {
    Authorization: `Bearer ${token}`,
    'Content-Type': 'application/sdp'
  }

  const opts = {
    method: 'POST',
    body: offer.sdp,
    headers
  }

  const model = 'gpt-4o-realtime-preview-2024-12-17'
  // Build the endpoint with URL/searchParams so model and voice are
  // properly percent-encoded instead of string-concatenated.
  const url = new URL('https://api.openai.com/v1/realtime')
  url.searchParams.set('model', model)
  url.searchParams.set('voice', voice)
  const resp = await fetch(url, opts)

  // Surface API errors (bad key, bad voice, rate limit) directly instead of
  // handing the error body to setRemoteDescription, which would fail with a
  // confusing SDP-parse message.
  if (!resp.ok) {
    throw new Error(`Realtime API request failed: ${resp.status} ${await resp.text()}`)
  }

  await pc.setRemoteDescription({
    type: 'answer',
    sdp: await resp.text()
  })

  return pc
}
163 | | - |
// Cached references to the UI controls this script drives.
const startButton = document.getElementById('startButton')
const tokenInput = document.getElementById('tokenInput')
const voiceSelect = document.getElementById('voiceSelect')
const status = document.getElementById('status')
const audioIndicator = document.getElementById('audioIndicator')
180 | 170 |
|
// Mutable session state — non-null only while a session is active.
let peerConnection = null // RTCPeerConnection for the current session
let audioContext = null // AudioContext driving the mic-level indicator
let audioStream = null // Microphone MediaStream from getUserMedia
205 | 174 |
|
// Restore a previously saved API key so the user doesn't have to retype it.
document.addEventListener('DOMContentLoaded', () => {
  const storedKey = localStorage.getItem('openai_api_key')
  if (storedKey !== null && storedKey !== '') {
    tokenInput.value = storedKey
  }
})
218 | | - |
// Drive the mic-level indicator: toggle its 'active' class while the
// average frequency magnitude of the stream is above a small threshold.
function setupAudioVisualization(stream) {
  audioContext = new AudioContext()
  const analyser = audioContext.createAnalyser()
  analyser.fftSize = 256
  audioContext.createMediaStreamSource(stream).connect(analyser)

  const bins = analyser.frequencyBinCount
  const levels = new Uint8Array(bins)

  const updateIndicator = () => {
    // Stop the animation loop once stopSession() tears the context down.
    if (!audioContext) return

    analyser.getByteFrequencyData(levels)
    let total = 0
    for (const v of levels) total += v

    audioIndicator.classList.toggle('active', total / bins > 30)
    requestAnimationFrame(updateIndicator)
  }

  updateIndicator()
}
259 | 207 |
|
// Request the microphone, then negotiate a realtime session with the
// selected voice. Updates the status banner at each step; on any failure,
// reports the error and releases whatever was already acquired.
async function startSession() {
  try {
    // Persist the key for the next visit before anything can fail.
    localStorage.setItem('openai_api_key', tokenInput.value)

    status.className = 'status'
    status.textContent = 'Requesting microphone access...'

    const constraints = { audio: true, video: false }
    audioStream = await navigator.mediaDevices.getUserMedia(constraints)

    setupAudioVisualization(audioStream)

    status.textContent = 'Establishing connection...'

    peerConnection = await createRealtimeSession(audioStream, tokenInput.value, voiceSelect.value)

    status.className = 'status success'
    status.textContent = 'Session established successfully!'
    startButton.textContent = 'Stop Session'
  } catch (err) {
    status.className = 'status error'
    status.textContent = `Error: ${err.message}`
    console.error('Session error:', err)
    stopSession() // release the mic/context acquired before the failure
  }
}
// Tear down the session: close the peer connection and audio context,
// stop every microphone track, and reset the UI to its idle state.
function stopSession() {
  peerConnection?.close()
  peerConnection = null

  audioContext?.close()
  audioContext = null

  if (audioStream !== null) {
    for (const track of audioStream.getTracks()) {
      track.stop()
    }
    audioStream = null
  }

  audioIndicator.classList.remove('active')
  startButton.textContent = 'Start Session'
}
| 262 | + |
// Single Start/Stop button toggles the session; a session in progress is
// stopped, otherwise a new one is started after validating the token.
startButton.addEventListener('click', () => {
  if (peerConnection) {
    stopSession()
    return
  }
  if (!tokenInput.value) {
    status.className = 'status error'
    status.textContent = 'Please enter an API token'
    return
  }
  startSession()
})

// Best-effort cleanup when the page goes away.
window.addEventListener('beforeunload', stopSession)
275 | 278 | </script> |
276 | 279 | </body> |
277 | 280 | </html> |
0 commit comments