|
| 1 | +<!DOCTYPE html> |
| 2 | +<html lang="en"> |
| 3 | +<head> |
| 4 | + <meta charset="UTF-8"> |
| 5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| 6 | + <title>OpenAI Text-to-Speech</title> |
| 7 | + <style> |
| 8 | + body { |
| 9 | + font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
| 10 | + max-width: 800px; |
| 11 | + margin: 20px auto; |
| 12 | + padding: 0 20px; |
| 13 | + line-height: 1.6; |
| 14 | + } |
| 15 | + .info { |
| 16 | + background: #e8f4ff; |
| 17 | + padding: 15px; |
| 18 | + border-radius: 4px; |
| 19 | + margin: 20px 0; |
| 20 | + border-left: 4px solid #0066cc; |
| 21 | + } |
| 22 | + .input-group { |
| 23 | + display: flex; |
| 24 | + flex-direction: column; |
| 25 | + gap: 12px; |
| 26 | + margin-bottom: 20px; |
| 27 | + } |
| 28 | + textarea { |
| 29 | + width: 100%; |
| 30 | + min-height: 150px; |
| 31 | + padding: 12px; |
| 32 | + font-size: 16px; |
| 33 | + border: 1px solid #ccc; |
| 34 | + border-radius: 4px; |
| 35 | + resize: vertical; |
| 36 | + } |
| 37 | + select { |
| 38 | + padding: 8px 12px; |
| 39 | + font-size: 16px; |
| 40 | + border: 1px solid #ccc; |
| 41 | + border-radius: 4px; |
| 42 | + max-width: 200px; |
| 43 | + } |
| 44 | + button { |
| 45 | + padding: 8px 16px; |
| 46 | + font-size: 16px; |
| 47 | + background: #0066cc; |
| 48 | + color: white; |
| 49 | + border: none; |
| 50 | + border-radius: 4px; |
| 51 | + cursor: pointer; |
| 52 | + max-width: 200px; |
| 53 | + } |
| 54 | + button:disabled { |
| 55 | + background: #cccccc; |
| 56 | + } |
| 57 | + button:hover:not(:disabled) { |
| 58 | + background: #0055aa; |
| 59 | + } |
| 60 | + .error { |
| 61 | + color: #cc0000; |
| 62 | + margin: 10px 0; |
| 63 | + } |
| 64 | + .player-container { |
| 65 | + margin: 20px 0; |
| 66 | + } |
| 67 | + audio { |
| 68 | + width: 100%; |
| 69 | + margin: 10px 0; |
| 70 | + } |
| 71 | + .transcript { |
| 72 | + background: #f5f5f5; |
| 73 | + padding: 15px; |
| 74 | + border-radius: 4px; |
| 75 | + margin: 10px 0; |
| 76 | + } |
| 77 | + .loading { |
| 78 | + color: #666; |
| 79 | + font-style: italic; |
| 80 | + } |
| 81 | + </style> |
| 82 | +</head> |
| 83 | +<body><h1>Prompt gpt-4o-audio-preview</h1> |
| 84 | + <div class="info"> |
| 85 | + Enter a prompt below and execute against <code>gpt-4o-audio-preview</code> to hear the results. |
| 86 | + </div> |
| 87 | + |
| 88 | + <div class="input-group"> |
| 89 | + <textarea id="promptInput" placeholder="Enter your text here..." aria-label="Input text"></textarea> |
| 90 | + <select id="voiceSelect" aria-label="Voice selection"> |
| 91 | + <option value="alloy">Alloy</option> |
| 92 | + <option value="echo">Echo</option> |
| 93 | + <option value="fable">Fable</option> |
| 94 | + <option value="onyx">Onyx</option> |
| 95 | + <option value="nova">Nova</option> |
| 96 | + <option value="shimmer">Shimmer</option> |
| 97 | + </select> |
| 98 | + <button id="submitBtn">Generate Speech</button> |
| 99 | + </div> |
| 100 | + |
| 101 | + <div id="error" class="error" style="display: none;"></div> |
| 102 | + <div id="playerContainer" class="player-container" style="display: none;"> |
| 103 | + <audio id="audioPlayer" controls></audio> |
| 104 | + <button id="downloadBtn">Download Audio</button> |
| 105 | + <div id="transcript" class="transcript"></div> |
| 106 | + </div> |
| 107 | + |
| 108 | + <script> |
// Look up every element the script touches once, up front. The ids are listed
// in the same order as the names they are destructured into.
const [
  promptInput,
  voiceSelect,
  submitBtn,
  errorDiv,
  playerContainer,
  audioPlayer,
  downloadBtn,
  transcriptDiv,
] = [
  'promptInput',
  'voiceSelect',
  'submitBtn',
  'error',
  'playerContainer',
  'audioPlayer',
  'downloadBtn',
  'transcript',
].map((id) => document.getElementById(id));
| 117 | + |
/**
 * Put `message` into the error banner, make the banner visible, and hide the
 * audio player container.
 *
 * @param {string} message - Text to display in the error banner.
 */
function showError(message) {
  const banner = errorDiv;
  banner.textContent = message;
  banner.style.display = 'block';

  const player = playerContainer;
  player.style.display = 'none';
}
| 123 | + |
/**
 * Hide the error banner. Only its visibility changes; the banner text is left
 * as it was.
 */
function clearError() {
  const { style: bannerStyle } = errorDiv;
  bannerStyle.display = 'none';
}
| 127 | + |
/**
 * Return the OpenAI API key, asking the user for it when none is stored.
 *
 * The key is read from `localStorage` under `openai_api_key`. If nothing usable
 * is stored, the user is prompted once and a non-empty answer is saved for
 * later calls.
 *
 * Keys are trimmed before being stored or returned: a pasted key often carries
 * a trailing space or newline, which would otherwise end up inside the
 * `Authorization` header. A whitespace-only answer is treated as "no key" and
 * is not persisted.
 *
 * @returns {string|null} The trimmed API key, or `null` when the user cancelled
 *   the dialog or entered nothing.
 */
function getAPIKey() {
  const storageKey = 'openai_api_key';

  const storedKey = localStorage.getItem(storageKey)?.trim();
  if (storedKey) {
    return storedKey;
  }

  // prompt() yields null when the dialog is cancelled, hence the `?.`.
  const enteredKey = prompt('Please enter your OpenAI API Key:')?.trim();
  if (!enteredKey) {
    return null;
  }

  localStorage.setItem(storageKey, enteredKey);
  return enteredKey;
}
| 138 | + |
/**
 * Send the prompt to `gpt-4o-audio-preview` and present the spoken reply.
 *
 * Reads the prompt text and the selected voice from the page, posts them to
 * the OpenAI chat completions endpoint requesting WAV audio, then loads the
 * returned audio into the player, shows the transcript, and wires the download
 * button to the new audio. Failures are logged and shown through `showError`.
 *
 * The function never rejects: every error from the request or from decoding is
 * caught and reported in the page.
 *
 * @returns {Promise<void>} Resolves once the UI reflects the outcome.
 */
async function submitToAPI() {
  // The Ctrl+Enter shortcut can fire while a request is still running. The
  // disabled submit button doubles as the "busy" flag, so bail out instead of
  // starting a second, overlapping request.
  if (submitBtn.disabled) {
    return;
  }

  // Named promptText (not prompt) so it does not shadow window.prompt.
  const promptText = promptInput.value;
  if (!promptText.trim()) {
    showError('Please enter some text first.');
    return;
  }

  const apiKey = getAPIKey();
  if (!apiKey) {
    alert('API Key is required.');
    return;
  }

  submitBtn.textContent = 'Processing...';
  submitBtn.disabled = true;

  const payload = {
    model: 'gpt-4o-audio-preview',
    modalities: ['text', 'audio'],
    audio: {
      voice: voiceSelect.value,
      format: 'wav',
    },
    messages: [
      {
        role: 'user',
        content: promptText,
      },
    ],
  };

  try {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify(payload),
    });

    // Error responses are not guaranteed to be JSON (e.g. a proxy error page).
    // Keep going with `null` so the HTTP status is reported instead of an
    // unrelated JSON syntax error.
    let data = null;
    try {
      data = await response.json();
    } catch (parseError) {
      console.error('Could not parse the API response as JSON:', parseError);
    }

    if (!response.ok) {
      // A rejected key would otherwise be reused forever; forget it so the
      // next attempt asks the user again.
      if (response.status === 401) {
        localStorage.removeItem('openai_api_key');
      }
      throw new Error(
        data?.error?.message || `API request failed (HTTP ${response.status})`
      );
    }

    const audio = data?.choices?.[0]?.message?.audio;
    if (!audio?.data) {
      throw new Error('The API response did not contain any audio.');
    }

    // The audio arrives base64-encoded; atob() yields one character per byte.
    const wavBytes = Uint8Array.from(atob(audio.data), (ch) => ch.charCodeAt(0));
    const blob = new Blob([wavBytes], { type: 'audio/wav' });
    const audioUrl = URL.createObjectURL(blob);

    // Swap in the new audio, then release the blob URL of the previous result
    // so repeated generations do not keep every earlier WAV alive in memory.
    const previousUrl = audioPlayer.src;
    audioPlayer.src = audioUrl;
    if (previousUrl && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }

    transcriptDiv.textContent = audio.transcript ?? '';
    playerContainer.style.display = 'block';
    clearError();

    // Re-point the download button at the audio that was just generated.
    downloadBtn.onclick = () => {
      const link = document.createElement('a');
      link.href = audioUrl;
      link.download = 'speech.wav';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    };
  } catch (error) {
    console.error('Error:', error);
    showError(error.message || 'An error occurred');
  } finally {
    submitBtn.textContent = 'Generate Speech';
    submitBtn.disabled = false;
  }
}
| 222 | + |
// Submit when the button is clicked.
submitBtn.addEventListener('click', submitToAPI);

// Keyboard shortcut: Ctrl+Enter (or Cmd+Enter on macOS) inside the textarea
// submits as well. `keydown` is used because `keypress` is deprecated and is
// not dispatched consistently for modifier combinations.
promptInput.addEventListener('keydown', (event) => {
  const hasSubmitModifier = event.ctrlKey || event.metaKey;
  if (event.key === 'Enter' && hasSubmitModifier) {
    // Keep the shortcut from also inserting a newline into the textarea.
    event.preventDefault();
    submitToAPI();
  }
});
| 231 | + </script> |
| 232 | +</body> |
| 233 | +</html> |
0 commit comments