Skip to content

Commit 4b61d05

Browse files
authored
1 parent e92642e commit 4b61d05

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed

openai-audio-output.html

+233
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>OpenAI Text-to-Speech</title>
7+
<style>
8+
/* Page layout: a single centred column. */
body {
    font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    max-width: 800px;
    margin: 20px auto;
    padding: 0 20px;
    line-height: 1.6;
}

/* Introductory call-out box. */
.info {
    background: #e8f4ff;
    padding: 15px;
    border-radius: 4px;
    margin: 20px 0;
    border-left: 4px solid #0066cc;
}

/* Prompt, voice picker and submit button stacked vertically. */
.input-group {
    display: flex;
    flex-direction: column;
    gap: 12px;
    margin-bottom: 20px;
}

textarea {
    /* border-box keeps padding and border inside the 100% width; with the
       default content-box the textarea overflowed its container. */
    box-sizing: border-box;
    width: 100%;
    min-height: 150px;
    padding: 12px;
    font-size: 16px;
    border: 1px solid #ccc;
    border-radius: 4px;
    resize: vertical;
}

select {
    padding: 8px 12px;
    font-size: 16px;
    border: 1px solid #ccc;
    border-radius: 4px;
    max-width: 200px;
}

button {
    padding: 8px 16px;
    font-size: 16px;
    background: #0066cc;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    max-width: 200px;
}

button:disabled {
    background: #cccccc;
    /* Override the pointer cursor inherited from the base button rule. */
    cursor: not-allowed;
}

button:hover:not(:disabled) {
    background: #0055aa;
}

/* Error message area. */
.error {
    color: #cc0000;
    margin: 10px 0;
}

/* Audio player, download button and transcript. */
.player-container {
    margin: 20px 0;
}

audio {
    width: 100%;
    margin: 10px 0;
}

.transcript {
    background: #f5f5f5;
    padding: 15px;
    border-radius: 4px;
    margin: 10px 0;
}

.loading {
    color: #666;
    font-style: italic;
}
81+
</style>
82+
</head>
83+
<body><h1>Prompt gpt-4o-audio-preview</h1>
84+
<div class="info">
85+
Enter a prompt below and execute against <code>gpt-4o-audio-preview</code> to hear the results.
86+
</div>
87+
88+
<div class="input-group">
89+
<textarea id="promptInput" placeholder="Enter your text here..." aria-label="Input text"></textarea>
90+
<select id="voiceSelect" aria-label="Voice selection">
91+
<option value="alloy">Alloy</option>
92+
<option value="echo">Echo</option>
93+
<option value="fable">Fable</option>
94+
<option value="onyx">Onyx</option>
95+
<option value="nova">Nova</option>
96+
<option value="shimmer">Shimmer</option>
97+
</select>
98+
<button id="submitBtn">Generate Speech</button>
99+
</div>
100+
101+
<div id="error" class="error" style="display: none;"></div>
102+
<div id="playerContainer" class="player-container" style="display: none;">
103+
<audio id="audioPlayer" controls></audio>
104+
<button id="downloadBtn">Download Audio</button>
105+
<div id="transcript" class="transcript"></div>
106+
</div>
107+
108+
<script>
109+
// Cache references to the page elements the script reads from and writes to.
const byId = (id) => document.getElementById(id);

// Inputs
const promptInput = byId('promptInput');
const voiceSelect = byId('voiceSelect');
const submitBtn = byId('submitBtn');

// Outputs
const errorDiv = byId('error');
const playerContainer = byId('playerContainer');
const audioPlayer = byId('audioPlayer');
const downloadBtn = byId('downloadBtn');
const transcriptDiv = byId('transcript');
117+
118+
/**
 * Show an error message and hide the audio player.
 *
 * @param {string} message - Text to place in the error area.
 */
function showError(message) {
    const { style: errorStyle } = errorDiv;
    const { style: playerStyle } = playerContainer;

    errorDiv.textContent = message;
    errorStyle.display = 'block';
    playerStyle.display = 'none';
}
123+
124+
/**
 * Hide the error area. The message text itself is left in place.
 */
function clearError() {
    const { style } = errorDiv;
    style.display = 'none';
}
127+
128+
/**
 * Return the OpenAI API key, asking the user for it on first use.
 *
 * The key is read from localStorage under `openai_api_key`. When none is
 * stored the user is prompted, and a non-empty answer is saved for later
 * calls. Surrounding whitespace is stripped because a pasted key with a
 * trailing space or newline would otherwise end up in the Authorization
 * header.
 *
 * @returns {string|null} The key, an empty string if the user entered only
 *   whitespace, or null if the prompt was cancelled.
 */
function getAPIKey() {
    const storedKey = localStorage.getItem('openai_api_key')?.trim();
    if (storedKey) {
        return storedKey;
    }

    // prompt() yields null when the dialog is cancelled.
    const enteredKey = prompt('Please enter your OpenAI API Key:')?.trim() ?? null;
    if (enteredKey) {
        localStorage.setItem('openai_api_key', enteredKey);
    }
    return enteredKey;
}
138+
139+
/**
 * Send the prompt to the OpenAI chat completions endpoint using the
 * `gpt-4o-audio-preview` model, then load the returned WAV audio into the
 * player, show its transcript and wire up the download button.
 *
 * Does nothing while a previous request is still running. Failures are
 * logged to the console and reported through showError(); the submit button
 * is always restored afterwards.
 *
 * @returns {Promise<void>}
 */
async function submitToAPI() {
    // The keyboard shortcut calls this function directly and therefore
    // bypasses the disabled button; never run two requests at once.
    if (submitBtn.disabled) {
        return;
    }

    const promptText = promptInput.value;
    if (promptText.trim() === '') {
        showError('Please enter a prompt.');
        return;
    }

    const apiKey = getAPIKey();
    if (!apiKey) {
        alert('API Key is required.');
        return;
    }

    submitBtn.textContent = 'Processing...';
    submitBtn.disabled = true;

    const payload = {
        model: 'gpt-4o-audio-preview',
        modalities: ['text', 'audio'],
        audio: {
            voice: voiceSelect.value,
            format: 'wav',
        },
        messages: [
            {
                role: 'user',
                content: promptText,
            },
        ],
    };

    try {
        const response = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${apiKey}`,
            },
            body: JSON.stringify(payload),
        });

        // An error response is not guaranteed to carry a JSON body; fall
        // back to the generic messages below instead of a parse error.
        let data = null;
        try {
            data = await response.json();
        } catch {
            data = null;
        }

        if (!response.ok) {
            if (response.status === 401) {
                // Forget a rejected key so the next attempt prompts for a
                // new one instead of failing forever with the stored one.
                localStorage.removeItem('openai_api_key');
            }
            throw new Error(data?.error?.message || 'API request failed');
        }

        const audio = data?.choices?.[0]?.message?.audio;
        if (!audio?.data) {
            throw new Error('The API response did not include audio data');
        }

        // Decode the base64 payload into raw bytes and wrap it in a Blob.
        const bytes = Uint8Array.from(atob(audio.data), (ch) => ch.charCodeAt(0));
        const blob = new Blob([bytes], { type: 'audio/wav' });
        const audioUrl = URL.createObjectURL(blob);

        // Release the blob behind the previous result; object URLs keep
        // their data alive until revoked or the page is unloaded.
        const previousUrl = audioPlayer.src;
        if (previousUrl && previousUrl.startsWith('blob:')) {
            URL.revokeObjectURL(previousUrl);
        }

        // Update UI
        audioPlayer.src = audioUrl;
        transcriptDiv.textContent = audio.transcript ?? '';
        playerContainer.style.display = 'block';
        clearError();

        // Download via a temporary link pointing at the current blob URL.
        downloadBtn.onclick = () => {
            const link = document.createElement('a');
            link.href = audioUrl;
            link.download = 'speech.wav';
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
        };
    } catch (error) {
        console.error('Error:', error);
        showError(error.message || 'An error occurred');
    } finally {
        submitBtn.textContent = 'Generate Speech';
        submitBtn.disabled = false;
    }
}
222+
223+
// Clicking the button submits the prompt.
submitBtn.addEventListener('click', submitToAPI);

// Ctrl+Enter (Cmd+Enter on macOS) submits from inside the textarea.
// `keydown` is used because `keypress` is deprecated and is not reliably
// fired for modifier combinations.
promptInput.addEventListener('keydown', (e) => {
    const isSubmitShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
    if (!isSubmitShortcut) {
        return;
    }

    // Keep the shortcut from also inserting a newline.
    e.preventDefault();

    // A disabled button means a request is already running.
    if (!submitBtn.disabled) {
        submitToAPI();
    }
});
231+
</script>
232+
</body>
233+
</html>

0 commit comments

Comments
 (0)