Skip to content

Commit 50ec513

Browse files
committed
Merge branch 'release/0.3.0'
2 parents ceca8e7 + 3a2a155 commit 50ec513

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# CHANGELOG
22

3-
## Next
3+
## Version 0.3.0 (2025-10-10)
44

5-
- fix: add support for any `Mapping` with an "audio" key (not just `dict`)
5+
- feat: add support for passing audio as a `{"audio": str | Path}` mapping
6+
- feat: add support for passing audio as a `{"waveform": np.ndarray | torch.Tensor, "sample_rate": int}` mapping
67

78
## Version 0.2.1 (2025-09-20)
89

src/pyannoteai/sdk/client.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,11 @@
2424
import importlib.metadata
2525
import io
2626
import os
27+
import tempfile
2728
import time
2829
import warnings
2930
from pathlib import Path
30-
from typing import Callable, Optional, Union, Mapping
31+
from typing import Callable, Mapping, Optional, Union
3132

3233
import requests
3334
from requests import Response
@@ -251,7 +252,7 @@ def _hash_md5(self, file: Union[str, Path]) -> str:
251252

252253
def upload(
253254
self,
254-
audio: str | Path | dict[str, str|Path],
255+
audio: str | Path | dict[str, str | Path],
255256
media_url: Optional[str] = None,
256257
callback: Optional[Callable] = None,
257258
) -> str:
@@ -279,12 +280,39 @@ def upload(
279280
or "media://{md5-hash-of-audio-file}" otherwise.
280281
"""
281282

283+
# whether to delete the audio file after upload. will only be set to True
284+
# when audio is provided as a waveform and saved in a temporary file.
285+
delete = False
286+
282287
if isinstance(audio, Mapping):
283-
if "audio" not in audio:
288+
if "audio" in audio:
289+
audio = audio["audio"]
290+
291+
elif "waveform" in audio:
292+
delete = True
293+
try:
294+
import scipy.io
295+
except ImportError:
296+
raise ImportError(
297+
"To process the waveform directly, you need to install `scipy`."
298+
)
299+
300+
sample_rate = audio["sample_rate"]
301+
waveform = audio["waveform"]
302+
# common pattern is to provide waveform as a torch tensor.
303+
# turn it into a numpy array before passing to scipy.io.wavfile.
304+
if hasattr(audio["waveform"], "numpy"):
305+
waveform = audio["waveform"].numpy(force=True)
306+
307+
# write waveform to a temporary audio file
308+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
309+
scipy.io.wavfile.write(f.name, sample_rate, waveform.squeeze())
310+
f.flush()
311+
audio = f.name
312+
else:
284313
raise ValueError(
285314
"When `audio` is a dict, it must provide the path to the audio file in 'audio' key."
286315
)
287-
audio = audio["audio"]
288316

289317
# get the total size of the file to upload
290318
# to provide progress information to the hook
@@ -318,6 +346,9 @@ def upload(
318346
Failed to upload audio to presigned URL {presigned_url}.
319347
Please check your internet connection or visit https://pyannote.openstatus.dev/ to check the status of the pyannoteAI API."""
320348
)
349+
finally:
350+
if delete and os.path.exists(audio):
351+
os.remove(audio)
321352

322353
# TODO: handle HTTPError returned by the API
323354
response.raise_for_status()

0 commit comments

Comments
 (0)