lhotse-speech · pzelasko · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023
diff --git a/lhotse/__init__.py b/lhotse/__init__.py
@@ -2,9 +2,15 @@
     AudioSource,
     Recording,
     RecordingSet,
+    audio_backend,
+    available_audio_backends,
+    get_audio_duration_mismatch_tolerance,
+    get_current_audio_backend,
+    get_default_audio_backend,
+    get_ffmpeg_torchaudio_info_enabled,
     set_audio_duration_mismatch_tolerance,
+    set_current_audio_backend,
     set_ffmpeg_torchaudio_info_enabled,
-    get_ffmpeg_torchaudio_info_enabled,
 )
 from .caching import is_caching_enabled, set_caching_enabled
 from .cut import CutSet, MonoCut, MultiCut, create_cut_set_eager, create_cut_set_lazy

diff --git a/lhotse/audio/__init__.py b/lhotse/audio/__init__.py
@@ -1,4 +1,6 @@
 from .backend import (
+    audio_backend,
+    available_audio_backends,
     get_current_audio_backend,
     get_default_audio_backend,
     get_ffmpeg_torchaudio_info_enabled,

diff --git a/lhotse/audio/backend.py b/lhotse/audio/backend.py
@@ -22,6 +22,104 @@
 from lhotse.utils import Pathlike, Seconds, compute_num_samples
 
 _FFMPEG_TORCHAUDIO_INFO_ENABLED: bool = True
+CURRENT_AUDIO_BACKEND: Optional["AudioBackend"] = None
+
+
+def available_audio_backends() -> List[str]:
+    """
+    List the backend names that can be passed as a string to Lhotse's backend setters.
+
+    The special name ``"default"`` always comes first; it is followed by the
+    sorted keys of ``AudioBackend.KNOWN_BACKENDS`` (i.e. backend class names).
+    """
+    registered_names = sorted(AudioBackend.KNOWN_BACKENDS)
+    return ["default", *registered_names]
+
+
+@contextmanager
+def audio_backend(backend: Union["AudioBackend", str]):
+    """
+    Context manager that sets Lhotse's audio backend to the specified value
+    and restores the previous audio backend at the end of its scope,
+    including when the body of the ``with`` statement raises an exception.
+
+    :param backend: an ``AudioBackend`` instance or a backend name understood by
+        :func:`set_current_audio_backend` (e.g. ``"default"``).
+
+    Example::
+
+        >>> with audio_backend("LibsndfileBackend"):
+        ...     some_audio_loading_fn()
+    """
+    previous = get_current_audio_backend()
+    set_current_audio_backend(backend)
+    try:
+        yield
+    finally:
+        # Without try/finally an exception raised inside the ``with`` body would
+        # skip the restore and leave the temporary backend active.
+        set_current_audio_backend(previous)
+
+
+def get_current_audio_backend() -> "AudioBackend":
+    """
+    Return the active audio backend, resolving and storing it on first use.
+
+    Resolution order:
+
+    1. a backend already stored in ``CURRENT_AUDIO_BACKEND``;
+    2. the backend named by the ``LHOTSE_AUDIO_BACKEND`` environment variable;
+    3. the ``"default"`` backend.
+
+    In cases 2 and 3 the choice is stored via :func:`set_current_audio_backend`,
+    so the environment variable is not read again while a backend remains set.
+    """
+    if CURRENT_AUDIO_BACKEND is None:
+        # Nothing chosen yet: take the env var when present, else "default".
+        # ``set_current_audio_backend`` rebinds the module-level global read below.
+        requested = os.environ.get("LHOTSE_AUDIO_BACKEND", "default")
+        set_current_audio_backend(requested)
+    return CURRENT_AUDIO_BACKEND
+
+
+def set_current_audio_backend(backend: Union["AudioBackend", str]) -> None:
+    """
+    Make ``backend`` the audio backend Lhotse uses to read every audio file,
+    replacing the default behaviour of educated guessing + trial-and-error.
+
+    :param backend: an ``AudioBackend`` instance, the name of a registered backend
+        class (instantiated via ``AudioBackend.new``), or ``"default"`` to select
+        the backend returned by :func:`get_default_audio_backend`.
+
+    Example forcing Lhotse to use ``audioread`` library for every audio loading operation::
+
+        >>> set_current_audio_backend(AudioreadBackend())
+    """
+    global CURRENT_AUDIO_BACKEND
+    if backend == "default":
+        selected = get_default_audio_backend()
+    elif isinstance(backend, str):
+        # Any other string is treated as a registered backend class name.
+        selected = AudioBackend.new(backend)
+    else:
+        assert isinstance(
+            backend, AudioBackend
+        ), f"Expected str or AudioBackend, got: {backend}"
+        selected = backend
+    CURRENT_AUDIO_BACKEND = selected
+
+
+@lru_cache(maxsize=1)
+def get_default_audio_backend() -> "AudioBackend":
+    """
+    Return a backend that can be used to read all audio formats supported by Lhotse.
+
+    The result is a ``CompositeAudioBackend`` built from a fixed, ordered list of
+    backends; thanks to ``lru_cache`` it is constructed only once.
+
+    The list starts with backends for special cases that need very specific
+    handling (such as: opus, sphere/shorten, in-memory buffers), followed by
+    several general audio loading libraries (torchaudio, soundfile, audioread).
+    In case the first fails, it tries the next one, and so on.
+    """
+    # Special cases: OPUS and SPHERE (SPHERE may be encoded with shorten,
+    #   which can only be decoded by binaries "shorten" and "sph2pipe").
+    special_case_backends = [
+        FfmpegSubprocessOpusBackend(),
+        Sph2pipeSubprocessBackend(),
+    ]
+    general_backends = [
+        # New FFMPEG backend available only in torchaudio 2.0.x+
+        TorchaudioFFMPEGBackend(),
+        # Prefer libsndfile for in-memory buffers only
+        LibsndfileBackend(),
+        # Torchaudio should be able to deal with most audio types...
+        TorchaudioDefaultBackend(),
+        # ... if not, try audioread...
+        AudioreadBackend(),
+        # ... oops.
+    ]
+    return CompositeAudioBackend(special_case_backends + general_backends)
 
 
 def set_ffmpeg_torchaudio_info_enabled(enabled: bool) -> None:
@@ -78,6 +176,19 @@
     but it may also fail. Its purpose is more to filter out formats that definitely are not supported.
     """
 
+    # Registry mapping a subclass name (``cls.__name__``) to the subclass itself.
+    # Filled in by ``__init_subclass__`` and looked up by ``new``.
+    KNOWN_BACKENDS = {}
+
+    def __init_subclass__(cls, **kwargs):
+        """
+        Register each newly defined subclass in ``KNOWN_BACKENDS`` under its class name.
+
+        The first class registered under a given name is kept; a later subclass
+        with the same ``__name__`` does not replace it.
+        """
+        AudioBackend.KNOWN_BACKENDS.setdefault(cls.__name__, cls)
+        super().__init_subclass__(**kwargs)
+
+    @classmethod
+    def new(cls, name: str) -> "AudioBackend":
+        """
+        Instantiate the registered backend class called ``name`` with no arguments.
+
+        :param name: a key of ``KNOWN_BACKENDS``, i.e. a backend class name.
+        :return: a new instance of the matching backend class.
+        :raises RuntimeError: when no backend is registered under ``name``.
+        """
+        backend_type = cls.KNOWN_BACKENDS.get(name)
+        if backend_type is None:
+            raise RuntimeError(f"Unknown audio backend name: {name}")
+        return backend_type()
+
     def read_audio(
         self,
         path_or_fd: Union[Pathlike, FileObject],
@@ -340,63 +451,6 @@
             )
 
 
-CURRENT_AUDIO_BACKEND = None
-
-
-def get_current_audio_backend() -> AudioBackend:
-    """
-    Return the audio backend currently set by the user, or default.
-    """
-    if CURRENT_AUDIO_BACKEND is not None:
-        return CURRENT_AUDIO_BACKEND
-    return get_default_audio_backend()
-
-
-def set_current_audio_backend(backend: AudioBackend) -> None:
-    """
-    Force Lhotse to use a specific audio backend to read every audio file,
-    overriding the default behaviour of educated guessing + trial-and-error.
-
-    Example forcing Lhotse to use ``audioread`` library for every audio loading operation::
-
-        >>> set_current_audio_backend(AudioreadBackend())
-    """
-    global CURRENT_AUDIO_BACKEND
-    assert isinstance(backend, AudioBackend)
-    CURRENT_AUDIO_BACKEND = backend
-
-
-@lru_cache(maxsize=1)
-def get_default_audio_backend() -> AudioBackend:
-    """
-    Return a backend that can be used to read all audio formats supported by Lhotse.
-
-    It first looks for special cases that need very specific handling
-    (such as: opus, sphere/shorten, in-memory buffers)
-    and tries to match them against relevant audio backends.
-
-    Then, it tries to use several audio loading libraries (torchaudio, soundfile, audioread).
-    In case the first fails, it tries the next one, and so on.
-    """
-    return CompositeAudioBackend(
-        [
-            # First handle special cases: OPUS and SPHERE (SPHERE may be encoded with shorten,
-            #   which can only be decoded by binaries "shorten" and "sph2pipe").
-            FfmpegSubprocessOpusBackend(),
-            Sph2pipeSubprocessBackend(),
-            # New FFMPEG backend available only in torchaudio 2.0.x+
-            TorchaudioFFMPEGBackend(),
-            # Prefer libsndfile for in-memory buffers only
-            LibsndfileBackend(),
-            # Torchaudio should be able to deal with most audio types...
-            TorchaudioDefaultBackend(),
-            # ... if not, try audioread...
-            AudioreadBackend(),
-            # ... oops.
-        ]
-    )
-
-
 class LibsndfileCompatibleAudioInfo(NamedTuple):
     channels: int
     frames: int

diff --git a/lhotse/bin/modes/__init__.py b/lhotse/bin/modes/__init__.py
@@ -7,5 +7,6 @@
 from .recipes import *
 from .shar import *
 from .supervision import *
+from .utils import *
 from .validate import *
 from .workflows import *
diff --git a/lhotse/bin/modes/utils.py b/lhotse/bin/modes/utils.py
@@ -0,0 +1,13 @@
+import click
+
+from .cli_base import cli
+
+
+@cli.command()
+def list_audio_backends():
+    """
+    List the names of all available audio backends.
+    """
+    # NOTE: click uses the docstring above as this command's help text.
+    # The import is local to the command body, as in the original code
+    # (presumably to keep CLI start-up light — confirm before hoisting it).
+    from lhotse import available_audio_backends
+
+    backend_names = available_audio_backends()
+    click.echo(backend_names)
diff --git a/lhotse/recipes/fisher_spanish.py b/lhotse/recipes/fisher_spanish.py
@@ -1,7 +1,7 @@
 """
 About the Fisher Spanish corpus
 
-    This is conversational telephone speech collected as 2-channel μ-law, 8kHz-sampled data. 
+    This is conversational telephone speech collected as 2-channel μ-law, 8kHz-sampled data.
     The catalog number LDC2010S01 for audio corpus and LDC2010T04 for transcripts.
 
     This data is not available for free - your institution needs to have an LDC subscription.
@@ -78,8 +78,9 @@ def prepare_fisher_spanish(
     :return: A dict with manifests. The keys are: ``{'recordings', 'supervisions'}``.
     """
 
-    audio_dir_path, transcript_dir_path = Path(audio_dir_path), Path(
-        transcript_dir_path
+    audio_dir_path, transcript_dir_path = (
+        Path(audio_dir_path),
+        Path(transcript_dir_path),
     )
 
     audio_paths = check_and_rglob(audio_dir_path, "*.sph")

diff --git a/test/audio/test_audio_backend.py b/test/audio/test_audio_backend.py
@@ -0,0 +1,46 @@
+import pytest
+
+import lhotse
+from lhotse.audio.backend import CompositeAudioBackend, LibsndfileBackend
+
+
+def test_default_audio_backend(monkeypatch):
+    """With no programmatic override and no env var, the composite default is used."""
+    # Isolate the test from the caller's shell (an exported LHOTSE_AUDIO_BACKEND
+    # would otherwise select a different backend) and from state left by other
+    # tests. monkeypatch undoes both changes at teardown.
+    monkeypatch.delenv("LHOTSE_AUDIO_BACKEND", raising=False)
+    monkeypatch.setattr(lhotse.audio.backend, "CURRENT_AUDIO_BACKEND", None)
+    b = lhotse.get_current_audio_backend()
+    assert isinstance(b, CompositeAudioBackend)
+
+
+def test_list_available_audio_backends():
+    """``available_audio_backends`` reports "default" first, then sorted class names."""
+    expected_names = [
+        "default",
+        "AudioreadBackend",
+        "CompositeAudioBackend",
+        "FfmpegSubprocessOpusBackend",
+        "FfmpegTorchaudioStreamerBackend",
+        "LibsndfileBackend",
+        "Sph2pipeSubprocessBackend",
+        "TorchaudioDefaultBackend",
+        "TorchaudioFFMPEGBackend",
+    ]
+    reported_names = lhotse.available_audio_backends()
+    assert reported_names == expected_names
+
+
+@pytest.mark.parametrize("backend", ["LibsndfileBackend", LibsndfileBackend()])
+def test_audio_backend_contextmanager(backend, monkeypatch):
+    """``audio_backend`` switches the backend inside the block and restores it after."""
+    # Start from a clean slate: no env-var override and no stored backend.
+    # monkeypatch restores the environment and the module global at teardown,
+    # so this test neither depends on nor leaks global state.
+    monkeypatch.delenv("LHOTSE_AUDIO_BACKEND", raising=False)
+    monkeypatch.setattr(lhotse.audio.backend, "CURRENT_AUDIO_BACKEND", None)
+    assert isinstance(lhotse.get_current_audio_backend(), CompositeAudioBackend)
+    with lhotse.audio_backend(backend):
+        assert isinstance(lhotse.get_current_audio_backend(), LibsndfileBackend)
+    assert isinstance(lhotse.get_current_audio_backend(), CompositeAudioBackend)
+
+
+@pytest.fixture()
+def backend_set_via_env_var(monkeypatch):
+    """Clear the stored backend and request ``LibsndfileBackend`` via the env var."""
+    # Drop any stored backend so the next lookup has to resolve it again.
+    lhotse.audio.backend.CURRENT_AUDIO_BACKEND = None
+    # monkeypatch removes/restores the variable itself when the test finishes.
+    monkeypatch.setenv("LHOTSE_AUDIO_BACKEND", "LibsndfileBackend")
+    yield
+    # Teardown: explicitly switch back to the default backend so the backend
+    # selected during the test is not left active afterwards.
+    lhotse.set_current_audio_backend("default")
+
+
+def test_envvar_audio_backend(backend_set_via_env_var):
+    """The backend named in ``LHOTSE_AUDIO_BACKEND`` is used when none is set in code."""
+    assert isinstance(lhotse.get_current_audio_backend(), LibsndfileBackend)