From 489b769b74aa6301f2e9e8855747a11f593e483d Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Wed, 24 Feb 2021 18:29:53 +0000 Subject: [PATCH 1/4] Removed legacy backends from torchaudio --- docs/source/backend.rst | 109 +--- .../backend/legacy_test.py | 290 ---------- .../backend/soundfile/info_test.py | 2 +- .../backend/soundfile/load_test.py | 2 +- .../backend/soundfile/save_test.py | 2 +- .../torchaudio_unittest/backend/utils_test.py | 20 +- .../common_utils/backend_utils.py | 4 +- .../datasets/tedlium_test.py | 7 - torchaudio/__init__.py | 10 - torchaudio/backend/__init__.py | 12 - torchaudio/backend/_soundfile_backend.py | 449 --------------- torchaudio/backend/common.py | 186 ------- torchaudio/backend/no_backend.py | 11 +- torchaudio/backend/soundfile_backend.py | 518 ++++++++++++++---- torchaudio/backend/sox_backend.py | 294 ---------- torchaudio/backend/sox_io_backend.py | 3 + torchaudio/backend/utils.py | 30 +- torchaudio/csrc/CMakeLists.txt | 1 - torchaudio/csrc/pybind.cpp | 98 ---- torchaudio/csrc/sox/legacy.cpp | 170 ------ torchaudio/csrc/sox/legacy.h | 40 -- torchaudio/datasets/tedlium.py | 6 +- 22 files changed, 436 insertions(+), 1828 deletions(-) delete mode 100644 test/torchaudio_unittest/backend/legacy_test.py delete mode 100644 torchaudio/backend/_soundfile_backend.py delete mode 100644 torchaudio/backend/sox_backend.py delete mode 100644 torchaudio/csrc/sox/legacy.cpp delete mode 100644 torchaudio/csrc/sox/legacy.h diff --git a/docs/source/backend.rst b/docs/source/backend.rst index dcf12c5b2f..9ce1f071b1 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -11,11 +11,7 @@ Overview There are currently four implementations available. 
* :ref:`"sox_io" ` (default on Linux/macOS) -* :ref:`"sox" ` (deprecated, will be removed in 0.9.0 release) * :ref:`"soundfile" ` (default on Windows) -* :ref:`"soundfile" (legacy interface) ` (deprecated, will be removed in 0.9.0 release) - -The use of ``"sox"`` backend is strongly discouraged as it cannot correctly handle formats other than 16-bit integer WAV. See `#726 `_ for the detail. .. note:: Instead of calling functions in ``torchaudio.backend`` directly, please use ``torchaudio.info``, ``torchaudio.load``, ``torchaudio.load_wav`` and ``torchaudio.save`` with proper backend set with :func:`torchaudio.set_audio_backend`. @@ -23,31 +19,17 @@ The use of ``"sox"`` backend is strongly discouraged as it cannot correctly hand Availability ------------ -``"sox"`` and ``"sox_io"`` backends require C++ extension module, which is included in Linux/macOS binary distributions. These backends are not available on Windows. +``"sox_io"`` backend requires C++ extension module, which is included in Linux/macOS binary distributions. This backend is not available on Windows. ``"soundfile"`` backend requires ``SoundFile``. Please refer to `the SoundFile documentation `_ for the installation. -Changes in default backend and deprecation ------------------------------------------- - -Backend module is going through a major overhaul. The following table summarizes the timeline for the deprecations and removals. 
- +--------------------+-----------------------+------------------------+ | **Backend** | **0.8.0** | **0.9.0** | +====================+=======================+========================+ | ``"sox_io"`` | Default on Linx/macOS | Default on Linux/macOS | +--------------------+-----------------------+------------------------+ - | ``"sox"`` | Available | Removed | - | (deprecated) | | | - +--------------------+-----------------------+------------------------+ | ``"soundfile"`` | Default on Windows | Default on Windows | +--------------------+-----------------------+------------------------+ - | ``"soundfile"`` | Available | Removed | - | (legacy interface, | | | - | deprecated) | | | - +--------------------+-----------------------+------------------------+ - -* The ``"sox"`` and ``"soundfile" (legacy interface)`` backends are deprecated and will be removed in 0.9.0 release. Common Data Structure ~~~~~~~~~~~~~~~~~~~~~ @@ -59,16 +41,6 @@ AudioMetaData .. autoclass:: torchaudio.backend.common.AudioMetaData -SignalInfo (Deprecated) ------------------------ - -.. autoclass:: torchaudio.backend.common.SignalInfo - -EncodingInfo (Deprecated) -------------------------- - -.. autoclass:: torchaudio.backend.common.EncodingInfo - .. _sox_io_backend: Sox IO Backend @@ -102,46 +74,6 @@ save .. autofunction:: torchaudio.backend.sox_io_backend.save -.. _sox_backend: - -Sox Backend (Deprecated) -~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``"sox"`` backend is available on Linux/macOS and not available on Windows. This backend is deprecated and will be removed in ``0.9.0`` release. - -You can switch from another backend to ``sox`` backend with the following; - -.. code:: - - torchaudio.set_audio_backend("sox") - -info ----- - -.. autofunction:: torchaudio.backend.sox_backend.info - -load ----- - -.. autofunction:: torchaudio.backend.sox_backend.load - -.. autofunction:: torchaudio.backend.sox_backend.load_wav - - -save ----- - -.. 
autofunction:: torchaudio.backend.sox_backend.save - -others ------- - -.. automodule:: torchaudio.backend.sox_backend - :members: - :exclude-members: info, load, load_wav, save - -.. _soundfile_backend: - Soundfile Backend ~~~~~~~~~~~~~~~~~ @@ -153,48 +85,13 @@ You can switch from another backend to the ``"soundfile"`` backend with the foll torchaudio.set_audio_backend("soundfile") -.. note:: - If you are switching from `"soundfile" (legacy interface) ` backend, set ``torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE`` flag **before** switching the backend. - info ---- -.. autofunction:: torchaudio.backend._soundfile_backend.info - -load ----- - -.. autofunction:: torchaudio.backend._soundfile_backend.load - -.. autofunction:: torchaudio.backend._soundfile_backend.load_wav - - -save ----- - -.. autofunction:: torchaudio.backend._soundfile_backend.save - -.. _soundfile_legacy_backend: - -Legacy Interface (Deprecated) ------------------------------ - -``"soundfile"`` backend with legacy interface is made available for backward compatibility reason, however this interface is deprecated and will be removed in the ``0.9.0`` release. - -To switch to this backend/interface, set ``torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE`` flag **before** switching the backend. - -.. code:: - - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True - torchaudio.set_audio_backend("soundfile") # The legacy interface - -info -^^^^ - .. autofunction:: torchaudio.backend.soundfile_backend.info load -^^^^ +---- .. autofunction:: torchaudio.backend.soundfile_backend.load @@ -202,6 +99,6 @@ load save -^^^^ +---- .. 
autofunction:: torchaudio.backend.soundfile_backend.save diff --git a/test/torchaudio_unittest/backend/legacy_test.py b/test/torchaudio_unittest/backend/legacy_test.py deleted file mode 100644 index 3dbc419bde..0000000000 --- a/test/torchaudio_unittest/backend/legacy_test.py +++ /dev/null @@ -1,290 +0,0 @@ -import os -import math -import shutil -import tempfile -import unittest - -import torch -import torchaudio -from torchaudio.utils import sox_utils -from torchaudio._internal.module_utils import is_module_available - -from torchaudio_unittest.common_utils import get_asset_path - -BACKENDS = [] -BACKENDS_MP3 = [] - -if is_module_available('soundfile'): - BACKENDS.append('soundfile') - -if is_module_available('torchaudio._torchaudio'): - BACKENDS.append('sox') - - if ( - 'mp3' in sox_utils.list_read_formats() and - 'mp3' in sox_utils.list_write_formats() - ): - BACKENDS_MP3 = ['sox'] - - -def create_temp_assets_dir(): - """ - Creates a temporary directory and moves all files from test/assets there. - Returns a Tuple[string, TemporaryDirectory] which is the folder path - and object. 
- """ - tmp_dir = tempfile.TemporaryDirectory() - shutil.copytree(get_asset_path(), os.path.join(tmp_dir.name, "assets")) - return tmp_dir.name, tmp_dir - - -class Test_LoadSave(unittest.TestCase): - test_dirpath, test_dir = create_temp_assets_dir() - test_filepath = os.path.join(test_dirpath, "assets", - "steam-train-whistle-daniel_simon.mp3") - test_filepath_wav = os.path.join(test_dirpath, "assets", - "steam-train-whistle-daniel_simon.wav") - - def setUp(self): - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True - - def test_1_save(self): - for backend in BACKENDS_MP3: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_1_save(self.test_filepath, False) - - for backend in BACKENDS: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_1_save(self.test_filepath_wav, True) - - def _test_1_save(self, test_filepath, normalization): - # load signal - x, sr = torchaudio.load(test_filepath, normalization=normalization) - - # check save - new_filepath = os.path.join(self.test_dirpath, "test.wav") - torchaudio.save(new_filepath, x, sr) - self.assertTrue(os.path.isfile(new_filepath)) - os.unlink(new_filepath) - - # check automatic normalization - x /= 1 << 31 - torchaudio.save(new_filepath, x, sr) - self.assertTrue(os.path.isfile(new_filepath)) - os.unlink(new_filepath) - - # test save 1d tensor - x = x[0, :] # get mono signal - x.squeeze_() # remove channel dim - torchaudio.save(new_filepath, x, sr) - self.assertTrue(os.path.isfile(new_filepath)) - os.unlink(new_filepath) - - # don't allow invalid sizes as inputs - with self.assertRaises(ValueError): - x.unsqueeze_(1) # L x C not C x L - torchaudio.save(new_filepath, x, sr) - - with self.assertRaises(ValueError): - x.squeeze_() - x.unsqueeze_(1) - x.unsqueeze_(0) # 1 x L x 1 - torchaudio.save(new_filepath, x, sr) - - # don't save to folders that don't exist - with self.assertRaises(OSError): - new_filepath = os.path.join(self.test_dirpath, "no-path", - "test.wav") - 
torchaudio.save(new_filepath, x, sr) - - def test_1_save_sine(self): - for backend in BACKENDS: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_1_save_sine() - - def _test_1_save_sine(self): - - # save created file - sinewave_filepath = os.path.join(self.test_dirpath, "assets", - "sinewave.wav") - sr = 16000 - freq = 440 - volume = 0.3 - - y = (torch.cos( - 2 * math.pi * torch.arange(0, 4 * sr).float() * freq / sr)) - y.unsqueeze_(0) - # y is between -1 and 1, so must scale - y = (y * volume * (2**31)).long() - torchaudio.save(sinewave_filepath, y, sr) - self.assertTrue(os.path.isfile(sinewave_filepath)) - - # test precision - new_precision = 32 - new_filepath = os.path.join(self.test_dirpath, "test.wav") - si, ei = torchaudio.info(sinewave_filepath) - torchaudio.save(new_filepath, y, sr, new_precision) - si32, ei32 = torchaudio.info(new_filepath) - self.assertEqual(si.precision, 16) - self.assertEqual(si32.precision, new_precision) - os.unlink(new_filepath) - - def test_2_load(self): - for backend in BACKENDS_MP3: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_2_load(self.test_filepath, 278756) - - for backend in BACKENDS: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_2_load(self.test_filepath_wav, 276858) - - def _test_2_load(self, test_filepath, length): - # check normal loading - x, sr = torchaudio.load(test_filepath) - self.assertEqual(sr, 44100) - self.assertEqual(x.size(), (2, length)) - - # check offset - offset = 15 - x, _ = torchaudio.load(test_filepath) - x_offset, _ = torchaudio.load(test_filepath, offset=offset) - self.assertTrue(x[:, offset:].allclose(x_offset)) - - # check number of frames - n = 201 - x, _ = torchaudio.load(test_filepath, num_frames=n) - self.assertTrue(x.size(), (2, n)) - - # check channels first - x, _ = torchaudio.load(test_filepath, channels_first=False) - self.assertEqual(x.size(), (length, 2)) - - # check raising errors - with 
self.assertRaises(OSError): - torchaudio.load("file-does-not-exist.mp3") - - with self.assertRaises(OSError): - tdir = os.path.join( - os.path.dirname(self.test_dirpath), "torchaudio") - torchaudio.load(tdir) - - def test_2_load_nonormalization(self): - for backend in BACKENDS_MP3: - if backend == 'sox_io': - continue - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_2_load_nonormalization(self.test_filepath, 278756) - - def _test_2_load_nonormalization(self, test_filepath, length): - - # check no normalizing - x, _ = torchaudio.load(test_filepath, normalization=False) - self.assertTrue(x.min() <= -1.0) - self.assertTrue(x.max() >= 1.0) - - # check different input tensor type - x, _ = torchaudio.load(test_filepath, torch.LongTensor(), normalization=False) - self.assertTrue(isinstance(x, torch.LongTensor)) - - def test_3_load_and_save_is_identity(self): - for backend in BACKENDS: - if backend == 'sox_io': - continue - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_3_load_and_save_is_identity() - - def _test_3_load_and_save_is_identity(self): - input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav') - tensor, sample_rate = torchaudio.load(input_path) - output_path = os.path.join(self.test_dirpath, 'test.wav') - torchaudio.save(output_path, tensor, sample_rate) - tensor2, sample_rate2 = torchaudio.load(output_path) - self.assertTrue(tensor.allclose(tensor2)) - self.assertEqual(sample_rate, sample_rate2) - os.unlink(output_path) - - @unittest.skipIf(any(be not in BACKENDS for be in ["sox", "soundfile"]), "sox and soundfile are not available") - def test_3_load_and_save_is_identity_across_backend(self): - with self.subTest(): - self._test_3_load_and_save_is_identity_across_backend("sox", "soundfile") - with self.subTest(): - self._test_3_load_and_save_is_identity_across_backend("soundfile", "sox") - - def _test_3_load_and_save_is_identity_across_backend(self, backend1, backend2): - 
torchaudio.set_audio_backend(backend1) - input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav') - tensor1, sample_rate1 = torchaudio.load(input_path) - - output_path = os.path.join(self.test_dirpath, 'test.wav') - torchaudio.save(output_path, tensor1, sample_rate1) - - torchaudio.set_audio_backend(backend2) - tensor2, sample_rate2 = torchaudio.load(output_path) - - self.assertTrue(tensor1.allclose(tensor2)) - self.assertEqual(sample_rate1, sample_rate2) - os.unlink(output_path) - - def test_4_load_partial(self): - for backend in BACKENDS_MP3: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_4_load_partial() - - def _test_4_load_partial(self): - num_frames = 101 - offset = 201 - # load entire mono sinewave wav file, load a partial copy and then compare - input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav') - x_sine_full, sr_sine = torchaudio.load(input_sine_path) - x_sine_part, _ = torchaudio.load(input_sine_path, num_frames=num_frames, offset=offset) - l1_error = x_sine_full[:, offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item() - # test for the correct number of samples and that the correct portion was loaded - self.assertEqual(x_sine_part.size(1), num_frames) - self.assertEqual(l1_error, 0.) - # create a two channel version of this wavefile - x_2ch_sine = x_sine_full.repeat(1, 2) - out_2ch_sine_path = os.path.join(self.test_dirpath, 'assets', '2ch_sinewave.wav') - torchaudio.save(out_2ch_sine_path, x_2ch_sine, sr_sine) - x_2ch_sine_load, _ = torchaudio.load(out_2ch_sine_path, num_frames=num_frames, offset=offset) - os.unlink(out_2ch_sine_path) - l1_error = x_2ch_sine_load.sub(x_2ch_sine[:, offset:(offset + num_frames)]).abs().sum().item() - self.assertEqual(l1_error, 0.) 
- - # test with two channel mp3 - x_2ch_full, sr_2ch = torchaudio.load(self.test_filepath, normalization=True) - x_2ch_part, _ = torchaudio.load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset) - l1_error = x_2ch_full[:, offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item() - self.assertEqual(x_2ch_part.size(1), num_frames) - self.assertEqual(l1_error, 0.) - - # check behavior if number of samples would exceed file length - offset_ns = 300 - x_ns, _ = torchaudio.load(input_sine_path, num_frames=100000, offset=offset_ns) - self.assertEqual(x_ns.size(1), x_sine_full.size(1) - offset_ns) - - # check when offset is beyond the end of the file - with self.assertRaises(RuntimeError): - torchaudio.load(input_sine_path, offset=100000) - - def test_5_get_info(self): - for backend in BACKENDS: - with self.subTest(): - torchaudio.set_audio_backend(backend) - self._test_5_get_info() - - def _test_5_get_info(self): - input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav') - channels, samples, rate, precision = (1, 64000, 16000, 16) - si, ei = torchaudio.info(input_path) - self.assertEqual(si.channels, channels) - self.assertEqual(si.length, samples) - self.assertEqual(si.rate, rate) - self.assertEqual(ei.bits_per_sample, precision) diff --git a/test/torchaudio_unittest/backend/soundfile/info_test.py b/test/torchaudio_unittest/backend/soundfile/info_test.py index 3b3f792281..23f7e2cb4e 100644 --- a/test/torchaudio_unittest/backend/soundfile/info_test.py +++ b/test/torchaudio_unittest/backend/soundfile/info_test.py @@ -3,7 +3,7 @@ import tarfile import torch -from torchaudio.backend import _soundfile_backend as soundfile_backend +from torchaudio.backend import soundfile_backend from torchaudio._internal import module_utils as _mod_utils from torchaudio_unittest.common_utils import ( diff --git a/test/torchaudio_unittest/backend/soundfile/load_test.py b/test/torchaudio_unittest/backend/soundfile/load_test.py index 
399266de8f..0e3a240d26 100644 --- a/test/torchaudio_unittest/backend/soundfile/load_test.py +++ b/test/torchaudio_unittest/backend/soundfile/load_test.py @@ -4,7 +4,7 @@ import torch from torchaudio._internal import module_utils as _mod_utils -from torchaudio.backend import _soundfile_backend as soundfile_backend +from torchaudio.backend import soundfile_backend from parameterized import parameterized from torchaudio_unittest.common_utils import ( diff --git a/test/torchaudio_unittest/backend/soundfile/save_test.py b/test/torchaudio_unittest/backend/soundfile/save_test.py index 2c511ae3a1..06b45a63e6 100644 --- a/test/torchaudio_unittest/backend/soundfile/save_test.py +++ b/test/torchaudio_unittest/backend/soundfile/save_test.py @@ -2,7 +2,7 @@ from unittest.mock import patch from torchaudio._internal import module_utils as _mod_utils -from torchaudio.backend import _soundfile_backend as soundfile_backend +from torchaudio.backend import soundfile_backend from torchaudio_unittest.common_utils import ( TempDirMixin, diff --git a/test/torchaudio_unittest/backend/utils_test.py b/test/torchaudio_unittest/backend/utils_test.py index 3f355be0cb..e7f908762d 100644 --- a/test/torchaudio_unittest/backend/utils_test.py +++ b/test/torchaudio_unittest/backend/utils_test.py @@ -25,31 +25,13 @@ class TestBackendSwitch_NoBackend(BackendSwitchMixin, common_utils.TorchaudioTes backend_module = torchaudio.backend.no_backend -@common_utils.skipIfNoExtension -class TestBackendSwitch_SoX(BackendSwitchMixin, common_utils.TorchaudioTestCase): - backend = 'sox' - backend_module = torchaudio.backend.sox_backend - - @common_utils.skipIfNoExtension class TestBackendSwitch_SoXIO(BackendSwitchMixin, common_utils.TorchaudioTestCase): backend = 'sox_io' backend_module = torchaudio.backend.sox_io_backend -@common_utils.skipIfNoModule('soundfile') -class TestBackendSwitch_soundfile_legacy(BackendSwitchMixin, common_utils.TorchaudioTestCase): - backend = 'soundfile' - backend_module = 
torchaudio.backend.soundfile_backend - - def setUp(self): - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True - - def tearDown(self): - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = None - - @common_utils.skipIfNoModule('soundfile') class TestBackendSwitch_soundfile(BackendSwitchMixin, common_utils.TorchaudioTestCase): backend = 'soundfile' - backend_module = torchaudio.backend._soundfile_backend + backend_module = torchaudio.backend.soundfile_backend diff --git a/test/torchaudio_unittest/common_utils/backend_utils.py b/test/torchaudio_unittest/common_utils/backend_utils.py index 7e519c80eb..84dd73ed2e 100644 --- a/test/torchaudio_unittest/common_utils/backend_utils.py +++ b/test/torchaudio_unittest/common_utils/backend_utils.py @@ -6,15 +6,13 @@ def set_audio_backend(backend): """Allow additional backend value, 'default'""" backends = torchaudio.list_audio_backends() - if backend == 'soundfile-new': + if backend == 'soundfile': be = 'soundfile' - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False elif backend == 'default': if 'sox_io' in backends: be = 'sox_io' elif 'soundfile' in backends: be = 'soundfile' - torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True else: raise unittest.SkipTest('No default backend available') else: diff --git a/test/torchaudio_unittest/datasets/tedlium_test.py b/test/torchaudio_unittest/datasets/tedlium_test.py index abb6ea5b5c..20e5c2e838 100644 --- a/test/torchaudio_unittest/datasets/tedlium_test.py +++ b/test/torchaudio_unittest/datasets/tedlium_test.py @@ -143,13 +143,6 @@ class TestTedliumSoundfile(Tedlium, TorchaudioTestCase): backend = "soundfile" -class TestTedliumSoundfileNew(Tedlium, TorchaudioTestCase): - backend = "soundfile-new" - - if platform.system() != "Windows": - class TestTedliumSox(Tedlium, TorchaudioTestCase): - backend = "sox" - class TestTedliumSoxIO(Tedlium, TorchaudioTestCase): backend = "sox_io" diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py index 35f5fd41d5..e72ecac4cb 100644 --- 
a/torchaudio/__init__.py +++ b/torchaudio/__init__.py @@ -10,20 +10,10 @@ transforms, ) -USE_SOUNDFILE_LEGACY_INTERFACE = None - from torchaudio.backend import ( list_audio_backends, get_audio_backend, set_audio_backend, - save_encinfo, - sox_signalinfo_t, - sox_encodinginfo_t, - get_sox_option_t, - get_sox_encoding_t, - get_sox_bool, - SignalInfo, - EncodingInfo, ) try: diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py index 361935229f..c3fdf0b439 100644 --- a/torchaudio/backend/__init__.py +++ b/torchaudio/backend/__init__.py @@ -5,18 +5,6 @@ get_audio_backend, set_audio_backend, ) -from .sox_backend import ( - save_encinfo, - sox_signalinfo_t, - sox_encodinginfo_t, - get_sox_option_t, - get_sox_encoding_t, - get_sox_bool, -) -from .common import ( - SignalInfo, - EncodingInfo, -) utils._init_audio_backend() diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py deleted file mode 100644 index f939548413..0000000000 --- a/torchaudio/backend/_soundfile_backend.py +++ /dev/null @@ -1,449 +0,0 @@ -"""The new soundfile backend which will become default in 0.8.0 onward""" -from typing import Tuple, Optional -import warnings - -import torch -from torchaudio._internal import module_utils as _mod_utils -from .common import AudioMetaData - - -if _mod_utils.is_module_available("soundfile"): - import soundfile - - -# Mapping from soundfile subtype to number of bits per sample. -# This is mostly heuristical and the value is set to 0 when it is irrelevant -# (lossy formats) or when it can't be inferred. -# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: -# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, -# the default seems to be 8 bits but it can be compressed further to 4 bits. 
-# The dict is inspired from -# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 -_SUBTYPE_TO_BITS_PER_SAMPLE = { - 'PCM_S8': 8, # Signed 8 bit data - 'PCM_16': 16, # Signed 16 bit data - 'PCM_24': 24, # Signed 24 bit data - 'PCM_32': 32, # Signed 32 bit data - 'PCM_U8': 8, # Unsigned 8 bit data (WAV and RAW only) - 'FLOAT': 32, # 32 bit float data - 'DOUBLE': 64, # 64 bit float data - 'ULAW': 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types - 'ALAW': 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types - 'IMA_ADPCM': 0, # IMA ADPCM. - 'MS_ADPCM': 0, # Microsoft ADPCM. - 'GSM610': 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) - 'VOX_ADPCM': 0, # OKI / Dialogix ADPCM - 'G721_32': 0, # 32kbs G721 ADPCM encoding. - 'G723_24': 0, # 24kbs G723 ADPCM encoding. - 'G723_40': 0, # 40kbs G723 ADPCM encoding. - 'DWVW_12': 12, # 12 bit Delta Width Variable Word encoding. - 'DWVW_16': 16, # 16 bit Delta Width Variable Word encoding. - 'DWVW_24': 24, # 24 bit Delta Width Variable Word encoding. - 'DWVW_N': 0, # N bit Delta Width Variable Word encoding. - 'DPCM_8': 8, # 8 bit differential PCM (XI only) - 'DPCM_16': 16, # 16 bit differential PCM (XI only) - 'VORBIS': 0, # Xiph Vorbis encoding. (lossy) - 'ALAC_16': 16, # Apple Lossless Audio Codec (16 bit). - 'ALAC_20': 20, # Apple Lossless Audio Codec (20 bit). - 'ALAC_24': 24, # Apple Lossless Audio Codec (24 bit). - 'ALAC_32': 32, # Apple Lossless Audio Codec (32 bit). -} - - -def _get_bit_depth(subtype): - if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: - warnings.warn( - f"The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " - "attribute will be set to 0. If you are seeing this warning, please " - "report by opening an issue on github (after checking for existing/closed ones). " - "You may otherwise ignore this warning." 
- ) - return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) - - -_SUBTYPE_TO_ENCODING = { - 'PCM_S8': 'PCM_S', - 'PCM_16': 'PCM_S', - 'PCM_24': 'PCM_S', - 'PCM_32': 'PCM_S', - 'PCM_U8': 'PCM_U', - 'FLOAT': 'PCM_F', - 'DOUBLE': 'PCM_F', - 'ULAW': 'ULAW', - 'ALAW': 'ALAW', - 'VORBIS': 'VORBIS', -} - - -def _get_encoding(format: str, subtype: str): - if format == 'FLAC': - return 'FLAC' - return _SUBTYPE_TO_ENCODING.get(subtype, 'UNKNOWN') - - -@_mod_utils.requires_module("soundfile") -def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: - """Get signal information of an audio file. - - Args: - filepath (path-like object or file-like object): - Source of audio data. - Note: - * This argument is intentionally annotated as ``str`` only, - for the consistency with "sox_io" backend, which has a restriction - on type annotation due to TorchScript compiler compatiblity. - format (str, optional): - Not used. PySoundFile does not accept format hint. - - Returns: - AudioMetaData: meta data of the given audio. - """ - sinfo = soundfile.info(filepath) - return AudioMetaData( - sinfo.samplerate, - sinfo.frames, - sinfo.channels, - bits_per_sample=_get_bit_depth(sinfo.subtype), - encoding=_get_encoding(sinfo.format, sinfo.subtype), - ) - - -_SUBTYPE2DTYPE = { - "PCM_S8": "int8", - "PCM_U8": "uint8", - "PCM_16": "int16", - "PCM_32": "int32", - "FLOAT": "float32", - "DOUBLE": "float64", -} - - -@_mod_utils.requires_module("soundfile") -def load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[torch.Tensor, int]: - """Load audio data from file. - - Note: - The formats this function can handle depend on the soundfile installation. 
- This function is tested on the following formats; - - * WAV - - * 32-bit floating-point - * 32-bit signed integer - * 16-bit signed integer - * 8-bit unsigned integer - - * FLAC - * OGG/VORBIS - * SPHERE - - By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with - ``float32`` dtype and the shape of ``[channel, time]``. - The samples are normalized to fit in the range of ``[-1.0, 1.0]``. - - When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit - signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), - by providing ``normalize=False``, this function can return integer Tensor, where the samples - are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor - for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. - - ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as - ``flac`` and ``mp3``. - For these formats, this function always returns ``float32`` Tensor with values normalized to - ``[-1.0, 1.0]``. - - Args: - filepath (path-like object or file-like object): - Source of audio data. - Note: - * This argument is intentionally annotated as ``str`` only, - for the consistency with "sox_io" backend, which has a restriction - on type annotation due to TorchScript compiler compatiblity. - frame_offset (int): - Number of frames to skip before start reading data. - num_frames (int): - Maximum number of frames to read. ``-1`` reads all the remaining samples, - starting from ``frame_offset``. - This function may return the less number of frames if there is not enough - frames in the given file. - normalize (bool): - When ``True``, this function always return ``float32``, and sample values are - normalized to ``[-1.0, 1.0]``. - If input file is integer WAV, giving ``False`` will change the resulting Tensor type to - integer type. 
- This argument has no effect for formats other than integer WAV type. - channels_first (bool): - When True, the returned Tensor has dimension ``[channel, time]``. - Otherwise, the returned Tensor's dimension is ``[time, channel]``. - format (str, optional): - Not used. PySoundFile does not accept format hint. - - Returns: - Tuple[torch.Tensor, int]: Resulting Tensor and sample rate. - If the input file has integer wav format and normalization is off, then it has - integer type, else ``float32`` type. If ``channels_first=True``, it has - ``[channel, time]`` else ``[time, channel]``. - """ - with soundfile.SoundFile(filepath, "r") as file_: - if file_.format != "WAV" or normalize: - dtype = "float32" - elif file_.subtype not in _SUBTYPE2DTYPE: - raise ValueError(f"Unsupported subtype: {file_.subtype}") - else: - dtype = _SUBTYPE2DTYPE[file_.subtype] - - frames = file_._prepare_read(frame_offset, None, num_frames) - waveform = file_.read(frames, dtype, always_2d=True) - sample_rate = file_.samplerate - - waveform = torch.from_numpy(waveform) - if channels_first: - waveform = waveform.t() - return waveform, sample_rate - - -def _get_subtype_for_wav( - dtype: torch.dtype, - encoding: str, - bits_per_sample: int): - if not encoding: - if not bits_per_sample: - subtype = { - torch.uint8: "PCM_U8", - torch.int16: "PCM_16", - torch.int32: "PCM_32", - torch.float32: "FLOAT", - torch.float64: "DOUBLE", - }.get(dtype) - if not subtype: - raise ValueError(f"Unsupported dtype for wav: {dtype}") - return subtype - if bits_per_sample == 8: - return "PCM_U8" - return f"PCM_{bits_per_sample}" - if encoding == "PCM_S": - if not bits_per_sample: - return "PCM_32" - if bits_per_sample == 8: - raise ValueError("wav does not support 8-bit signed PCM encoding.") - return f"PCM_{bits_per_sample}" - if encoding == "PCM_U": - if bits_per_sample in (None, 8): - return "PCM_U8" - raise ValueError("wav only supports 8-bit unsigned PCM encoding.") - if encoding == "PCM_F": - if bits_per_sample 
in (None, 32): - return "FLOAT" - if bits_per_sample == 64: - return "DOUBLE" - raise ValueError("wav only supports 32/64-bit float PCM encoding.") - if encoding == "ULAW": - if bits_per_sample in (None, 8): - return "ULAW" - raise ValueError("wav only supports 8-bit mu-law encoding.") - if encoding == "ALAW": - if bits_per_sample in (None, 8): - return "ALAW" - raise ValueError("wav only supports 8-bit a-law encoding.") - raise ValueError(f"wav does not support {encoding}.") - - -def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): - if encoding in (None, "PCM_S"): - return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32" - if encoding in ("PCM_U", "PCM_F"): - raise ValueError(f"sph does not support {encoding} encoding.") - if encoding == "ULAW": - if bits_per_sample in (None, 8): - return "ULAW" - raise ValueError("sph only supports 8-bit for mu-law encoding.") - if encoding == "ALAW": - return "ALAW" - raise ValueError(f"sph does not support {encoding}.") - - -def _get_subtype( - dtype: torch.dtype, - format: str, - encoding: str, - bits_per_sample: int): - if format == "wav": - return _get_subtype_for_wav(dtype, encoding, bits_per_sample) - if format == "flac": - if encoding: - raise ValueError("flac does not support encoding.") - if not bits_per_sample: - return "PCM_24" - if bits_per_sample > 24: - raise ValueError("flac does not support bits_per_sample > 24.") - return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" - if format in ("ogg", "vorbis"): - if encoding or bits_per_sample: - raise ValueError( - "ogg/vorbis does not support encoding/bits_per_sample.") - return "VORBIS" - if format == "sph": - return _get_subtype_for_sphere(encoding, bits_per_sample) - if format in ("nis", "nist"): - return "PCM_16" - raise ValueError(f"Unsupported format: {format}") - - -@_mod_utils.requires_module("soundfile") -def save( - filepath: str, - src: torch.Tensor, - sample_rate: int, - channels_first: bool = True, - compression: 
Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): - """Save audio data to file. - - Note: - The formats this function can handle depend on the soundfile installation. - This function is tested on the following formats; - - * WAV - - * 32-bit floating-point - * 32-bit signed integer - * 16-bit signed integer - * 8-bit unsigned integer - - * FLAC - * OGG/VORBIS - * SPHERE - - Args: - filepath (str or pathlib.Path): Path to audio file. - This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str`` - for the consistency with "sox_io" backend, which has a restriction on type annotation - for TorchScript compiler compatiblity. - src (torch.Tensor): Audio data to save. must be 2D tensor. - sample_rate (int): sampling rate - channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``, - otherwise ``[time, channel]``. - compression (Optional[float]): Not used. - It is here only for interface compatibility reson with "sox_io" backend. - format (str, optional): Override the audio format. - When ``filepath`` argument is path-like object, audio format is - inferred from file extension. If the file extension is missing or - different, you can specify the correct format with this argument. - - When ``filepath`` argument is file-like object, - this argument is required. - - Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, - ``"flac"`` and ``"sph"``. - encoding (str, optional): Changes the encoding for supported formats. - This argument is effective only for supported formats, sush as - ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are; - - - ``"PCM_S"`` (signed integer Linear PCM) - - ``"PCM_U"`` (unsigned integer Linear PCM) - - ``"PCM_F"`` (floating point PCM) - - ``"ULAW"`` (mu-law) - - ``"ALAW"`` (a-law) - - bits_per_sample (int, optional): Changes the bit depth for the - supported formats. 
- When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``, - you can change the bit depth. - Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``. - - Supported formats/encodings/bit depth/compression are: - - ``"wav"`` - - 32-bit floating-point PCM - - 32-bit signed integer PCM - - 24-bit signed integer PCM - - 16-bit signed integer PCM - - 8-bit unsigned integer PCM - - 8-bit mu-law - - 8-bit a-law - - Note: Default encoding/bit depth is determined by the dtype of - the input Tensor. - - ``"flac"`` - - 8-bit - - 16-bit - - 24-bit (default) - - ``"ogg"``, ``"vorbis"`` - - Doesn't accept changing configuration. - - ``"sph"`` - - 8-bit signed integer PCM - - 16-bit signed integer PCM - - 24-bit signed integer PCM - - 32-bit signed integer PCM (default) - - 8-bit mu-law - - 8-bit a-law - - 16-bit a-law - - 24-bit a-law - - 32-bit a-law - - """ - if src.ndim != 2: - raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") - if compression is not None: - warnings.warn( - '`save` function of "soundfile" backend does not support "compression" parameter. ' - "The argument is silently ignored." 
- ) - if hasattr(filepath, 'write'): - if format is None: - raise RuntimeError('`format` is required when saving to file object.') - ext = format.lower() - else: - ext = str(filepath).split(".")[-1].lower() - - if bits_per_sample not in (None, 8, 16, 24, 32, 64): - raise ValueError("Invalid bits_per_sample.") - subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) - - # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, - # so we extend the extensions manually here - if ext in ["nis", "nist", "sph"] and format is None: - format = "NIST" - - if channels_first: - src = src.t() - - soundfile.write( - file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format - ) - - -@_mod_utils.requires_module("soundfile") -@_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0") -def load_wav( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - channels_first: bool = True, -) -> Tuple[torch.Tensor, int]: - """Load wave file. - - This function is defined only for the purpose of compatibility against other backend - for simple usecases, such as ``torchaudio.load_wav(filepath)``. - The implementation is same as :py:func:`load`. - """ - return load( - filepath, - frame_offset, - num_frames, - normalize=False, - channels_first=channels_first, - ) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 571f950109..6ccd36e4dc 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -1,7 +1,3 @@ -from typing import Any, Optional -import warnings - - class AudioMetaData: """Return type of ``torchaudio.info`` function. @@ -28,185 +24,3 @@ def __init__( self.num_channels = num_channels self.bits_per_sample = bits_per_sample self.encoding = encoding - - -class SignalInfo: - """One of return types of ``torchaudio.info`` functions. 
- - This class is used by :ref:`"sox" backend (deprecated)` and - :ref:`"soundfile" backend with the legacy interface (deprecated)`. - - See https://fossies.org/dox/sox-14.4.2/structsox__signalinfo__t.html - - :ivar Optional[int] channels: The number of channels - :ivar Optional[float] rate: Sampleing rate - :ivar Optional[int] precision: Bit depth - :ivar Optional[int] length: For :ref:`sox backend`, the number of samples. - (frames * channels). For :ref:`soundfile backend`, the number of frames. - """ - def __init__(self, - channels: Optional[int] = None, - rate: Optional[float] = None, - precision: Optional[int] = None, - length: Optional[int] = None) -> None: - message = ( - f'{self.__module__}.{self.__class__.__name__} has been deprecated ' - 'and will be removed from 0.9.0 release. ' - 'Please migrate to `AudioMetaData`.' - ) - warnings.warn(message) - self.channels = channels - self.rate = rate - self.precision = precision - self.length = length - - -class EncodingInfo: - """One of return types of ``torchaudio.info`` functions. - - This class is used by :ref:`"sox" backend (deprecated)` and - :ref:`"soundfile" backend with the legacy interface (deprecated)`. - - See https://fossies.org/dox/sox-14.4.2/structsox__encodinginfo__t.html - - :ivar Optional[int] encoding: sox_encoding_t - :ivar Optional[int] bits_per_sample: bit depth - :ivar Optional[float] compression: Compression option - :ivar Any reverse_bytes: - :ivar Any reverse_nibbles: - :ivar Any reverse_bits: - :ivar Optional[bool] opposite_endian: - """ - def __init__(self, - encoding: Any = None, - bits_per_sample: Optional[int] = None, - compression: Optional[float] = None, - reverse_bytes: Any = None, - reverse_nibbles: Any = None, - reverse_bits: Any = None, - opposite_endian: Optional[bool] = None) -> None: - message = ( - f'{self.__module__}.{self.__class__.__name__} has been deprecated ' - 'and will be removed from 0.9.0 release. ' - 'Please migrate to `AudioMetaData`.' 
- ) - warnings.warn(message) - self.encoding = encoding - self.bits_per_sample = bits_per_sample - self.compression = compression - self.reverse_bytes = reverse_bytes - self.reverse_nibbles = reverse_nibbles - self.reverse_bits = reverse_bits - self.opposite_endian = opposite_endian - - -_LOAD_DOCSTRING = r"""Loads an audio file from disk into a tensor - -Args: - filepath: Path to audio file - - out: An optional output tensor to use instead of creating one. (Default: ``None``) - - normalization: Optional normalization. - If boolean `True`, then output is divided by `1 << 31`. - Assuming the input is signed 32-bit audio, this normalizes to `[-1, 1]`. - If `float`, then output is divided by that number. - If `Callable`, then the output is passed as a paramete to the given function, - then the output is divided by the result. (Default: ``True``) - - channels_first: Set channels first or length first in result. (Default: ``True``) - - num_frames: Number of frames to load. 0 to load everything after the offset. - (Default: ``0``) - - offset: Number of frames from the start of the file to begin data loading. - (Default: ``0``) - - signalinfo: A sox_signalinfo_t type, which could be helpful if the - audio type cannot be automatically determined. (Default: ``None``) - - encodinginfo: A sox_encodinginfo_t type, which could be set if the - audio type cannot be automatically determined. (Default: ``None``) - - filetype: A filetype or extension to be set if sox cannot determine it - automatically. (Default: ``None``) - -Returns: - (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where - L is the number of audio frames and - C is the number of channels. 
- An integer which is the sample rate of the audio (as listed in the metadata of the file) - -Example - >>> data, sample_rate = torchaudio.load('foo.mp3') - >>> print(data.size()) - torch.Size([2, 278756]) - >>> print(sample_rate) - 44100 - >>> data_vol_normalized, _ = torchaudio.load('foo.mp3', normalization=lambda x: torch.abs(x).max()) - >>> print(data_vol_normalized.abs().max()) - 1. -""" - - -_LOAD_WAV_DOCSTRING = r""" Loads a wave file. - -It assumes that the wav file uses 16 bit per sample that needs normalization by -shifting the input right by 16 bits. - -Args: - filepath: Path to audio file - -Returns: - (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where L is the number - of audio frames and C is the number of channels. An integer which is the sample rate of the - audio (as listed in the metadata of the file) -""" - -_SAVE_DOCSTRING = r"""Saves a Tensor on file as an audio file - -Args: - filepath: Path to audio file - src: An input 2D tensor of shape `[C x L]` or `[L x C]` where L is - the number of audio frames, C is the number of channels - sample_rate: An integer which is the sample rate of the - audio (as listed in the metadata of the file) - precision Bit precision (Default: ``16``) - channels_first (bool, optional): Set channels first or length first in result. ( - Default: ``True``) -""" - - -_INFO_DOCSTRING = r"""Gets metadata from an audio file without loading the signal. - -Args: - filepath: Path to audio file - -Returns: - (sox_signalinfo_t, sox_encodinginfo_t): A si (sox_signalinfo_t) signal - info as a python object. 
An ei (sox_encodinginfo_t) encoding info - -Example - >>> si, ei = torchaudio.info('foo.wav') - >>> rate, channels, encoding = si.rate, si.channels, ei.encoding -""" - - -def _impl_load(func): - setattr(func, '__doc__', _LOAD_DOCSTRING) - return func - - -def _impl_load_wav(func): - setattr(func, '__doc__', _LOAD_WAV_DOCSTRING) - return func - - -def _impl_save(func): - setattr(func, '__doc__', _SAVE_DOCSTRING) - return func - - -def _impl_info(func): - setattr(func, '__doc__', _INFO_DOCSTRING) - return func diff --git a/torchaudio/backend/no_backend.py b/torchaudio/backend/no_backend.py index 60571c34a9..453bfcdc54 100644 --- a/torchaudio/backend/no_backend.py +++ b/torchaudio/backend/no_backend.py @@ -3,33 +3,24 @@ from torch import Tensor -from . import common -from .common import SignalInfo, EncodingInfo - -@common._impl_load def load(filepath: Union[str, Path], out: Optional[Tensor] = None, normalization: Union[bool, float, Callable] = True, channels_first: bool = True, num_frames: int = 0, offset: int = 0, - signalinfo: Optional[SignalInfo] = None, - encodinginfo: Optional[EncodingInfo] = None, filetype: Optional[str] = None) -> Tuple[Tensor, int]: raise RuntimeError('No audio I/O backend is available.') -@common._impl_load_wav def load_wav(filepath: Union[str, Path], **kwargs: Any) -> Tuple[Tensor, int]: raise RuntimeError('No audio I/O backend is available.') -@common._impl_save def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: raise RuntimeError('No audio I/O backend is available.') -@common._impl_info -def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]: +def info(filepath: str) -> None: raise RuntimeError('No audio I/O backend is available.') diff --git a/torchaudio/backend/soundfile_backend.py b/torchaudio/backend/soundfile_backend.py index 49cada21b0..f939548413 100644 --- a/torchaudio/backend/soundfile_backend.py +++ b/torchaudio/backend/soundfile_backend.py @@ -1,127 +1,449 @@ 
-import os -from typing import Optional, Tuple +"""The new soundfile backend which will become default in 0.8.0 onward""" +from typing import Tuple, Optional +import warnings import torch -from torch import Tensor +from torchaudio._internal import module_utils as _mod_utils +from .common import AudioMetaData -from torchaudio._internal import ( - module_utils as _mod_utils, - misc_ops as _misc_ops, -) -from . import common -from .common import SignalInfo, EncodingInfo -if _mod_utils.is_module_available('soundfile'): +if _mod_utils.is_module_available("soundfile"): import soundfile -_subtype_to_precision = { - 'PCM_S8': 8, - 'PCM_16': 16, - 'PCM_24': 24, - 'PCM_32': 32, - 'PCM_U8': 8 +# Mapping from soundfile subtype to number of bits per sample. +# This is mostly heuristical and the value is set to 0 when it is irrelevant +# (lossy formats) or when it can't be inferred. +# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: +# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, +# the default seems to be 8 bits but it can be compressed further to 4 bits. +# The dict is inspired from +# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 +_SUBTYPE_TO_BITS_PER_SAMPLE = { + 'PCM_S8': 8, # Signed 8 bit data + 'PCM_16': 16, # Signed 16 bit data + 'PCM_24': 24, # Signed 24 bit data + 'PCM_32': 32, # Signed 32 bit data + 'PCM_U8': 8, # Unsigned 8 bit data (WAV and RAW only) + 'FLOAT': 32, # 32 bit float data + 'DOUBLE': 64, # 64 bit float data + 'ULAW': 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + 'ALAW': 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + 'IMA_ADPCM': 0, # IMA ADPCM. + 'MS_ADPCM': 0, # Microsoft ADPCM. + 'GSM610': 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? 
https://en.wikipedia.org/wiki/Full_Rate) + 'VOX_ADPCM': 0, # OKI / Dialogix ADPCM + 'G721_32': 0, # 32kbs G721 ADPCM encoding. + 'G723_24': 0, # 24kbs G723 ADPCM encoding. + 'G723_40': 0, # 40kbs G723 ADPCM encoding. + 'DWVW_12': 12, # 12 bit Delta Width Variable Word encoding. + 'DWVW_16': 16, # 16 bit Delta Width Variable Word encoding. + 'DWVW_24': 24, # 24 bit Delta Width Variable Word encoding. + 'DWVW_N': 0, # N bit Delta Width Variable Word encoding. + 'DPCM_8': 8, # 8 bit differential PCM (XI only) + 'DPCM_16': 16, # 16 bit differential PCM (XI only) + 'VORBIS': 0, # Xiph Vorbis encoding. (lossy) + 'ALAC_16': 16, # Apple Lossless Audio Codec (16 bit). + 'ALAC_20': 20, # Apple Lossless Audio Codec (20 bit). + 'ALAC_24': 24, # Apple Lossless Audio Codec (24 bit). + 'ALAC_32': 32, # Apple Lossless Audio Codec (32 bit). } -@_mod_utils.requires_module('soundfile') -@common._impl_load -def load(filepath: str, - out: Optional[Tensor] = None, - normalization: Optional[bool] = True, - channels_first: Optional[bool] = True, - num_frames: int = 0, - offset: int = 0, - signalinfo: SignalInfo = None, - encodinginfo: EncodingInfo = None, - filetype: Optional[str] = None) -> Tuple[Tensor, int]: - r"""See torchaudio.load""" - - assert out is None - assert normalization - assert signalinfo is None - assert encodinginfo is None - - # stringify if `pathlib.Path` (noop if already `str`) - filepath = str(filepath) - - # check if valid file - if not os.path.isfile(filepath): - raise OSError("{} not found or is a directory".format(filepath)) - - if num_frames < -1: - raise ValueError("Expected value for num_samples -1 (entire file) or >=0") - if num_frames == 0: - num_frames = -1 - if offset < 0: - raise ValueError("Expected positive offset value") - - # initialize output tensor - # TODO call libsoundfile directly to avoid numpy - out, sample_rate = soundfile.read( - filepath, frames=num_frames, start=offset, dtype="float32", always_2d=True +def _get_bit_depth(subtype): + if 
subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: + warnings.warn( + f"The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " + "attribute will be set to 0. If you are seeing this warning, please " + "report by opening an issue on github (after checking for existing/closed ones). " + "You may otherwise ignore this warning." + ) + return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) + + +_SUBTYPE_TO_ENCODING = { + 'PCM_S8': 'PCM_S', + 'PCM_16': 'PCM_S', + 'PCM_24': 'PCM_S', + 'PCM_32': 'PCM_S', + 'PCM_U8': 'PCM_U', + 'FLOAT': 'PCM_F', + 'DOUBLE': 'PCM_F', + 'ULAW': 'ULAW', + 'ALAW': 'ALAW', + 'VORBIS': 'VORBIS', +} + + +def _get_encoding(format: str, subtype: str): + if format == 'FLAC': + return 'FLAC' + return _SUBTYPE_TO_ENCODING.get(subtype, 'UNKNOWN') + + +@_mod_utils.requires_module("soundfile") +def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: + """Get signal information of an audio file. + + Args: + filepath (path-like object or file-like object): + Source of audio data. + Note: + * This argument is intentionally annotated as ``str`` only, + for the consistency with "sox_io" backend, which has a restriction + on type annotation due to TorchScript compiler compatibility. + format (str, optional): + Not used. PySoundFile does not accept format hint. + + Returns: + AudioMetaData: meta data of the given audio.
+ """ + sinfo = soundfile.info(filepath) + return AudioMetaData( + sinfo.samplerate, + sinfo.frames, + sinfo.channels, + bits_per_sample=_get_bit_depth(sinfo.subtype), + encoding=_get_encoding(sinfo.format, sinfo.subtype), ) - out = torch.from_numpy(out).t() - if not channels_first: - out = out.t() - # normalize if needed - # _audio_normalization(out, normalization) +_SUBTYPE2DTYPE = { + "PCM_S8": "int8", + "PCM_U8": "uint8", + "PCM_16": "int16", + "PCM_32": "int32", + "FLOAT": "float32", + "DOUBLE": "float64", +} + + +@_mod_utils.requires_module("soundfile") +def load( + filepath: str, + frame_offset: int = 0, + num_frames: int = -1, + normalize: bool = True, + channels_first: bool = True, + format: Optional[str] = None, +) -> Tuple[torch.Tensor, int]: + """Load audio data from file. + + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + + * WAV - return out, sample_rate + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + * FLAC + * OGG/VORBIS + * SPHERE -@_mod_utils.requires_module('soundfile') -@_mod_utils.deprecated('Please use "torchaudio.load".', '0.9.0') -@common._impl_load_wav -def load_wav(filepath, **kwargs): - kwargs['normalization'] = 1 << 16 - return load(filepath, **kwargs) + By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with + ``float32`` dtype and the shape of ``[channel, time]``. + The samples are normalized to fit in the range of ``[-1.0, 1.0]``. 
+ When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit + signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), + by providing ``normalize=False``, this function can return integer Tensor, where the samples + are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor + for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. -@_mod_utils.requires_module('soundfile') -@common._impl_save -def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: - r"""See torchaudio.save""" + ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as + ``flac`` and ``mp3``. + For these formats, this function always returns ``float32`` Tensor with values normalized to + ``[-1.0, 1.0]``. - ch_idx, len_idx = (0, 1) if channels_first else (1, 0) + Args: + filepath (path-like object or file-like object): + Source of audio data. + Note: + * This argument is intentionally annotated as ``str`` only, + for the consistency with "sox_io" backend, which has a restriction + on type annotation due to TorchScript compiler compatibility. + frame_offset (int): + Number of frames to skip before start reading data. + num_frames (int): + Maximum number of frames to read. ``-1`` reads all the remaining samples, + starting from ``frame_offset``. + This function may return fewer frames if there are not enough + frames in the given file. + normalize (bool): + When ``True``, this function always returns ``float32``, and sample values are + normalized to ``[-1.0, 1.0]``. + If input file is integer WAV, giving ``False`` will change the resulting Tensor type to + integer type. + This argument has no effect for formats other than integer WAV type. + channels_first (bool): + When True, the returned Tensor has dimension ``[channel, time]``.
+ Otherwise, the returned Tensor's dimension is ``[time, channel]``. + format (str, optional): + Not used. PySoundFile does not accept format hint. - # check if save directory exists - abs_dirpath = os.path.dirname(os.path.abspath(filepath)) - if not os.path.isdir(abs_dirpath): - raise OSError("Directory does not exist: {}".format(abs_dirpath)) - # check that src is a CPU tensor - _misc_ops.check_input(src) - # Check/Fix shape of source data - if src.dim() == 1: - # 1d tensors as assumed to be mono signals - src.unsqueeze_(ch_idx) - elif src.dim() > 2 or src.size(ch_idx) > 16: - # assumes num_channels < 16 - raise ValueError( - "Expected format where C < 16, but found {}".format(src.size())) + Returns: + Tuple[torch.Tensor, int]: Resulting Tensor and sample rate. + If the input file has integer wav format and normalization is off, then it has + integer type, else ``float32`` type. If ``channels_first=True``, it has + ``[channel, time]`` else ``[time, channel]``. + """ + with soundfile.SoundFile(filepath, "r") as file_: + if file_.format != "WAV" or normalize: + dtype = "float32" + elif file_.subtype not in _SUBTYPE2DTYPE: + raise ValueError(f"Unsupported subtype: {file_.subtype}") + else: + dtype = _SUBTYPE2DTYPE[file_.subtype] + frames = file_._prepare_read(frame_offset, None, num_frames) + waveform = file_.read(frames, dtype, always_2d=True) + sample_rate = file_.samplerate + + waveform = torch.from_numpy(waveform) if channels_first: - src = src.t() + waveform = waveform.t() + return waveform, sample_rate + + +def _get_subtype_for_wav( + dtype: torch.dtype, + encoding: str, + bits_per_sample: int): + if not encoding: + if not bits_per_sample: + subtype = { + torch.uint8: "PCM_U8", + torch.int16: "PCM_16", + torch.int32: "PCM_32", + torch.float32: "FLOAT", + torch.float64: "DOUBLE", + }.get(dtype) + if not subtype: + raise ValueError(f"Unsupported dtype for wav: {dtype}") + return subtype + if bits_per_sample == 8: + return "PCM_U8" + return 
f"PCM_{bits_per_sample}" + if encoding == "PCM_S": + if not bits_per_sample: + return "PCM_32" + if bits_per_sample == 8: + raise ValueError("wav does not support 8-bit signed PCM encoding.") + return f"PCM_{bits_per_sample}" + if encoding == "PCM_U": + if bits_per_sample in (None, 8): + return "PCM_U8" + raise ValueError("wav only supports 8-bit unsigned PCM encoding.") + if encoding == "PCM_F": + if bits_per_sample in (None, 32): + return "FLOAT" + if bits_per_sample == 64: + return "DOUBLE" + raise ValueError("wav only supports 32/64-bit float PCM encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("wav only supports 8-bit mu-law encoding.") + if encoding == "ALAW": + if bits_per_sample in (None, 8): + return "ALAW" + raise ValueError("wav only supports 8-bit a-law encoding.") + raise ValueError(f"wav does not support {encoding}.") + + +def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): + if encoding in (None, "PCM_S"): + return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32" + if encoding in ("PCM_U", "PCM_F"): + raise ValueError(f"sph does not support {encoding} encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("sph only supports 8-bit for mu-law encoding.") + if encoding == "ALAW": + return "ALAW" + raise ValueError(f"sph does not support {encoding}.") + + +def _get_subtype( + dtype: torch.dtype, + format: str, + encoding: str, + bits_per_sample: int): + if format == "wav": + return _get_subtype_for_wav(dtype, encoding, bits_per_sample) + if format == "flac": + if encoding: + raise ValueError("flac does not support encoding.") + if not bits_per_sample: + return "PCM_24" + if bits_per_sample > 24: + raise ValueError("flac does not support bits_per_sample > 24.") + return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" + if format in ("ogg", "vorbis"): + if encoding or bits_per_sample: + raise ValueError( + 
"ogg/vorbis does not support encoding/bits_per_sample.") + return "VORBIS" + if format == "sph": + return _get_subtype_for_sphere(encoding, bits_per_sample) + if format in ("nis", "nist"): + return "PCM_16" + raise ValueError(f"Unsupported format: {format}") - if src.dtype == torch.int64: - # Soundfile doesn't support int64 - src = src.type(torch.int32) - precision = "PCM_S8" if precision == 8 else "PCM_" + str(precision) +@_mod_utils.requires_module("soundfile") +def save( + filepath: str, + src: torch.Tensor, + sample_rate: int, + channels_first: bool = True, + compression: Optional[float] = None, + format: Optional[str] = None, + encoding: Optional[str] = None, + bits_per_sample: Optional[int] = None, +): + """Save audio data to file. - return soundfile.write(filepath, src, sample_rate, precision) + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + * WAV -@_mod_utils.requires_module('soundfile') -@common._impl_info -def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]: - r"""See torchaudio.info""" + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer - sfi = soundfile.info(filepath) + * FLAC + * OGG/VORBIS + * SPHERE - precision = _subtype_to_precision[sfi.subtype] - si = SignalInfo(sfi.channels, sfi.samplerate, precision, sfi.frames) - ei = EncodingInfo(bits_per_sample=precision) - return si, ei + Args: + filepath (str or pathlib.Path): Path to audio file. + This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str`` + for the consistency with "sox_io" backend, which has a restriction on type annotation + for TorchScript compiler compatiblity. + src (torch.Tensor): Audio data to save. must be 2D tensor. + sample_rate (int): sampling rate + channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``, + otherwise ``[time, channel]``. 
+ compression (Optional[float]): Not used. + It is here only for interface compatibility with "sox_io" backend. + format (str, optional): Override the audio format. + When ``filepath`` argument is path-like object, audio format is + inferred from file extension. If the file extension is missing or + different, you can specify the correct format with this argument. + + When ``filepath`` argument is file-like object, + this argument is required. + + Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, + ``"flac"`` and ``"sph"``. + encoding (str, optional): Changes the encoding for supported formats. + This argument is effective only for supported formats, such as + ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are; + + - ``"PCM_S"`` (signed integer Linear PCM) + - ``"PCM_U"`` (unsigned integer Linear PCM) + - ``"PCM_F"`` (floating point PCM) + - ``"ULAW"`` (mu-law) + - ``"ALAW"`` (a-law) + + bits_per_sample (int, optional): Changes the bit depth for the + supported formats. + When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``, + you can change the bit depth. + Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``. + + Supported formats/encodings/bit depth/compression are: + + ``"wav"`` + - 32-bit floating-point PCM + - 32-bit signed integer PCM + - 24-bit signed integer PCM + - 16-bit signed integer PCM + - 8-bit unsigned integer PCM + - 8-bit mu-law + - 8-bit a-law + + Note: Default encoding/bit depth is determined by the dtype of + the input Tensor. + + ``"flac"`` + - 8-bit + - 16-bit + - 24-bit (default) + + ``"ogg"``, ``"vorbis"`` + - Doesn't accept changing configuration.
+ + ``"sph"`` + - 8-bit signed integer PCM + - 16-bit signed integer PCM + - 24-bit signed integer PCM + - 32-bit signed integer PCM (default) + - 8-bit mu-law + - 8-bit a-law + - 16-bit a-law + - 24-bit a-law + - 32-bit a-law + + """ + if src.ndim != 2: + raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") + if compression is not None: + warnings.warn( + '`save` function of "soundfile" backend does not support "compression" parameter. ' + "The argument is silently ignored." + ) + if hasattr(filepath, 'write'): + if format is None: + raise RuntimeError('`format` is required when saving to file object.') + ext = format.lower() + else: + ext = str(filepath).split(".")[-1].lower() + + if bits_per_sample not in (None, 8, 16, 24, 32, 64): + raise ValueError("Invalid bits_per_sample.") + subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) + + # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, + # so we extend the extensions manually here + if ext in ["nis", "nist", "sph"] and format is None: + format = "NIST" + + if channels_first: + src = src.t() + + soundfile.write( + file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format + ) + + +@_mod_utils.requires_module("soundfile") +@_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0") +def load_wav( + filepath: str, + frame_offset: int = 0, + num_frames: int = -1, + channels_first: bool = True, +) -> Tuple[torch.Tensor, int]: + """Load wave file. + + This function is defined only for the purpose of compatibility against other backend + for simple usecases, such as ``torchaudio.load_wav(filepath)``. + The implementation is same as :py:func:`load`. 
+ """ + return load( + filepath, + frame_offset, + num_frames, + normalize=False, + channels_first=channels_first, + ) diff --git a/torchaudio/backend/sox_backend.py b/torchaudio/backend/sox_backend.py deleted file mode 100644 index bf167dd195..0000000000 --- a/torchaudio/backend/sox_backend.py +++ /dev/null @@ -1,294 +0,0 @@ -import os.path -from typing import Any, Optional, Tuple - -import torch -from torch import Tensor - -from torchaudio._internal import ( - module_utils as _mod_utils, - misc_ops as _misc_ops, -) -from . import common -from .common import SignalInfo, EncodingInfo - -if _mod_utils.is_module_available('torchaudio._torchaudio'): - from torchaudio import _torchaudio - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@common._impl_load -def load(filepath: str, - out: Optional[Tensor] = None, - normalization: bool = True, - channels_first: bool = True, - num_frames: int = 0, - offset: int = 0, - signalinfo: SignalInfo = None, - encodinginfo: EncodingInfo = None, - filetype: Optional[str] = None) -> Tuple[Tensor, int]: - r"""See torchaudio.load""" - - # stringify if `pathlib.Path` (noop if already `str`) - filepath = str(filepath) - # check if valid file - if not os.path.isfile(filepath): - raise OSError("{} not found or is a directory".format(filepath)) - - # initialize output tensor - if out is not None: - _misc_ops.check_input(out) - else: - out = torch.FloatTensor() - - if num_frames < -1: - raise ValueError("Expected value for num_samples -1 (entire file) or >=0") - if offset < 0: - raise ValueError("Expected positive offset value") - - sample_rate = _torchaudio.read_audio_file( - filepath, - out, - channels_first, - num_frames, - offset, - signalinfo, - encodinginfo, - filetype - ) - - # normalize if needed - _misc_ops.normalize_audio(out, normalization) - - return out, sample_rate - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated('Please use "torchaudio.load".', '0.9.0') -@common._impl_load_wav -def 
load_wav(filepath, **kwargs): - kwargs['normalization'] = 1 << 16 - return load(filepath, **kwargs) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@common._impl_save -def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None: - r"""See torchaudio.save""" - - si = sox_signalinfo_t() - ch_idx = 0 if channels_first else 1 - si.rate = sample_rate - si.channels = 1 if src.dim() == 1 else src.size(ch_idx) - si.length = src.numel() - si.precision = precision - return save_encinfo(filepath, src, channels_first, si) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@common._impl_info -def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]: - r"""See torchaudio.info""" - return _torchaudio.get_info(filepath) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def save_encinfo(filepath: str, - src: Tensor, - channels_first: bool = True, - signalinfo: Optional[SignalInfo] = None, - encodinginfo: Optional[EncodingInfo] = None, - filetype: Optional[str] = None) -> None: - r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc. - - Args: - filepath (str): Path to audio file - src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is - the number of audio frames, C is the number of channels - channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``) - signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the - audio type cannot be automatically determined (Default: ``None``). - encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the - audio type cannot be automatically determined (Default: ``None``). 
- filetype (str, optional): A filetype or extension to be set if sox cannot determine it - automatically. (Default: ``None``) - - Example - >>> data, sample_rate = torchaudio.load('foo.mp3') - >>> torchaudio.save('foo.wav', data, sample_rate) - - """ - ch_idx, len_idx = (0, 1) if channels_first else (1, 0) - - # check if save directory exists - abs_dirpath = os.path.dirname(os.path.abspath(filepath)) - if not os.path.isdir(abs_dirpath): - raise OSError("Directory does not exist: {}".format(abs_dirpath)) - # check that src is a CPU tensor - _misc_ops.check_input(src) - # Check/Fix shape of source data - if src.dim() == 1: - # 1d tensors as assumed to be mono signals - src.unsqueeze_(ch_idx) - elif src.dim() > 2 or src.size(ch_idx) > 16: - # assumes num_channels < 16 - raise ValueError( - "Expected format where C < 16, but found {}".format(src.size())) - # sox stores the sample rate as a float, though practically sample rates are almost always integers - # convert integers to floats - if signalinfo: - if signalinfo.rate and not isinstance(signalinfo.rate, float): - if float(signalinfo.rate) == signalinfo.rate: - signalinfo.rate = float(signalinfo.rate) - else: - raise TypeError('Sample rate should be a float or int') - # check if the bit precision (i.e. 
bits per sample) is an integer - if signalinfo.precision and not isinstance(signalinfo.precision, int): - if int(signalinfo.precision) == signalinfo.precision: - signalinfo.precision = int(signalinfo.precision) - else: - raise TypeError('Bit precision should be an integer') - # programs such as librosa normalize the signal, unnormalize if detected - if src.min() >= -1.0 and src.max() <= 1.0: - src = src * (1 << 31) - src = src.long() - # set filetype and allow for files with no extensions - extension = os.path.splitext(filepath)[1] - filetype = extension[1:] if len(extension) > 0 else filetype - # transpose from C x L -> L x C - if channels_first: - src = src.transpose(1, 0) - # save data to file - src = src.contiguous() - _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def sox_signalinfo_t() -> SignalInfo: - r"""Create a sox_signalinfo_t object. This object can be used to set the sample - rate, number of channels, length, bit precision and headroom multiplier - primarily for effects - - Returns: sox_signalinfo_t(object) - - rate (float), sample rate as a float, practically will likely be an integer float - - channel (int), number of audio channels - - precision (int), bit precision - - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown - - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier - - Example - >>> si = torchaudio.sox_signalinfo_t() - >>> si.channels = 1 - >>> si.rate = 16000. - >>> si.precision = 16 - >>> si.length = 0 - """ - return _torchaudio.sox_signalinfo_t() - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. 
See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def sox_encodinginfo_t() -> EncodingInfo: - r"""Create a sox_encodinginfo_t object. This object can be used to set the encoding - type, bit precision, compression factor, reverse bytes, reverse nibbles, - reverse bits and endianness. This can be used in an effects chain to encode the - final output or to save a file with a specific encoding. For example, one could - use the sox ulaw encoding to do 8-bit ulaw encoding. Note in a tensor output - the result will be a 32-bit number, but number of unique values will be determined by - the bit precision. - - Returns: sox_encodinginfo_t(object) - - encoding (sox_encoding_t), output encoding - - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t - - compression (float), compression for lossy formats, 0.0 for default compression - - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default - - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default - - reverse_bits (sox_option_t), reverse bytes, use sox_option_default - - opposite_endian (sox_bool), change endianness, use sox_false - - Example - >>> ei = torchaudio.sox_encodinginfo_t() - >>> ei.encoding = torchaudio.get_sox_encoding_t(1) - >>> ei.bits_per_sample = 16 - >>> ei.compression = 0 - >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2) - >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2) - >>> ei.reverse_bits = torchaudio.get_sox_option_t(2) - >>> ei.opposite_endian = torchaudio.get_sox_bool(0) - - """ - ei = _torchaudio.sox_encodinginfo_t() - sdo = get_sox_option_t(2) # sox_default_option - ei.reverse_bytes = sdo - ei.reverse_nibbles = sdo - ei.reverse_bits = sdo - return ei - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. 
See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def get_sox_encoding_t(i: int = None) -> EncodingInfo: - r"""Get enum of sox_encoding_t for sox encodings. - - Args: - i (int, optional): Choose type or get a dict with all possible options - use ``__members__`` to see all options when not specified. (Default: ``None``) - - Returns: - sox_encoding_t: A sox_encoding_t type for output encoding - """ - if i is None: - # one can see all possible values using the .__members__ attribute - return _torchaudio.sox_encoding_t - else: - return _torchaudio.sox_encoding_t(i) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def get_sox_option_t(i: int = 2) -> Any: - r"""Get enum of sox_option_t for sox encodinginfo options. - - Args: - i (int, optional): Choose type or get a dict with all possible options - use ``__members__`` to see all options when not specified. - (Default: ``sox_option_default`` or ``2``) - Returns: - sox_option_t: A sox_option_t type - """ - if i is None: - return _torchaudio.sox_option_t - else: - return _torchaudio.sox_option_t(i) - - -@_mod_utils.requires_module('torchaudio._torchaudio') -@_mod_utils.deprecated( - 'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail', - '0.9.0') -def get_sox_bool(i: int = 0) -> Any: - r"""Get enum of sox_bool for sox encodinginfo options. - - Args: - i (int, optional): Choose type or get a dict with all possible options - use ``__members__`` to see all options when not specified. 
(Default: - ``sox_false`` or ``0``) - - Returns: - sox_bool: A sox_bool type - """ - if i is None: - return _torchaudio.sox_bool - else: - return _torchaudio.sox_bool(i) diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index ecfd5ebd42..8b65f9020f 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -9,6 +9,9 @@ import torchaudio from .common import AudioMetaData +if _mod_utils.is_module_available('torchaudio._torchaudio'): + from torchaudio import _torchaudio + @_mod_utils.requires_module('torchaudio._torchaudio') def info( diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py index b9f6b13edf..a5562d718d 100644 --- a/torchaudio/backend/utils.py +++ b/torchaudio/backend/utils.py @@ -6,10 +6,8 @@ from torchaudio._internal.module_utils import is_module_available from . import ( no_backend, - sox_backend, sox_io_backend, soundfile_backend, - _soundfile_backend, ) __all__ = [ @@ -29,7 +27,6 @@ def list_audio_backends() -> List[str]: if is_module_available('soundfile'): backends.append('soundfile') if is_module_available('torchaudio._torchaudio'): - backends.append('sox') backends.append('sox_io') return backends @@ -39,15 +36,9 @@ def set_audio_backend(backend: Optional[str]): Args: backend (Optional[str]): Name of the backend. - One of ``"sox"``, ``"sox_io"`` or ``"soundfile"`` based on availability + One of ``"sox_io"`` or ``"soundfile"`` based on availability of the system. If ``None`` is provided the current backend is unassigned. """ - if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE is not None: - warnings.warn( - '"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is deprecated and will be removed in 0.9.0. ' - 'Please remove the use of flag.' 
- ) - if backend is not None and backend not in list_audio_backends(): raise RuntimeError( f'Backend "{backend}" is not one of ' @@ -55,23 +46,10 @@ def set_audio_backend(backend: Optional[str]): if backend is None: module = no_backend - elif backend == 'sox': - warnings.warn( - '"sox" backend is deprecated and will be removed in 0.9.0. ' - 'Please use "sox_io" backend.' - ) - module = sox_backend elif backend == 'sox_io': module = sox_io_backend elif backend == 'soundfile': - if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE: - warnings.warn( - 'The legacy interface of "soundfile" backend is deprecated and will be removed in 0.9.0. ' - 'Please migrate to the new interface.' - ) - module = soundfile_backend - else: - module = _soundfile_backend + module = soundfile_backend else: raise NotImplementedError(f'Unexpected backend "{backend}"') @@ -98,10 +76,8 @@ def get_audio_backend() -> Optional[str]: """ if torchaudio.load == no_backend.load: return None - if torchaudio.load == sox_backend.load: - return 'sox' if torchaudio.load == sox_io_backend.load: return 'sox_io' - if torchaudio.load in [soundfile_backend.load, _soundfile_backend.load]: + if torchaudio.load == soundfile_backend.load: return 'soundfile' raise ValueError('Unknown backend.') diff --git a/torchaudio/csrc/CMakeLists.txt b/torchaudio/csrc/CMakeLists.txt index 1bab67be5a..7d4d4e7741 100644 --- a/torchaudio/csrc/CMakeLists.txt +++ b/torchaudio/csrc/CMakeLists.txt @@ -60,7 +60,6 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION) _torchaudio SHARED pybind.cpp - sox/legacy.cpp ${LIBTORCHAUDIO_SOURCES} ) diff --git a/torchaudio/csrc/pybind.cpp b/torchaudio/csrc/pybind.cpp index 373b9d0d96..fc17f7da5a 100644 --- a/torchaudio/csrc/pybind.cpp +++ b/torchaudio/csrc/pybind.cpp @@ -1,106 +1,8 @@ #include #include #include -#include PYBIND11_MODULE(_torchaudio, m) { - py::class_(m, "sox_signalinfo_t") - .def(py::init<>()) - .def( - "__repr__", - [](const sox_signalinfo_t& self) { - std::stringstream ss; - ss << 
"sox_signalinfo_t {\n" - << " rate-> " << self.rate << "\n" - << " channels-> " << self.channels << "\n" - << " precision-> " << self.precision << "\n" - << " length-> " << self.length << "\n" - << " mult-> " << self.mult << "\n" - << "}\n"; - return ss.str(); - }) - .def_readwrite("rate", &sox_signalinfo_t::rate) - .def_readwrite("channels", &sox_signalinfo_t::channels) - .def_readwrite("precision", &sox_signalinfo_t::precision) - .def_readwrite("length", &sox_signalinfo_t::length) - .def_readwrite("mult", &sox_signalinfo_t::mult); - py::class_(m, "sox_encodinginfo_t") - .def(py::init<>()) - .def( - "__repr__", - [](const sox_encodinginfo_t& self) { - std::stringstream ss; - ss << "sox_encodinginfo_t {\n" - << " encoding-> " << self.encoding << "\n" - << " bits_per_sample-> " << self.bits_per_sample << "\n" - << " compression-> " << self.compression << "\n" - << " reverse_bytes-> " << self.reverse_bytes << "\n" - << " reverse_nibbles-> " << self.reverse_nibbles << "\n" - << " reverse_bits-> " << self.reverse_bits << "\n" - << " opposite_endian-> " << self.opposite_endian << "\n" - << "}\n"; - return ss.str(); - }) - .def_readwrite("encoding", &sox_encodinginfo_t::encoding) - .def_readwrite("bits_per_sample", &sox_encodinginfo_t::bits_per_sample) - .def_readwrite("compression", &sox_encodinginfo_t::compression) - .def_readwrite("reverse_bytes", &sox_encodinginfo_t::reverse_bytes) - .def_readwrite("reverse_nibbles", &sox_encodinginfo_t::reverse_nibbles) - .def_readwrite("reverse_bits", &sox_encodinginfo_t::reverse_bits) - .def_readwrite("opposite_endian", &sox_encodinginfo_t::opposite_endian); - py::enum_(m, "sox_encoding_t") - .value("SOX_ENCODING_UNKNOWN", sox_encoding_t::SOX_ENCODING_UNKNOWN) - .value("SOX_ENCODING_SIGN2", sox_encoding_t::SOX_ENCODING_SIGN2) - .value("SOX_ENCODING_UNSIGNED", sox_encoding_t::SOX_ENCODING_UNSIGNED) - .value("SOX_ENCODING_FLOAT", sox_encoding_t::SOX_ENCODING_FLOAT) - .value("SOX_ENCODING_FLOAT_TEXT", 
sox_encoding_t::SOX_ENCODING_FLOAT_TEXT) - .value("SOX_ENCODING_FLAC", sox_encoding_t::SOX_ENCODING_FLAC) - .value("SOX_ENCODING_HCOM", sox_encoding_t::SOX_ENCODING_HCOM) - .value("SOX_ENCODING_WAVPACK", sox_encoding_t::SOX_ENCODING_WAVPACK) - .value("SOX_ENCODING_WAVPACKF", sox_encoding_t::SOX_ENCODING_WAVPACKF) - .value("SOX_ENCODING_ULAW", sox_encoding_t::SOX_ENCODING_ULAW) - .value("SOX_ENCODING_ALAW", sox_encoding_t::SOX_ENCODING_ALAW) - .value("SOX_ENCODING_G721", sox_encoding_t::SOX_ENCODING_G721) - .value("SOX_ENCODING_G723", sox_encoding_t::SOX_ENCODING_G723) - .value("SOX_ENCODING_CL_ADPCM", sox_encoding_t::SOX_ENCODING_CL_ADPCM) - .value("SOX_ENCODING_CL_ADPCM16", sox_encoding_t::SOX_ENCODING_CL_ADPCM16) - .value("SOX_ENCODING_MS_ADPCM", sox_encoding_t::SOX_ENCODING_MS_ADPCM) - .value("SOX_ENCODING_IMA_ADPCM", sox_encoding_t::SOX_ENCODING_IMA_ADPCM) - .value("SOX_ENCODING_OKI_ADPCM", sox_encoding_t::SOX_ENCODING_OKI_ADPCM) - .value("SOX_ENCODING_DPCM", sox_encoding_t::SOX_ENCODING_DPCM) - .value("SOX_ENCODING_DWVW", sox_encoding_t::SOX_ENCODING_DWVW) - .value("SOX_ENCODING_DWVWN", sox_encoding_t::SOX_ENCODING_DWVWN) - .value("SOX_ENCODING_GSM", sox_encoding_t::SOX_ENCODING_GSM) - .value("SOX_ENCODING_MP3", sox_encoding_t::SOX_ENCODING_MP3) - .value("SOX_ENCODING_VORBIS", sox_encoding_t::SOX_ENCODING_VORBIS) - .value("SOX_ENCODING_AMR_WB", sox_encoding_t::SOX_ENCODING_AMR_WB) - .value("SOX_ENCODING_AMR_NB", sox_encoding_t::SOX_ENCODING_AMR_NB) - .value("SOX_ENCODING_LPC10", sox_encoding_t::SOX_ENCODING_LPC10) - //.value("SOX_ENCODING_OPUS", sox_encoding_t::SOX_ENCODING_OPUS) // - // creates a compile error - .value("SOX_ENCODINGS", sox_encoding_t::SOX_ENCODINGS) - .export_values(); - py::enum_(m, "sox_option_t") - .value("sox_option_no", sox_option_t::sox_option_no) - .value("sox_option_yes", sox_option_t::sox_option_yes) - .value("sox_option_default", sox_option_t::sox_option_default) - .export_values(); - py::enum_(m, "sox_bool") - .value("sox_false", 
sox_bool::sox_false) - .value("sox_true", sox_bool::sox_true) - .export_values(); - m.def( - "read_audio_file", - &torch::audio::read_audio_file, - "Reads an audio file into a tensor"); - m.def( - "write_audio_file", - &torch::audio::write_audio_file, - "Writes data from a tensor into an audio file"); - m.def( - "get_info", - &torch::audio::get_info, - "Gets information about an audio file"); m.def( "get_info_fileobj", &torchaudio::sox_io::get_info_fileobj, diff --git a/torchaudio/csrc/sox/legacy.cpp b/torchaudio/csrc/sox/legacy.cpp deleted file mode 100644 index 858ad15149..0000000000 --- a/torchaudio/csrc/sox/legacy.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include - -namespace torch { -namespace audio { -namespace { -/// Helper struct to safely close the sox_format_t descriptor. -struct SoxDescriptor { - explicit SoxDescriptor(sox_format_t* fd) noexcept : fd_(fd) {} - SoxDescriptor(const SoxDescriptor& other) = delete; - SoxDescriptor(SoxDescriptor&& other) = delete; - SoxDescriptor& operator=(const SoxDescriptor& other) = delete; - SoxDescriptor& operator=(SoxDescriptor&& other) = delete; - ~SoxDescriptor() { - if (fd_ != nullptr) { - sox_close(fd_); - } - } - sox_format_t* operator->() noexcept { - return fd_; - } - sox_format_t* get() noexcept { - return fd_; - } - - private: - sox_format_t* fd_; -}; - -int64_t write_audio(SoxDescriptor& fd, at::Tensor tensor) { - std::vector buffer(tensor.numel()); - - AT_DISPATCH_ALL_TYPES(tensor.scalar_type(), "write_audio_buffer", [&] { - auto* data = tensor.data_ptr(); - std::copy(data, data + tensor.numel(), buffer.begin()); - }); - - const auto samples_written = - sox_write(fd.get(), buffer.data(), buffer.size()); - - return samples_written; -} - -void read_audio(SoxDescriptor& fd, at::Tensor output, int64_t buffer_length) { - std::vector buffer(buffer_length); - - int number_of_channels = fd->signal.channels; - const int64_t samples_read = sox_read(fd.get(), buffer.data(), buffer_length); - if (samples_read == 0) { - 
throw std::runtime_error( - "Error reading audio file: empty file or read failed in sox_read"); - } - - output.resize_({samples_read / number_of_channels, number_of_channels}); - output = output.contiguous(); - - AT_DISPATCH_ALL_TYPES(output.scalar_type(), "read_audio_buffer", [&] { - auto* data = output.data_ptr(); - std::copy(buffer.begin(), buffer.begin() + samples_read, data); - }); -} -} // namespace - -std::tuple get_info( - const std::string& file_name) { - SoxDescriptor fd(sox_open_read( - file_name.c_str(), - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/nullptr)); - if (fd.get() == nullptr) { - throw std::runtime_error("Error opening audio file"); - } - return std::make_tuple(fd->signal, fd->encoding); -} - -int read_audio_file( - const std::string& file_name, - at::Tensor output, - bool ch_first, - int64_t nframes, - int64_t offset, - sox_signalinfo_t* si, - sox_encodinginfo_t* ei, - const char* ft) { - SoxDescriptor fd(sox_open_read(file_name.c_str(), si, ei, ft)); - if (fd.get() == nullptr) { - throw std::runtime_error("Error opening audio file"); - } - - // signal info - - const int number_of_channels = fd->signal.channels; - const int sample_rate = fd->signal.rate; - const int64_t total_length = fd->signal.length; - - // multiply offset and number of frames by number of channels - offset *= number_of_channels; - nframes *= number_of_channels; - - if (total_length == 0) { - throw std::runtime_error("Error reading audio file: unknown length"); - } - if (offset > total_length) { - throw std::runtime_error("Offset past EOF"); - } - - // calculate buffer length - int64_t buffer_length = total_length; - if (offset > 0) { - buffer_length -= offset; - } - if (nframes > 0 && buffer_length > nframes) { - buffer_length = nframes; - } - - // seek to offset point before reading data - if (sox_seek(fd.get(), offset, 0) == SOX_EOF) { - throw std::runtime_error( - "sox_seek reached EOF, try reducing offset or num_samples"); - } - - // read data and fill 
output tensor - read_audio(fd, output, buffer_length); - - // L x C -> C x L, if desired - if (ch_first) { - output.transpose_(1, 0); - } - - return sample_rate; -} - -void write_audio_file( - const std::string& file_name, - const at::Tensor& tensor, - sox_signalinfo_t* si, - sox_encodinginfo_t* ei, - const char* file_type) { - if (!tensor.is_contiguous()) { - throw std::runtime_error( - "Error writing audio file: input tensor must be contiguous"); - } - -#if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0 - si->mult = nullptr; -#endif - - SoxDescriptor fd(sox_open_write( - file_name.c_str(), - si, - ei, - file_type, - /*oob=*/nullptr, - /*overwrite=*/nullptr)); - - if (fd.get() == nullptr) { - throw std::runtime_error( - "Error writing audio file: could not open file for writing"); - } - - const auto samples_written = write_audio(fd, tensor); - - if (samples_written != tensor.numel()) { - throw std::runtime_error( - "Error writing audio file: could not write entire buffer"); - } -} - -} // namespace audio -} // namespace torch diff --git a/torchaudio/csrc/sox/legacy.h b/torchaudio/csrc/sox/legacy.h deleted file mode 100644 index 5869695bfe..0000000000 --- a/torchaudio/csrc/sox/legacy.h +++ /dev/null @@ -1,40 +0,0 @@ -#include -#include - -namespace torch { -namespace audio { - -/// Reads an audio file from the given `path` into the `output` `Tensor` and -/// returns the sample rate of the audio file. -/// Throws `std::runtime_error` if the audio file could not be opened, or an -/// error occurred during reading of the audio data. -int read_audio_file( - const std::string& file_name, - at::Tensor output, - bool ch_first, - int64_t nframes, - int64_t offset, - sox_signalinfo_t* si, - sox_encodinginfo_t* ei, - const char* ft); - -/// Writes the data of a `Tensor` into an audio file at the given `path`, with -/// a certain extension (e.g. `wav`or `mp3`) and sample rate. 
-/// Throws `std::runtime_error` when the audio file could not be opened for -/// writing, or an error occurred during writing of the audio data. -void write_audio_file( - const std::string& file_name, - const at::Tensor& tensor, - sox_signalinfo_t* si, - sox_encodinginfo_t* ei, - const char* file_type); - -/// Reads an audio file from the given `path` and returns a tuple of -/// sox_signalinfo_t and sox_encodinginfo_t, which contain information about -/// the audio file such as sample rate, length, bit precision, encoding and -/// more. Throws `std::runtime_error` if the audio file could not be opened, or -/// an error occurred during reading of the audio data. -std::tuple get_info( - const std::string& file_name); -} // namespace audio -} // namespace torch diff --git a/torchaudio/datasets/tedlium.py b/torchaudio/datasets/tedlium.py index e8d1d1cefc..fb96ca5789 100644 --- a/torchaudio/datasets/tedlium.py +++ b/torchaudio/datasets/tedlium.py @@ -153,11 +153,7 @@ def _load_audio(self, path: str, start_time: float, end_time: float, sample_rate start_time = int(float(start_time) * sample_rate) end_time = int(float(end_time) * sample_rate) - backend = torchaudio.get_audio_backend() - if backend == "sox" or (backend == "soundfile" and torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE): - kwargs = {"offset": start_time, "num_frames": end_time - start_time} - else: - kwargs = {"frame_offset": start_time, "num_frames": end_time - start_time} + kwargs = {"frame_offset": start_time, "num_frames": end_time - start_time} return torchaudio.load(path, **kwargs) From e44a1e03f468d93a515c97e065b446c675451417 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Wed, 24 Feb 2021 19:09:17 +0000 Subject: [PATCH 2/4] Ignore flake8 error for import --- torchaudio/backend/sox_io_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 8b65f9020f..6849f87691 100644 --- 
a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -10,7 +10,7 @@ from .common import AudioMetaData if _mod_utils.is_module_available('torchaudio._torchaudio'): - from torchaudio import _torchaudio + from torchaudio import _torchaudio # noqa @_mod_utils.requires_module('torchaudio._torchaudio') From 10e16e252e12343aab43b09c45755b95450fe624 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Wed, 24 Feb 2021 19:50:18 +0000 Subject: [PATCH 3/4] Addressed review comments. --- docs/source/backend.rst | 8 -------- torchaudio/backend/sox_io_backend.py | 3 --- torchaudio/extension/extension.py | 1 + 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/source/backend.rst b/docs/source/backend.rst index 9ce1f071b1..cfecf2e8e4 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -23,14 +23,6 @@ Availability ``"soundfile"`` backend requires ``SoundFile``. Please refer to `the SoundFile documentation `_ for the installation. - +--------------------+-----------------------+------------------------+ - | **Backend** | **0.8.0** | **0.9.0** | - +====================+=======================+========================+ - | ``"sox_io"`` | Default on Linx/macOS | Default on Linux/macOS | - +--------------------+-----------------------+------------------------+ - | ``"soundfile"`` | Default on Windows | Default on Windows | - +--------------------+-----------------------+------------------------+ - Common Data Structure ~~~~~~~~~~~~~~~~~~~~~ diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 6849f87691..ecfd5ebd42 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -9,9 +9,6 @@ import torchaudio from .common import AudioMetaData -if _mod_utils.is_module_available('torchaudio._torchaudio'): - from torchaudio import _torchaudio # noqa - @_mod_utils.requires_module('torchaudio._torchaudio') def info( diff --git 
a/torchaudio/extension/extension.py b/torchaudio/extension/extension.py index 5875f41023..5af62f19be 100644 --- a/torchaudio/extension/extension.py +++ b/torchaudio/extension/extension.py @@ -9,6 +9,7 @@ def _init_extension(): ext = 'torchaudio._torchaudio' if _mod_utils.is_module_available(ext): _init_script_module(ext) + import torchaudio._torchaudio # noqa else: warnings.warn('torchaudio C++ extension is not available.') From d24cfacac939d2ba160a575b686dfb6cee14b0c1 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Wed, 24 Feb 2021 20:29:12 +0000 Subject: [PATCH 4/4] Fixed failure in build doc. --- docs/source/backend.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/backend.rst b/docs/source/backend.rst index cfecf2e8e4..f185d171b0 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -66,6 +66,8 @@ save .. autofunction:: torchaudio.backend.sox_io_backend.save +.. _soundfile_backend: + Soundfile Backend ~~~~~~~~~~~~~~~~~