From 489b769b74aa6301f2e9e8855747a11f593e483d Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Wed, 24 Feb 2021 18:29:53 +0000
Subject: [PATCH 1/4] Removed legacy backends from torchaudio

---
 docs/source/backend.rst                       | 109 +---
 .../backend/legacy_test.py                    | 290 ----------
 .../backend/soundfile/info_test.py            |   2 +-
 .../backend/soundfile/load_test.py            |   2 +-
 .../backend/soundfile/save_test.py            |   2 +-
 .../torchaudio_unittest/backend/utils_test.py |  20 +-
 .../common_utils/backend_utils.py             |   4 +-
 .../datasets/tedlium_test.py                  |   7 -
 torchaudio/__init__.py                        |  10 -
 torchaudio/backend/__init__.py                |  12 -
 torchaudio/backend/_soundfile_backend.py      | 449 ---------------
 torchaudio/backend/common.py                  | 186 -------
 torchaudio/backend/no_backend.py              |  11 +-
 torchaudio/backend/soundfile_backend.py       | 518 ++++++++++++++----
 torchaudio/backend/sox_backend.py             | 294 ----------
 torchaudio/backend/sox_io_backend.py          |   3 +
 torchaudio/backend/utils.py                   |  30 +-
 torchaudio/csrc/CMakeLists.txt                |   1 -
 torchaudio/csrc/pybind.cpp                    |  98 ----
 torchaudio/csrc/sox/legacy.cpp                | 170 ------
 torchaudio/csrc/sox/legacy.h                  |  40 --
 torchaudio/datasets/tedlium.py                |   6 +-
 22 files changed, 436 insertions(+), 1828 deletions(-)
 delete mode 100644 test/torchaudio_unittest/backend/legacy_test.py
 delete mode 100644 torchaudio/backend/_soundfile_backend.py
 delete mode 100644 torchaudio/backend/sox_backend.py
 delete mode 100644 torchaudio/csrc/sox/legacy.cpp
 delete mode 100644 torchaudio/csrc/sox/legacy.h

diff --git a/docs/source/backend.rst b/docs/source/backend.rst
index dcf12c5b2f..9ce1f071b1 100644
--- a/docs/source/backend.rst
+++ b/docs/source/backend.rst
@@ -11,11 +11,7 @@ Overview
 There are currently four implementations available.
 
 * :ref:`"sox_io" <sox_io_backend>` (default on Linux/macOS)
-* :ref:`"sox" <sox_backend>` (deprecated, will be removed in 0.9.0 release)
 * :ref:`"soundfile" <soundfile_backend>` (default on Windows)
-* :ref:`"soundfile" (legacy interface) <soundfile_legacy_backend>` (deprecated, will be removed in 0.9.0 release)
-
-The use of ``"sox"`` backend is strongly discouraged as it cannot correctly handle formats other than 16-bit integer WAV. See `#726 <https://github.com/pytorch/audio/pull/726>`_ for the detail.
 
 .. note::
    Instead of calling functions in ``torchaudio.backend`` directly, please use ``torchaudio.info``, ``torchaudio.load``, ``torchaudio.load_wav`` and ``torchaudio.save`` with proper backend set with :func:`torchaudio.set_audio_backend`.
@@ -23,31 +19,17 @@ The use of ``"sox"`` backend is strongly discouraged as it cannot correctly hand
 Availability
 ------------
 
-``"sox"`` and ``"sox_io"`` backends require C++ extension module, which is included in Linux/macOS binary distributions. These backends are not available on Windows.
+``"sox_io"`` backend requires the C++ extension module, which is included in Linux/macOS binary distributions. This backend is not available on Windows.
 
 ``"soundfile"`` backend requires ``SoundFile``. Please refer to `the SoundFile documentation <https://pysoundfile.readthedocs.io/en/latest/>`_ for the installation.
 
-Changes in default backend and deprecation
-------------------------------------------
-
-Backend module is going through a major overhaul. The following table summarizes the timeline for the deprecations and removals.
-
  +--------------------+-----------------------+------------------------+
  | **Backend**        | **0.8.0**             | **0.9.0**              |
  +====================+=======================+========================+
  | ``"sox_io"``       | Default on Linx/macOS | Default on Linux/macOS |
  +--------------------+-----------------------+------------------------+
- | ``"sox"``          | Available             | Removed                |
- | (deprecated)       |                       |                        |
- +--------------------+-----------------------+------------------------+
  | ``"soundfile"``    | Default on Windows    | Default on Windows     |
  +--------------------+-----------------------+------------------------+
- | ``"soundfile"``    | Available             | Removed                |
- | (legacy interface, |                       |                        |
- | deprecated)        |                       |                        |
- +--------------------+-----------------------+------------------------+
-
-* The ``"sox"`` and ``"soundfile" (legacy interface)`` backends are deprecated and will be removed in 0.9.0 release.
 
 Common Data Structure
 ~~~~~~~~~~~~~~~~~~~~~
@@ -59,16 +41,6 @@ AudioMetaData
 
 .. autoclass:: torchaudio.backend.common.AudioMetaData
 
-SignalInfo (Deprecated)
------------------------
-
-.. autoclass:: torchaudio.backend.common.SignalInfo
-
-EncodingInfo (Deprecated)
--------------------------
-
-.. autoclass:: torchaudio.backend.common.EncodingInfo
-
 .. _sox_io_backend:
 
 Sox IO Backend
@@ -102,46 +74,6 @@ save
 
 .. autofunction:: torchaudio.backend.sox_io_backend.save
 
-.. _sox_backend:
-
-Sox Backend (Deprecated)
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-The ``"sox"`` backend is available on Linux/macOS and not available on Windows. This backend is deprecated and will be removed in ``0.9.0`` release.
-
-You can switch from another backend to ``sox`` backend with the following;
-
-.. code::
-
-   torchaudio.set_audio_backend("sox")
-
-info
-----
-
-.. autofunction:: torchaudio.backend.sox_backend.info
-
-load
-----
-
-.. autofunction:: torchaudio.backend.sox_backend.load
-
-.. autofunction:: torchaudio.backend.sox_backend.load_wav
-
-
-save
-----
-
-.. autofunction:: torchaudio.backend.sox_backend.save
-
-others
-------
-
-.. automodule:: torchaudio.backend.sox_backend
-   :members:
-   :exclude-members: info, load, load_wav, save
-
-.. _soundfile_backend:
-
 Soundfile Backend
 ~~~~~~~~~~~~~~~~~
 
@@ -153,48 +85,13 @@ You can switch from another backend to the ``"soundfile"`` backend with the foll
 
    torchaudio.set_audio_backend("soundfile")
 
-.. note::
-    If you are switching from `"soundfile" (legacy interface) <soundfile_legacy_backend>` backend, set ``torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE`` flag **before** switching the backend.
-
 info
 ----
 
-.. autofunction:: torchaudio.backend._soundfile_backend.info
-
-load
-----
-
-.. autofunction:: torchaudio.backend._soundfile_backend.load
-
-.. autofunction:: torchaudio.backend._soundfile_backend.load_wav
-
-
-save
-----
-
-.. autofunction:: torchaudio.backend._soundfile_backend.save
-
-.. _soundfile_legacy_backend:
-
-Legacy Interface (Deprecated)
------------------------------
-
-``"soundfile"`` backend with legacy interface is made available for backward compatibility reason, however this interface is deprecated and will be removed in the ``0.9.0`` release.
-
-To switch to this backend/interface, set ``torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE`` flag **before** switching the backend.
-
-.. code::
-
-   torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True
-   torchaudio.set_audio_backend("soundfile")  # The legacy interface
-
-info
-^^^^
-
 .. autofunction:: torchaudio.backend.soundfile_backend.info
 
 load
-^^^^
+----
 
 .. autofunction:: torchaudio.backend.soundfile_backend.load
 
@@ -202,6 +99,6 @@ load
 
 
 save
-^^^^
+----
 
 .. autofunction:: torchaudio.backend.soundfile_backend.save
diff --git a/test/torchaudio_unittest/backend/legacy_test.py b/test/torchaudio_unittest/backend/legacy_test.py
deleted file mode 100644
index 3dbc419bde..0000000000
--- a/test/torchaudio_unittest/backend/legacy_test.py
+++ /dev/null
@@ -1,290 +0,0 @@
-import os
-import math
-import shutil
-import tempfile
-import unittest
-
-import torch
-import torchaudio
-from torchaudio.utils import sox_utils
-from torchaudio._internal.module_utils import is_module_available
-
-from torchaudio_unittest.common_utils import get_asset_path
-
-BACKENDS = []
-BACKENDS_MP3 = []
-
-if is_module_available('soundfile'):
-    BACKENDS.append('soundfile')
-
-if is_module_available('torchaudio._torchaudio'):
-    BACKENDS.append('sox')
-
-    if (
-            'mp3' in sox_utils.list_read_formats() and
-            'mp3' in sox_utils.list_write_formats()
-    ):
-        BACKENDS_MP3 = ['sox']
-
-
-def create_temp_assets_dir():
-    """
-    Creates a temporary directory and moves all files from test/assets there.
-    Returns a Tuple[string, TemporaryDirectory] which is the folder path
-    and object.
-    """
-    tmp_dir = tempfile.TemporaryDirectory()
-    shutil.copytree(get_asset_path(), os.path.join(tmp_dir.name, "assets"))
-    return tmp_dir.name, tmp_dir
-
-
-class Test_LoadSave(unittest.TestCase):
-    test_dirpath, test_dir = create_temp_assets_dir()
-    test_filepath = os.path.join(test_dirpath, "assets",
-                                 "steam-train-whistle-daniel_simon.mp3")
-    test_filepath_wav = os.path.join(test_dirpath, "assets",
-                                     "steam-train-whistle-daniel_simon.wav")
-
-    def setUp(self):
-        torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True
-
-    def test_1_save(self):
-        for backend in BACKENDS_MP3:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_1_save(self.test_filepath, False)
-
-        for backend in BACKENDS:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_1_save(self.test_filepath_wav, True)
-
-    def _test_1_save(self, test_filepath, normalization):
-        # load signal
-        x, sr = torchaudio.load(test_filepath, normalization=normalization)
-
-        # check save
-        new_filepath = os.path.join(self.test_dirpath, "test.wav")
-        torchaudio.save(new_filepath, x, sr)
-        self.assertTrue(os.path.isfile(new_filepath))
-        os.unlink(new_filepath)
-
-        # check automatic normalization
-        x /= 1 << 31
-        torchaudio.save(new_filepath, x, sr)
-        self.assertTrue(os.path.isfile(new_filepath))
-        os.unlink(new_filepath)
-
-        # test save 1d tensor
-        x = x[0, :]  # get mono signal
-        x.squeeze_()  # remove channel dim
-        torchaudio.save(new_filepath, x, sr)
-        self.assertTrue(os.path.isfile(new_filepath))
-        os.unlink(new_filepath)
-
-        # don't allow invalid sizes as inputs
-        with self.assertRaises(ValueError):
-            x.unsqueeze_(1)  # L x C not C x L
-            torchaudio.save(new_filepath, x, sr)
-
-        with self.assertRaises(ValueError):
-            x.squeeze_()
-            x.unsqueeze_(1)
-            x.unsqueeze_(0)  # 1 x L x 1
-            torchaudio.save(new_filepath, x, sr)
-
-        # don't save to folders that don't exist
-        with self.assertRaises(OSError):
-            new_filepath = os.path.join(self.test_dirpath, "no-path",
-                                        "test.wav")
-            torchaudio.save(new_filepath, x, sr)
-
-    def test_1_save_sine(self):
-        for backend in BACKENDS:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_1_save_sine()
-
-    def _test_1_save_sine(self):
-
-        # save created file
-        sinewave_filepath = os.path.join(self.test_dirpath, "assets",
-                                         "sinewave.wav")
-        sr = 16000
-        freq = 440
-        volume = 0.3
-
-        y = (torch.cos(
-            2 * math.pi * torch.arange(0, 4 * sr).float() * freq / sr))
-        y.unsqueeze_(0)
-        # y is between -1 and 1, so must scale
-        y = (y * volume * (2**31)).long()
-        torchaudio.save(sinewave_filepath, y, sr)
-        self.assertTrue(os.path.isfile(sinewave_filepath))
-
-        # test precision
-        new_precision = 32
-        new_filepath = os.path.join(self.test_dirpath, "test.wav")
-        si, ei = torchaudio.info(sinewave_filepath)
-        torchaudio.save(new_filepath, y, sr, new_precision)
-        si32, ei32 = torchaudio.info(new_filepath)
-        self.assertEqual(si.precision, 16)
-        self.assertEqual(si32.precision, new_precision)
-        os.unlink(new_filepath)
-
-    def test_2_load(self):
-        for backend in BACKENDS_MP3:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_2_load(self.test_filepath, 278756)
-
-        for backend in BACKENDS:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_2_load(self.test_filepath_wav, 276858)
-
-    def _test_2_load(self, test_filepath, length):
-        # check normal loading
-        x, sr = torchaudio.load(test_filepath)
-        self.assertEqual(sr, 44100)
-        self.assertEqual(x.size(), (2, length))
-
-        # check offset
-        offset = 15
-        x, _ = torchaudio.load(test_filepath)
-        x_offset, _ = torchaudio.load(test_filepath, offset=offset)
-        self.assertTrue(x[:, offset:].allclose(x_offset))
-
-        # check number of frames
-        n = 201
-        x, _ = torchaudio.load(test_filepath, num_frames=n)
-        self.assertTrue(x.size(), (2, n))
-
-        # check channels first
-        x, _ = torchaudio.load(test_filepath, channels_first=False)
-        self.assertEqual(x.size(), (length, 2))
-
-        # check raising errors
-        with self.assertRaises(OSError):
-            torchaudio.load("file-does-not-exist.mp3")
-
-        with self.assertRaises(OSError):
-            tdir = os.path.join(
-                os.path.dirname(self.test_dirpath), "torchaudio")
-            torchaudio.load(tdir)
-
-    def test_2_load_nonormalization(self):
-        for backend in BACKENDS_MP3:
-            if backend == 'sox_io':
-                continue
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_2_load_nonormalization(self.test_filepath, 278756)
-
-    def _test_2_load_nonormalization(self, test_filepath, length):
-
-        # check no normalizing
-        x, _ = torchaudio.load(test_filepath, normalization=False)
-        self.assertTrue(x.min() <= -1.0)
-        self.assertTrue(x.max() >= 1.0)
-
-        # check different input tensor type
-        x, _ = torchaudio.load(test_filepath, torch.LongTensor(), normalization=False)
-        self.assertTrue(isinstance(x, torch.LongTensor))
-
-    def test_3_load_and_save_is_identity(self):
-        for backend in BACKENDS:
-            if backend == 'sox_io':
-                continue
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_3_load_and_save_is_identity()
-
-    def _test_3_load_and_save_is_identity(self):
-        input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
-        tensor, sample_rate = torchaudio.load(input_path)
-        output_path = os.path.join(self.test_dirpath, 'test.wav')
-        torchaudio.save(output_path, tensor, sample_rate)
-        tensor2, sample_rate2 = torchaudio.load(output_path)
-        self.assertTrue(tensor.allclose(tensor2))
-        self.assertEqual(sample_rate, sample_rate2)
-        os.unlink(output_path)
-
-    @unittest.skipIf(any(be not in BACKENDS for be in ["sox", "soundfile"]), "sox and soundfile are not available")
-    def test_3_load_and_save_is_identity_across_backend(self):
-        with self.subTest():
-            self._test_3_load_and_save_is_identity_across_backend("sox", "soundfile")
-        with self.subTest():
-            self._test_3_load_and_save_is_identity_across_backend("soundfile", "sox")
-
-    def _test_3_load_and_save_is_identity_across_backend(self, backend1, backend2):
-        torchaudio.set_audio_backend(backend1)
-        input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
-        tensor1, sample_rate1 = torchaudio.load(input_path)
-
-        output_path = os.path.join(self.test_dirpath, 'test.wav')
-        torchaudio.save(output_path, tensor1, sample_rate1)
-
-        torchaudio.set_audio_backend(backend2)
-        tensor2, sample_rate2 = torchaudio.load(output_path)
-
-        self.assertTrue(tensor1.allclose(tensor2))
-        self.assertEqual(sample_rate1, sample_rate2)
-        os.unlink(output_path)
-
-    def test_4_load_partial(self):
-        for backend in BACKENDS_MP3:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_4_load_partial()
-
-    def _test_4_load_partial(self):
-        num_frames = 101
-        offset = 201
-        # load entire mono sinewave wav file, load a partial copy and then compare
-        input_sine_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
-        x_sine_full, sr_sine = torchaudio.load(input_sine_path)
-        x_sine_part, _ = torchaudio.load(input_sine_path, num_frames=num_frames, offset=offset)
-        l1_error = x_sine_full[:, offset:(num_frames + offset)].sub(x_sine_part).abs().sum().item()
-        # test for the correct number of samples and that the correct portion was loaded
-        self.assertEqual(x_sine_part.size(1), num_frames)
-        self.assertEqual(l1_error, 0.)
-        # create a two channel version of this wavefile
-        x_2ch_sine = x_sine_full.repeat(1, 2)
-        out_2ch_sine_path = os.path.join(self.test_dirpath, 'assets', '2ch_sinewave.wav')
-        torchaudio.save(out_2ch_sine_path, x_2ch_sine, sr_sine)
-        x_2ch_sine_load, _ = torchaudio.load(out_2ch_sine_path, num_frames=num_frames, offset=offset)
-        os.unlink(out_2ch_sine_path)
-        l1_error = x_2ch_sine_load.sub(x_2ch_sine[:, offset:(offset + num_frames)]).abs().sum().item()
-        self.assertEqual(l1_error, 0.)
-
-        # test with two channel mp3
-        x_2ch_full, sr_2ch = torchaudio.load(self.test_filepath, normalization=True)
-        x_2ch_part, _ = torchaudio.load(self.test_filepath, normalization=True, num_frames=num_frames, offset=offset)
-        l1_error = x_2ch_full[:, offset:(offset + num_frames)].sub(x_2ch_part).abs().sum().item()
-        self.assertEqual(x_2ch_part.size(1), num_frames)
-        self.assertEqual(l1_error, 0.)
-
-        # check behavior if number of samples would exceed file length
-        offset_ns = 300
-        x_ns, _ = torchaudio.load(input_sine_path, num_frames=100000, offset=offset_ns)
-        self.assertEqual(x_ns.size(1), x_sine_full.size(1) - offset_ns)
-
-        # check when offset is beyond the end of the file
-        with self.assertRaises(RuntimeError):
-            torchaudio.load(input_sine_path, offset=100000)
-
-    def test_5_get_info(self):
-        for backend in BACKENDS:
-            with self.subTest():
-                torchaudio.set_audio_backend(backend)
-                self._test_5_get_info()
-
-    def _test_5_get_info(self):
-        input_path = os.path.join(self.test_dirpath, 'assets', 'sinewave.wav')
-        channels, samples, rate, precision = (1, 64000, 16000, 16)
-        si, ei = torchaudio.info(input_path)
-        self.assertEqual(si.channels, channels)
-        self.assertEqual(si.length, samples)
-        self.assertEqual(si.rate, rate)
-        self.assertEqual(ei.bits_per_sample, precision)
diff --git a/test/torchaudio_unittest/backend/soundfile/info_test.py b/test/torchaudio_unittest/backend/soundfile/info_test.py
index 3b3f792281..23f7e2cb4e 100644
--- a/test/torchaudio_unittest/backend/soundfile/info_test.py
+++ b/test/torchaudio_unittest/backend/soundfile/info_test.py
@@ -3,7 +3,7 @@
 import tarfile
 
 import torch
-from torchaudio.backend import _soundfile_backend as soundfile_backend
+from torchaudio.backend import soundfile_backend
 from torchaudio._internal import module_utils as _mod_utils
 
 from torchaudio_unittest.common_utils import (
diff --git a/test/torchaudio_unittest/backend/soundfile/load_test.py b/test/torchaudio_unittest/backend/soundfile/load_test.py
index 399266de8f..0e3a240d26 100644
--- a/test/torchaudio_unittest/backend/soundfile/load_test.py
+++ b/test/torchaudio_unittest/backend/soundfile/load_test.py
@@ -4,7 +4,7 @@
 
 import torch
 from torchaudio._internal import module_utils as _mod_utils
-from torchaudio.backend import _soundfile_backend as soundfile_backend
+from torchaudio.backend import soundfile_backend
 from parameterized import parameterized
 
 from torchaudio_unittest.common_utils import (
diff --git a/test/torchaudio_unittest/backend/soundfile/save_test.py b/test/torchaudio_unittest/backend/soundfile/save_test.py
index 2c511ae3a1..06b45a63e6 100644
--- a/test/torchaudio_unittest/backend/soundfile/save_test.py
+++ b/test/torchaudio_unittest/backend/soundfile/save_test.py
@@ -2,7 +2,7 @@
 from unittest.mock import patch
 
 from torchaudio._internal import module_utils as _mod_utils
-from torchaudio.backend import _soundfile_backend as soundfile_backend
+from torchaudio.backend import soundfile_backend
 
 from torchaudio_unittest.common_utils import (
     TempDirMixin,
diff --git a/test/torchaudio_unittest/backend/utils_test.py b/test/torchaudio_unittest/backend/utils_test.py
index 3f355be0cb..e7f908762d 100644
--- a/test/torchaudio_unittest/backend/utils_test.py
+++ b/test/torchaudio_unittest/backend/utils_test.py
@@ -25,31 +25,13 @@ class TestBackendSwitch_NoBackend(BackendSwitchMixin, common_utils.TorchaudioTes
     backend_module = torchaudio.backend.no_backend
 
 
-@common_utils.skipIfNoExtension
-class TestBackendSwitch_SoX(BackendSwitchMixin, common_utils.TorchaudioTestCase):
-    backend = 'sox'
-    backend_module = torchaudio.backend.sox_backend
-
-
 @common_utils.skipIfNoExtension
 class TestBackendSwitch_SoXIO(BackendSwitchMixin, common_utils.TorchaudioTestCase):
     backend = 'sox_io'
     backend_module = torchaudio.backend.sox_io_backend
 
 
-@common_utils.skipIfNoModule('soundfile')
-class TestBackendSwitch_soundfile_legacy(BackendSwitchMixin, common_utils.TorchaudioTestCase):
-    backend = 'soundfile'
-    backend_module = torchaudio.backend.soundfile_backend
-
-    def setUp(self):
-        torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True
-
-    def tearDown(self):
-        torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = None
-
-
 @common_utils.skipIfNoModule('soundfile')
 class TestBackendSwitch_soundfile(BackendSwitchMixin, common_utils.TorchaudioTestCase):
     backend = 'soundfile'
-    backend_module = torchaudio.backend._soundfile_backend
+    backend_module = torchaudio.backend.soundfile_backend
diff --git a/test/torchaudio_unittest/common_utils/backend_utils.py b/test/torchaudio_unittest/common_utils/backend_utils.py
index 7e519c80eb..84dd73ed2e 100644
--- a/test/torchaudio_unittest/common_utils/backend_utils.py
+++ b/test/torchaudio_unittest/common_utils/backend_utils.py
@@ -6,15 +6,13 @@
 def set_audio_backend(backend):
     """Allow additional backend value, 'default'"""
     backends = torchaudio.list_audio_backends()
-    if backend == 'soundfile-new':
+    if backend == 'soundfile':
         be = 'soundfile'
-        torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False
     elif backend == 'default':
         if 'sox_io' in backends:
             be = 'sox_io'
         elif 'soundfile' in backends:
             be = 'soundfile'
-            torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = True
         else:
             raise unittest.SkipTest('No default backend available')
     else:
diff --git a/test/torchaudio_unittest/datasets/tedlium_test.py b/test/torchaudio_unittest/datasets/tedlium_test.py
index abb6ea5b5c..20e5c2e838 100644
--- a/test/torchaudio_unittest/datasets/tedlium_test.py
+++ b/test/torchaudio_unittest/datasets/tedlium_test.py
@@ -143,13 +143,6 @@ class TestTedliumSoundfile(Tedlium, TorchaudioTestCase):
     backend = "soundfile"
 
 
-class TestTedliumSoundfileNew(Tedlium, TorchaudioTestCase):
-    backend = "soundfile-new"
-
-
 if platform.system() != "Windows":
-    class TestTedliumSox(Tedlium, TorchaudioTestCase):
-        backend = "sox"
-
     class TestTedliumSoxIO(Tedlium, TorchaudioTestCase):
         backend = "sox_io"
diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
index 35f5fd41d5..e72ecac4cb 100644
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
@@ -10,20 +10,10 @@
     transforms,
 )
 
-USE_SOUNDFILE_LEGACY_INTERFACE = None
-
 from torchaudio.backend import (
     list_audio_backends,
     get_audio_backend,
     set_audio_backend,
-    save_encinfo,
-    sox_signalinfo_t,
-    sox_encodinginfo_t,
-    get_sox_option_t,
-    get_sox_encoding_t,
-    get_sox_bool,
-    SignalInfo,
-    EncodingInfo,
 )
 
 try:
diff --git a/torchaudio/backend/__init__.py b/torchaudio/backend/__init__.py
index 361935229f..c3fdf0b439 100644
--- a/torchaudio/backend/__init__.py
+++ b/torchaudio/backend/__init__.py
@@ -5,18 +5,6 @@
     get_audio_backend,
     set_audio_backend,
 )
-from .sox_backend import (
-    save_encinfo,
-    sox_signalinfo_t,
-    sox_encodinginfo_t,
-    get_sox_option_t,
-    get_sox_encoding_t,
-    get_sox_bool,
-)
-from .common import (
-    SignalInfo,
-    EncodingInfo,
-)
 
 
 utils._init_audio_backend()
diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py
deleted file mode 100644
index f939548413..0000000000
--- a/torchaudio/backend/_soundfile_backend.py
+++ /dev/null
@@ -1,449 +0,0 @@
-"""The new soundfile backend which will become default in 0.8.0 onward"""
-from typing import Tuple, Optional
-import warnings
-
-import torch
-from torchaudio._internal import module_utils as _mod_utils
-from .common import AudioMetaData
-
-
-if _mod_utils.is_module_available("soundfile"):
-    import soundfile
-
-
-# Mapping from soundfile subtype to number of bits per sample.
-# This is mostly heuristical and the value is set to 0 when it is irrelevant
-# (lossy formats) or when it can't be inferred.
-# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard:
-# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
-# the default seems to be 8 bits but it can be compressed further to 4 bits.
-# The dict is inspired from
-# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
-_SUBTYPE_TO_BITS_PER_SAMPLE = {
-    'PCM_S8': 8,  # Signed 8 bit data
-    'PCM_16': 16,  # Signed 16 bit data
-    'PCM_24': 24,  # Signed 24 bit data
-    'PCM_32': 32,  # Signed 32 bit data
-    'PCM_U8': 8,  # Unsigned 8 bit data (WAV and RAW only)
-    'FLOAT': 32,  # 32 bit float data
-    'DOUBLE': 64,  # 64 bit float data
-    'ULAW': 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
-    'ALAW': 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
-    'IMA_ADPCM': 0,  # IMA ADPCM.
-    'MS_ADPCM': 0,  # Microsoft ADPCM.
-    'GSM610': 0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
-    'VOX_ADPCM': 0,  # OKI / Dialogix ADPCM
-    'G721_32': 0,  # 32kbs G721 ADPCM encoding.
-    'G723_24': 0,  # 24kbs G723 ADPCM encoding.
-    'G723_40': 0,  # 40kbs G723 ADPCM encoding.
-    'DWVW_12': 12,  # 12 bit Delta Width Variable Word encoding.
-    'DWVW_16': 16,  # 16 bit Delta Width Variable Word encoding.
-    'DWVW_24': 24,  # 24 bit Delta Width Variable Word encoding.
-    'DWVW_N': 0,  # N bit Delta Width Variable Word encoding.
-    'DPCM_8': 8,  # 8 bit differential PCM (XI only)
-    'DPCM_16': 16,  # 16 bit differential PCM (XI only)
-    'VORBIS': 0,  # Xiph Vorbis encoding. (lossy)
-    'ALAC_16': 16,  # Apple Lossless Audio Codec (16 bit).
-    'ALAC_20': 20,  # Apple Lossless Audio Codec (20 bit).
-    'ALAC_24': 24,  # Apple Lossless Audio Codec (24 bit).
-    'ALAC_32': 32,  # Apple Lossless Audio Codec (32 bit).
-}
-
-
-def _get_bit_depth(subtype):
-    if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE:
-        warnings.warn(
-            f"The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample "
-            "attribute will be set to 0. If you are seeing this warning, please "
-            "report by opening an issue on github (after checking for existing/closed ones). "
-            "You may otherwise ignore this warning."
-        )
-    return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0)
-
-
-_SUBTYPE_TO_ENCODING = {
-    'PCM_S8': 'PCM_S',
-    'PCM_16': 'PCM_S',
-    'PCM_24': 'PCM_S',
-    'PCM_32': 'PCM_S',
-    'PCM_U8': 'PCM_U',
-    'FLOAT': 'PCM_F',
-    'DOUBLE': 'PCM_F',
-    'ULAW': 'ULAW',
-    'ALAW': 'ALAW',
-    'VORBIS': 'VORBIS',
-}
-
-
-def _get_encoding(format: str, subtype: str):
-    if format == 'FLAC':
-        return 'FLAC'
-    return _SUBTYPE_TO_ENCODING.get(subtype, 'UNKNOWN')
-
-
-@_mod_utils.requires_module("soundfile")
-def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
-    """Get signal information of an audio file.
-
-    Args:
-        filepath (path-like object or file-like object):
-            Source of audio data.
-            Note:
-                  * This argument is intentionally annotated as ``str`` only,
-                    for the consistency with "sox_io" backend, which has a restriction
-                    on type annotation due to TorchScript compiler compatiblity.
-        format (str, optional):
-            Not used. PySoundFile does not accept format hint.
-
-    Returns:
-        AudioMetaData: meta data of the given audio.
-    """
-    sinfo = soundfile.info(filepath)
-    return AudioMetaData(
-        sinfo.samplerate,
-        sinfo.frames,
-        sinfo.channels,
-        bits_per_sample=_get_bit_depth(sinfo.subtype),
-        encoding=_get_encoding(sinfo.format, sinfo.subtype),
-    )
-
-
-_SUBTYPE2DTYPE = {
-    "PCM_S8": "int8",
-    "PCM_U8": "uint8",
-    "PCM_16": "int16",
-    "PCM_32": "int32",
-    "FLOAT": "float32",
-    "DOUBLE": "float64",
-}
-
-
-@_mod_utils.requires_module("soundfile")
-def load(
-    filepath: str,
-    frame_offset: int = 0,
-    num_frames: int = -1,
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[torch.Tensor, int]:
-    """Load audio data from file.
-
-    Note:
-        The formats this function can handle depend on the soundfile installation.
-        This function is tested on the following formats;
-
-        * WAV
-
-            * 32-bit floating-point
-            * 32-bit signed integer
-            * 16-bit signed integer
-            * 8-bit unsigned integer
-
-        * FLAC
-        * OGG/VORBIS
-        * SPHERE
-
-    By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
-    ``float32`` dtype and the shape of ``[channel, time]``.
-    The samples are normalized to fit in the range of ``[-1.0, 1.0]``.
-
-    When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
-    signed integer and 8-bit unsigned integer (24-bit signed integer is not supported),
-    by providing ``normalize=False``, this function can return integer Tensor, where the samples
-    are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor
-    for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
-
-    ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as
-    ``flac`` and ``mp3``.
-    For these formats, this function always returns ``float32`` Tensor with values normalized to
-    ``[-1.0, 1.0]``.
-
-    Args:
-        filepath (path-like object or file-like object):
-            Source of audio data.
-            Note:
-                  * This argument is intentionally annotated as ``str`` only,
-                    for the consistency with "sox_io" backend, which has a restriction
-                    on type annotation due to TorchScript compiler compatiblity.
-        frame_offset (int):
-            Number of frames to skip before start reading data.
-        num_frames (int):
-            Maximum number of frames to read. ``-1`` reads all the remaining samples,
-            starting from ``frame_offset``.
-            This function may return the less number of frames if there is not enough
-            frames in the given file.
-        normalize (bool):
-            When ``True``, this function always return ``float32``, and sample values are
-            normalized to ``[-1.0, 1.0]``.
-            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
-            integer type.
-            This argument has no effect for formats other than integer WAV type.
-        channels_first (bool):
-            When True, the returned Tensor has dimension ``[channel, time]``.
-            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
-        format (str, optional):
-            Not used. PySoundFile does not accept format hint.
-
-    Returns:
-        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
-            If the input file has integer wav format and normalization is off, then it has
-            integer type, else ``float32`` type. If ``channels_first=True``, it has
-            ``[channel, time]`` else ``[time, channel]``.
-    """
-    with soundfile.SoundFile(filepath, "r") as file_:
-        if file_.format != "WAV" or normalize:
-            dtype = "float32"
-        elif file_.subtype not in _SUBTYPE2DTYPE:
-            raise ValueError(f"Unsupported subtype: {file_.subtype}")
-        else:
-            dtype = _SUBTYPE2DTYPE[file_.subtype]
-
-        frames = file_._prepare_read(frame_offset, None, num_frames)
-        waveform = file_.read(frames, dtype, always_2d=True)
-        sample_rate = file_.samplerate
-
-    waveform = torch.from_numpy(waveform)
-    if channels_first:
-        waveform = waveform.t()
-    return waveform, sample_rate
-
-
-def _get_subtype_for_wav(
-        dtype: torch.dtype,
-        encoding: str,
-        bits_per_sample: int):
-    if not encoding:
-        if not bits_per_sample:
-            subtype = {
-                torch.uint8: "PCM_U8",
-                torch.int16: "PCM_16",
-                torch.int32: "PCM_32",
-                torch.float32: "FLOAT",
-                torch.float64: "DOUBLE",
-            }.get(dtype)
-            if not subtype:
-                raise ValueError(f"Unsupported dtype for wav: {dtype}")
-            return subtype
-        if bits_per_sample == 8:
-            return "PCM_U8"
-        return f"PCM_{bits_per_sample}"
-    if encoding == "PCM_S":
-        if not bits_per_sample:
-            return "PCM_32"
-        if bits_per_sample == 8:
-            raise ValueError("wav does not support 8-bit signed PCM encoding.")
-        return f"PCM_{bits_per_sample}"
-    if encoding == "PCM_U":
-        if bits_per_sample in (None, 8):
-            return "PCM_U8"
-        raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
-    if encoding == "PCM_F":
-        if bits_per_sample in (None, 32):
-            return "FLOAT"
-        if bits_per_sample == 64:
-            return "DOUBLE"
-        raise ValueError("wav only supports 32/64-bit float PCM encoding.")
-    if encoding == "ULAW":
-        if bits_per_sample in (None, 8):
-            return "ULAW"
-        raise ValueError("wav only supports 8-bit mu-law encoding.")
-    if encoding == "ALAW":
-        if bits_per_sample in (None, 8):
-            return "ALAW"
-        raise ValueError("wav only supports 8-bit a-law encoding.")
-    raise ValueError(f"wav does not support {encoding}.")
-
-
-def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
-    if encoding in (None, "PCM_S"):
-        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
-    if encoding in ("PCM_U", "PCM_F"):
-        raise ValueError(f"sph does not support {encoding} encoding.")
-    if encoding == "ULAW":
-        if bits_per_sample in (None, 8):
-            return "ULAW"
-        raise ValueError("sph only supports 8-bit for mu-law encoding.")
-    if encoding == "ALAW":
-        return "ALAW"
-    raise ValueError(f"sph does not support {encoding}.")
-
-
-def _get_subtype(
-        dtype: torch.dtype,
-        format: str,
-        encoding: str,
-        bits_per_sample: int):
-    if format == "wav":
-        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
-    if format == "flac":
-        if encoding:
-            raise ValueError("flac does not support encoding.")
-        if not bits_per_sample:
-            return "PCM_24"
-        if bits_per_sample > 24:
-            raise ValueError("flac does not support bits_per_sample > 24.")
-        return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
-    if format in ("ogg", "vorbis"):
-        if encoding or bits_per_sample:
-            raise ValueError(
-                "ogg/vorbis does not support encoding/bits_per_sample.")
-        return "VORBIS"
-    if format == "sph":
-        return _get_subtype_for_sphere(encoding, bits_per_sample)
-    if format in ("nis", "nist"):
-        return "PCM_16"
-    raise ValueError(f"Unsupported format: {format}")
-
-
-@_mod_utils.requires_module("soundfile")
-def save(
-    filepath: str,
-    src: torch.Tensor,
-    sample_rate: int,
-    channels_first: bool = True,
-    compression: Optional[float] = None,
-    format: Optional[str] = None,
-    encoding: Optional[str] = None,
-    bits_per_sample: Optional[int] = None,
-):
-    """Save audio data to file.
-
-    Note:
-        The formats this function can handle depend on the soundfile installation.
-        This function is tested on the following formats;
-
-        * WAV
-
-            * 32-bit floating-point
-            * 32-bit signed integer
-            * 16-bit signed integer
-            * 8-bit unsigned integer
-
-        * FLAC
-        * OGG/VORBIS
-        * SPHERE
-
-    Args:
-        filepath (str or pathlib.Path): Path to audio file.
-            This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str``
-            for the consistency with "sox_io" backend, which has a restriction on type annotation
-            for TorchScript compiler compatiblity.
-        src (torch.Tensor): Audio data to save. must be 2D tensor.
-        sample_rate (int): sampling rate
-        channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``,
-            otherwise ``[time, channel]``.
-        compression (Optional[float]): Not used.
-            It is here only for interface compatibility reson with "sox_io" backend.
-        format (str, optional): Override the audio format.
-            When ``filepath`` argument is path-like object, audio format is
-            inferred from file extension. If the file extension is missing or
-            different, you can specify the correct format with this argument.
-
-            When ``filepath`` argument is file-like object,
-            this argument is required.
-
-            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
-            ``"flac"`` and ``"sph"``.
-        encoding (str, optional): Changes the encoding for supported formats.
-            This argument is effective only for supported formats, sush as
-            ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are;
-
-                - ``"PCM_S"`` (signed integer Linear PCM)
-                - ``"PCM_U"`` (unsigned integer Linear PCM)
-                - ``"PCM_F"`` (floating point PCM)
-                - ``"ULAW"`` (mu-law)
-                - ``"ALAW"`` (a-law)
-
-        bits_per_sample (int, optional): Changes the bit depth for the
-            supported formats.
-            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
-            you can change the bit depth.
-            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.
-
-    Supported formats/encodings/bit depth/compression are:
-
-    ``"wav"``
-        - 32-bit floating-point PCM
-        - 32-bit signed integer PCM
-        - 24-bit signed integer PCM
-        - 16-bit signed integer PCM
-        - 8-bit unsigned integer PCM
-        - 8-bit mu-law
-        - 8-bit a-law
-
-        Note: Default encoding/bit depth is determined by the dtype of
-              the input Tensor.
-
-    ``"flac"``
-        - 8-bit
-        - 16-bit
-        - 24-bit (default)
-
-    ``"ogg"``, ``"vorbis"``
-        - Doesn't accept changing configuration.
-
-    ``"sph"``
-        - 8-bit signed integer PCM
-        - 16-bit signed integer PCM
-        - 24-bit signed integer PCM
-        - 32-bit signed integer PCM (default)
-        - 8-bit mu-law
-        - 8-bit a-law
-        - 16-bit a-law
-        - 24-bit a-law
-        - 32-bit a-law
-
-    """
-    if src.ndim != 2:
-        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
-    if compression is not None:
-        warnings.warn(
-            '`save` function of "soundfile" backend does not support "compression" parameter. '
-            "The argument is silently ignored."
-        )
-    if hasattr(filepath, 'write'):
-        if format is None:
-            raise RuntimeError('`format` is required when saving to file object.')
-        ext = format.lower()
-    else:
-        ext = str(filepath).split(".")[-1].lower()
-
-    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
-        raise ValueError("Invalid bits_per_sample.")
-    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)
-
-    # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format,
-    # so we extend the extensions manually here
-    if ext in ["nis", "nist", "sph"] and format is None:
-        format = "NIST"
-
-    if channels_first:
-        src = src.t()
-
-    soundfile.write(
-        file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format
-    )
-
-
-@_mod_utils.requires_module("soundfile")
-@_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0")
-def load_wav(
-    filepath: str,
-    frame_offset: int = 0,
-    num_frames: int = -1,
-    channels_first: bool = True,
-) -> Tuple[torch.Tensor, int]:
-    """Load wave file.
-
-    This function is defined only for the purpose of compatibility against other backend
-    for simple usecases, such as ``torchaudio.load_wav(filepath)``.
-    The implementation is same as :py:func:`load`.
-    """
-    return load(
-        filepath,
-        frame_offset,
-        num_frames,
-        normalize=False,
-        channels_first=channels_first,
-    )
diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py
index 571f950109..6ccd36e4dc 100644
--- a/torchaudio/backend/common.py
+++ b/torchaudio/backend/common.py
@@ -1,7 +1,3 @@
-from typing import Any, Optional
-import warnings
-
-
 class AudioMetaData:
     """Return type of ``torchaudio.info`` function.
 
@@ -28,185 +24,3 @@ def __init__(
         self.num_channels = num_channels
         self.bits_per_sample = bits_per_sample
         self.encoding = encoding
-
-
-class SignalInfo:
-    """One of return types of ``torchaudio.info`` functions.
-
-    This class is used by :ref:`"sox" backend (deprecated)<sox_backend>` and
-    :ref:`"soundfile" backend with the legacy interface (deprecated)<soundfile_legacy_backend>`.
-
-    See https://fossies.org/dox/sox-14.4.2/structsox__signalinfo__t.html
-
-    :ivar Optional[int] channels: The number of channels
-    :ivar Optional[float] rate: Sampleing rate
-    :ivar Optional[int] precision: Bit depth
-    :ivar Optional[int] length: For :ref:`sox backend<sox_backend>`, the number of samples.
-        (frames * channels). For :ref:`soundfile backend<soundfile_backend>`, the number of frames.
-    """
-    def __init__(self,
-                 channels: Optional[int] = None,
-                 rate: Optional[float] = None,
-                 precision: Optional[int] = None,
-                 length: Optional[int] = None) -> None:
-        message = (
-            f'{self.__module__}.{self.__class__.__name__} has been deprecated '
-            'and will be removed from 0.9.0 release. '
-            'Please migrate to `AudioMetaData`.'
-        )
-        warnings.warn(message)
-        self.channels = channels
-        self.rate = rate
-        self.precision = precision
-        self.length = length
-
-
-class EncodingInfo:
-    """One of return types of ``torchaudio.info`` functions.
-
-    This class is used by :ref:`"sox" backend (deprecated)<sox_backend>` and
-    :ref:`"soundfile" backend with the legacy interface (deprecated)<soundfile_legacy_backend>`.
-
-    See https://fossies.org/dox/sox-14.4.2/structsox__encodinginfo__t.html
-
-    :ivar Optional[int] encoding: sox_encoding_t
-    :ivar Optional[int] bits_per_sample: bit depth
-    :ivar Optional[float] compression: Compression option
-    :ivar Any reverse_bytes:
-    :ivar Any reverse_nibbles:
-    :ivar Any reverse_bits:
-    :ivar Optional[bool] opposite_endian:
-    """
-    def __init__(self,
-                 encoding: Any = None,
-                 bits_per_sample: Optional[int] = None,
-                 compression: Optional[float] = None,
-                 reverse_bytes: Any = None,
-                 reverse_nibbles: Any = None,
-                 reverse_bits: Any = None,
-                 opposite_endian: Optional[bool] = None) -> None:
-        message = (
-            f'{self.__module__}.{self.__class__.__name__} has been deprecated '
-            'and will be removed from 0.9.0 release. '
-            'Please migrate to `AudioMetaData`.'
-        )
-        warnings.warn(message)
-        self.encoding = encoding
-        self.bits_per_sample = bits_per_sample
-        self.compression = compression
-        self.reverse_bytes = reverse_bytes
-        self.reverse_nibbles = reverse_nibbles
-        self.reverse_bits = reverse_bits
-        self.opposite_endian = opposite_endian
-
-
-_LOAD_DOCSTRING = r"""Loads an audio file from disk into a tensor
-
-Args:
-    filepath: Path to audio file
-
-    out: An optional output tensor to use instead of creating one. (Default: ``None``)
-
-    normalization: Optional normalization.
-        If boolean `True`, then output is divided by `1 << 31`.
-        Assuming the input is signed 32-bit audio, this normalizes to `[-1, 1]`.
-        If `float`, then output is divided by that number.
-        If `Callable`, then the output is passed as a paramete to the given function,
-        then the output is divided by the result. (Default: ``True``)
-
-    channels_first: Set channels first or length first in result. (Default: ``True``)
-
-    num_frames: Number of frames to load.  0 to load everything after the offset.
-        (Default: ``0``)
-
-    offset: Number of frames from the start of the file to begin data loading.
-        (Default: ``0``)
-
-    signalinfo: A sox_signalinfo_t type, which could be helpful if the
-        audio type cannot be automatically determined. (Default: ``None``)
-
-    encodinginfo: A sox_encodinginfo_t type, which could be set if the
-        audio type cannot be automatically determined. (Default: ``None``)
-
-    filetype: A filetype or extension to be set if sox cannot determine it
-        automatically. (Default: ``None``)
-
-Returns:
-    (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where
-        L is the number of audio frames and
-        C is the number of channels.
-        An integer which is the sample rate of the audio (as listed in the metadata of the file)
-
-Example
-    >>> data, sample_rate = torchaudio.load('foo.mp3')
-    >>> print(data.size())
-    torch.Size([2, 278756])
-    >>> print(sample_rate)
-    44100
-    >>> data_vol_normalized, _ = torchaudio.load('foo.mp3', normalization=lambda x: torch.abs(x).max())
-    >>> print(data_vol_normalized.abs().max())
-    1.
-"""
-
-
-_LOAD_WAV_DOCSTRING = r""" Loads a wave file.
-
-It assumes that the wav file uses 16 bit per sample that needs normalization by
-shifting the input right by 16 bits.
-
-Args:
-    filepath: Path to audio file
-
-Returns:
-    (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where L is the number
-        of audio frames and C is the number of channels. An integer which is the sample rate of the
-        audio (as listed in the metadata of the file)
-"""
-
-_SAVE_DOCSTRING = r"""Saves a Tensor on file as an audio file
-
-Args:
-    filepath: Path to audio file
-    src: An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
-        the number of audio frames, C is the number of channels
-    sample_rate: An integer which is the sample rate of the
-        audio (as listed in the metadata of the file)
-    precision Bit precision (Default: ``16``)
-    channels_first (bool, optional): Set channels first or length first in result. (
-        Default: ``True``)
-"""
-
-
-_INFO_DOCSTRING = r"""Gets metadata from an audio file without loading the signal.
-
-Args:
-    filepath: Path to audio file
-
-Returns:
-    (sox_signalinfo_t, sox_encodinginfo_t): A si (sox_signalinfo_t) signal
-        info as a python object. An ei (sox_encodinginfo_t) encoding info
-
-Example
-    >>> si, ei = torchaudio.info('foo.wav')
-    >>> rate, channels, encoding = si.rate, si.channels, ei.encoding
-"""
-
-
-def _impl_load(func):
-    setattr(func, '__doc__', _LOAD_DOCSTRING)
-    return func
-
-
-def _impl_load_wav(func):
-    setattr(func, '__doc__', _LOAD_WAV_DOCSTRING)
-    return func
-
-
-def _impl_save(func):
-    setattr(func, '__doc__', _SAVE_DOCSTRING)
-    return func
-
-
-def _impl_info(func):
-    setattr(func, '__doc__', _INFO_DOCSTRING)
-    return func
diff --git a/torchaudio/backend/no_backend.py b/torchaudio/backend/no_backend.py
index 60571c34a9..453bfcdc54 100644
--- a/torchaudio/backend/no_backend.py
+++ b/torchaudio/backend/no_backend.py
@@ -3,33 +3,24 @@
 
 from torch import Tensor
 
-from . import common
-from .common import SignalInfo, EncodingInfo
 
-
-@common._impl_load
 def load(filepath: Union[str, Path],
          out: Optional[Tensor] = None,
          normalization: Union[bool, float, Callable] = True,
          channels_first: bool = True,
          num_frames: int = 0,
          offset: int = 0,
-         signalinfo: Optional[SignalInfo] = None,
-         encodinginfo: Optional[EncodingInfo] = None,
          filetype: Optional[str] = None) -> Tuple[Tensor, int]:
     raise RuntimeError('No audio I/O backend is available.')
 
 
-@common._impl_load_wav
 def load_wav(filepath: Union[str, Path], **kwargs: Any) -> Tuple[Tensor, int]:
     raise RuntimeError('No audio I/O backend is available.')
 
 
-@common._impl_save
 def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
     raise RuntimeError('No audio I/O backend is available.')
 
 
-@common._impl_info
-def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
+def info(filepath: str) -> None:
     raise RuntimeError('No audio I/O backend is available.')
diff --git a/torchaudio/backend/soundfile_backend.py b/torchaudio/backend/soundfile_backend.py
index 49cada21b0..f939548413 100644
--- a/torchaudio/backend/soundfile_backend.py
+++ b/torchaudio/backend/soundfile_backend.py
@@ -1,127 +1,449 @@
-import os
-from typing import Optional, Tuple
+"""The new soundfile backend which will become default in 0.8.0 onward"""
+from typing import Tuple, Optional
+import warnings
 
 import torch
-from torch import Tensor
+from torchaudio._internal import module_utils as _mod_utils
+from .common import AudioMetaData
 
-from torchaudio._internal import (
-    module_utils as _mod_utils,
-    misc_ops as _misc_ops,
-)
-from . import common
-from .common import SignalInfo, EncodingInfo
 
-if _mod_utils.is_module_available('soundfile'):
+if _mod_utils.is_module_available("soundfile"):
     import soundfile
 
 
-_subtype_to_precision = {
-    'PCM_S8': 8,
-    'PCM_16': 16,
-    'PCM_24': 24,
-    'PCM_32': 32,
-    'PCM_U8': 8
+# Mapping from soundfile subtype to number of bits per sample.
+# This is mostly heuristic and the value is set to 0 when it is irrelevant
+# (lossy formats) or when it can't be inferred.
+# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard:
+# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
+# the default seems to be 8 bits but it can be compressed further to 4 bits.
+# The dict is inspired from
+# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
+_SUBTYPE_TO_BITS_PER_SAMPLE = {
+    'PCM_S8': 8,  # Signed 8 bit data
+    'PCM_16': 16,  # Signed 16 bit data
+    'PCM_24': 24,  # Signed 24 bit data
+    'PCM_32': 32,  # Signed 32 bit data
+    'PCM_U8': 8,  # Unsigned 8 bit data (WAV and RAW only)
+    'FLOAT': 32,  # 32 bit float data
+    'DOUBLE': 64,  # 64 bit float data
+    'ULAW': 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
+    'ALAW': 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
+    'IMA_ADPCM': 0,  # IMA ADPCM.
+    'MS_ADPCM': 0,  # Microsoft ADPCM.
+    'GSM610': 0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
+    'VOX_ADPCM': 0,  # OKI / Dialogix ADPCM
+    'G721_32': 0,  # 32kbs G721 ADPCM encoding.
+    'G723_24': 0,  # 24kbs G723 ADPCM encoding.
+    'G723_40': 0,  # 40kbs G723 ADPCM encoding.
+    'DWVW_12': 12,  # 12 bit Delta Width Variable Word encoding.
+    'DWVW_16': 16,  # 16 bit Delta Width Variable Word encoding.
+    'DWVW_24': 24,  # 24 bit Delta Width Variable Word encoding.
+    'DWVW_N': 0,  # N bit Delta Width Variable Word encoding.
+    'DPCM_8': 8,  # 8 bit differential PCM (XI only)
+    'DPCM_16': 16,  # 16 bit differential PCM (XI only)
+    'VORBIS': 0,  # Xiph Vorbis encoding. (lossy)
+    'ALAC_16': 16,  # Apple Lossless Audio Codec (16 bit).
+    'ALAC_20': 20,  # Apple Lossless Audio Codec (20 bit).
+    'ALAC_24': 24,  # Apple Lossless Audio Codec (24 bit).
+    'ALAC_32': 32,  # Apple Lossless Audio Codec (32 bit).
 }
 
 
-@_mod_utils.requires_module('soundfile')
-@common._impl_load
-def load(filepath: str,
-         out: Optional[Tensor] = None,
-         normalization: Optional[bool] = True,
-         channels_first: Optional[bool] = True,
-         num_frames: int = 0,
-         offset: int = 0,
-         signalinfo: SignalInfo = None,
-         encodinginfo: EncodingInfo = None,
-         filetype: Optional[str] = None) -> Tuple[Tensor, int]:
-    r"""See torchaudio.load"""
-
-    assert out is None
-    assert normalization
-    assert signalinfo is None
-    assert encodinginfo is None
-
-    # stringify if `pathlib.Path` (noop if already `str`)
-    filepath = str(filepath)
-
-    # check if valid file
-    if not os.path.isfile(filepath):
-        raise OSError("{} not found or is a directory".format(filepath))
-
-    if num_frames < -1:
-        raise ValueError("Expected value for num_samples -1 (entire file) or >=0")
-    if num_frames == 0:
-        num_frames = -1
-    if offset < 0:
-        raise ValueError("Expected positive offset value")
-
-    # initialize output tensor
-    # TODO call libsoundfile directly to avoid numpy
-    out, sample_rate = soundfile.read(
-        filepath, frames=num_frames, start=offset, dtype="float32", always_2d=True
+def _get_bit_depth(subtype):
+    if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE:
+        warnings.warn(
+            f"The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample "
+            "attribute will be set to 0. If you are seeing this warning, please "
+            "report by opening an issue on github (after checking for existing/closed ones). "
+            "You may otherwise ignore this warning."
+        )
+    return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0)
+
+
+_SUBTYPE_TO_ENCODING = {
+    'PCM_S8': 'PCM_S',
+    'PCM_16': 'PCM_S',
+    'PCM_24': 'PCM_S',
+    'PCM_32': 'PCM_S',
+    'PCM_U8': 'PCM_U',
+    'FLOAT': 'PCM_F',
+    'DOUBLE': 'PCM_F',
+    'ULAW': 'ULAW',
+    'ALAW': 'ALAW',
+    'VORBIS': 'VORBIS',
+}
+
+
+def _get_encoding(format: str, subtype: str):
+    if format == 'FLAC':
+        return 'FLAC'
+    return _SUBTYPE_TO_ENCODING.get(subtype, 'UNKNOWN')
+
+
+@_mod_utils.requires_module("soundfile")
+def info(filepath: str, format: Optional[str] = None) -> AudioMetaData:
+    """Get signal information of an audio file.
+
+    Args:
+        filepath (path-like object or file-like object):
+            Source of audio data.
+            Note:
+                  * This argument is intentionally annotated as ``str`` only,
+                    for the consistency with "sox_io" backend, which has a restriction
+                    on type annotation due to TorchScript compiler compatibility.
+        format (str, optional):
+            Not used. PySoundFile does not accept format hint.
+
+    Returns:
+        AudioMetaData: meta data of the given audio.
+    """
+    sinfo = soundfile.info(filepath)
+    return AudioMetaData(
+        sinfo.samplerate,
+        sinfo.frames,
+        sinfo.channels,
+        bits_per_sample=_get_bit_depth(sinfo.subtype),
+        encoding=_get_encoding(sinfo.format, sinfo.subtype),
     )
-    out = torch.from_numpy(out).t()
 
-    if not channels_first:
-        out = out.t()
 
-    # normalize if needed
-    # _audio_normalization(out, normalization)
+_SUBTYPE2DTYPE = {
+    "PCM_S8": "int8",
+    "PCM_U8": "uint8",
+    "PCM_16": "int16",
+    "PCM_32": "int32",
+    "FLOAT": "float32",
+    "DOUBLE": "float64",
+}
+
+
+@_mod_utils.requires_module("soundfile")
+def load(
+    filepath: str,
+    frame_offset: int = 0,
+    num_frames: int = -1,
+    normalize: bool = True,
+    channels_first: bool = True,
+    format: Optional[str] = None,
+) -> Tuple[torch.Tensor, int]:
+    """Load audio data from file.
+
+    Note:
+        The formats this function can handle depend on the soundfile installation.
+        This function is tested on the following formats;
+
+        * WAV
 
-    return out, sample_rate
+            * 32-bit floating-point
+            * 32-bit signed integer
+            * 16-bit signed integer
+            * 8-bit unsigned integer
 
+        * FLAC
+        * OGG/VORBIS
+        * SPHERE
 
-@_mod_utils.requires_module('soundfile')
-@_mod_utils.deprecated('Please use "torchaudio.load".', '0.9.0')
-@common._impl_load_wav
-def load_wav(filepath, **kwargs):
-    kwargs['normalization'] = 1 << 16
-    return load(filepath, **kwargs)
+    By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
+    ``float32`` dtype and the shape of ``[channel, time]``.
+    The samples are normalized to fit in the range of ``[-1.0, 1.0]``.
 
+    When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
+    signed integer and 8-bit unsigned integer (24-bit signed integer is not supported),
+    by providing ``normalize=False``, this function can return integer Tensor, where the samples
+    are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor
+    for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
 
-@_mod_utils.requires_module('soundfile')
-@common._impl_save
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
-    r"""See torchaudio.save"""
+    ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as
+    ``flac`` and ``mp3``.
+    For these formats, this function always returns ``float32`` Tensor with values normalized to
+    ``[-1.0, 1.0]``.
 
-    ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
+    Args:
+        filepath (path-like object or file-like object):
+            Source of audio data.
+            Note:
+                  * This argument is intentionally annotated as ``str`` only,
+                    for the consistency with "sox_io" backend, which has a restriction
+                    on type annotation due to TorchScript compiler compatibility.
+        frame_offset (int):
+            Number of frames to skip before starting to read data.
+        num_frames (int):
+            Maximum number of frames to read. ``-1`` reads all the remaining samples,
+            starting from ``frame_offset``.
+            This function may return fewer frames if there are not enough
+            frames in the given file.
+        normalize (bool):
+            When ``True``, this function always returns ``float32``, and sample values are
+            normalized to ``[-1.0, 1.0]``.
+            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
+            integer type.
+            This argument has no effect for formats other than integer WAV type.
+        channels_first (bool):
+            When True, the returned Tensor has dimension ``[channel, time]``.
+            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
+        format (str, optional):
+            Not used. PySoundFile does not accept format hint.
 
-    # check if save directory exists
-    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
-    if not os.path.isdir(abs_dirpath):
-        raise OSError("Directory does not exist: {}".format(abs_dirpath))
-    # check that src is a CPU tensor
-    _misc_ops.check_input(src)
-    # Check/Fix shape of source data
-    if src.dim() == 1:
-        # 1d tensors as assumed to be mono signals
-        src.unsqueeze_(ch_idx)
-    elif src.dim() > 2 or src.size(ch_idx) > 16:
-        # assumes num_channels < 16
-        raise ValueError(
-            "Expected format where C < 16, but found {}".format(src.size()))
+    Returns:
+        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
+            If the input file has integer wav format and normalization is off, then it has
+            integer type, else ``float32`` type. If ``channels_first=True``, it has
+            ``[channel, time]`` else ``[time, channel]``.
+    """
+    with soundfile.SoundFile(filepath, "r") as file_:
+        if file_.format != "WAV" or normalize:
+            dtype = "float32"
+        elif file_.subtype not in _SUBTYPE2DTYPE:
+            raise ValueError(f"Unsupported subtype: {file_.subtype}")
+        else:
+            dtype = _SUBTYPE2DTYPE[file_.subtype]
 
+        frames = file_._prepare_read(frame_offset, None, num_frames)
+        waveform = file_.read(frames, dtype, always_2d=True)
+        sample_rate = file_.samplerate
+
+    waveform = torch.from_numpy(waveform)
     if channels_first:
-        src = src.t()
+        waveform = waveform.t()
+    return waveform, sample_rate
+
+
+def _get_subtype_for_wav(
+        dtype: torch.dtype,
+        encoding: str,
+        bits_per_sample: int):
+    if not encoding:
+        if not bits_per_sample:
+            subtype = {
+                torch.uint8: "PCM_U8",
+                torch.int16: "PCM_16",
+                torch.int32: "PCM_32",
+                torch.float32: "FLOAT",
+                torch.float64: "DOUBLE",
+            }.get(dtype)
+            if not subtype:
+                raise ValueError(f"Unsupported dtype for wav: {dtype}")
+            return subtype
+        if bits_per_sample == 8:
+            return "PCM_U8"
+        return f"PCM_{bits_per_sample}"
+    if encoding == "PCM_S":
+        if not bits_per_sample:
+            return "PCM_32"
+        if bits_per_sample == 8:
+            raise ValueError("wav does not support 8-bit signed PCM encoding.")
+        return f"PCM_{bits_per_sample}"
+    if encoding == "PCM_U":
+        if bits_per_sample in (None, 8):
+            return "PCM_U8"
+        raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
+    if encoding == "PCM_F":
+        if bits_per_sample in (None, 32):
+            return "FLOAT"
+        if bits_per_sample == 64:
+            return "DOUBLE"
+        raise ValueError("wav only supports 32/64-bit float PCM encoding.")
+    if encoding == "ULAW":
+        if bits_per_sample in (None, 8):
+            return "ULAW"
+        raise ValueError("wav only supports 8-bit mu-law encoding.")
+    if encoding == "ALAW":
+        if bits_per_sample in (None, 8):
+            return "ALAW"
+        raise ValueError("wav only supports 8-bit a-law encoding.")
+    raise ValueError(f"wav does not support {encoding}.")
+
+
+def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
+    if encoding in (None, "PCM_S"):
+        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
+    if encoding in ("PCM_U", "PCM_F"):
+        raise ValueError(f"sph does not support {encoding} encoding.")
+    if encoding == "ULAW":
+        if bits_per_sample in (None, 8):
+            return "ULAW"
+        raise ValueError("sph only supports 8-bit for mu-law encoding.")
+    if encoding == "ALAW":
+        return "ALAW"
+    raise ValueError(f"sph does not support {encoding}.")
+
+
+def _get_subtype(
+        dtype: torch.dtype,
+        format: str,
+        encoding: str,
+        bits_per_sample: int):
+    if format == "wav":
+        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
+    if format == "flac":
+        if encoding:
+            raise ValueError("flac does not support encoding.")
+        if not bits_per_sample:
+            return "PCM_24"
+        if bits_per_sample > 24:
+            raise ValueError("flac does not support bits_per_sample > 24.")
+        return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
+    if format in ("ogg", "vorbis"):
+        if encoding or bits_per_sample:
+            raise ValueError(
+                "ogg/vorbis does not support encoding/bits_per_sample.")
+        return "VORBIS"
+    if format == "sph":
+        return _get_subtype_for_sphere(encoding, bits_per_sample)
+    if format in ("nis", "nist"):
+        return "PCM_16"
+    raise ValueError(f"Unsupported format: {format}")
 
-    if src.dtype == torch.int64:
-        # Soundfile doesn't support int64
-        src = src.type(torch.int32)
 
-    precision = "PCM_S8" if precision == 8 else "PCM_" + str(precision)
+@_mod_utils.requires_module("soundfile")
+def save(
+    filepath: str,
+    src: torch.Tensor,
+    sample_rate: int,
+    channels_first: bool = True,
+    compression: Optional[float] = None,
+    format: Optional[str] = None,
+    encoding: Optional[str] = None,
+    bits_per_sample: Optional[int] = None,
+):
+    """Save audio data to file.
 
-    return soundfile.write(filepath, src, sample_rate, precision)
+    Note:
+        The formats this function can handle depend on the soundfile installation.
+        This function is tested on the following formats;
 
+        * WAV
 
-@_mod_utils.requires_module('soundfile')
-@common._impl_info
-def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
-    r"""See torchaudio.info"""
+            * 32-bit floating-point
+            * 32-bit signed integer
+            * 16-bit signed integer
+            * 8-bit unsigned integer
 
-    sfi = soundfile.info(filepath)
+        * FLAC
+        * OGG/VORBIS
+        * SPHERE
 
-    precision = _subtype_to_precision[sfi.subtype]
-    si = SignalInfo(sfi.channels, sfi.samplerate, precision, sfi.frames)
-    ei = EncodingInfo(bits_per_sample=precision)
-    return si, ei
+    Args:
+        filepath (str or pathlib.Path): Path to audio file.
+            This function also handles ``pathlib.Path`` objects, but is annotated as ``str``
+            for the consistency with "sox_io" backend, which has a restriction on type annotation
+            for TorchScript compiler compatibility.
+        src (torch.Tensor): Audio data to save. must be 2D tensor.
+        sample_rate (int): sampling rate
+        channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``,
+            otherwise ``[time, channel]``.
+        compression (Optional[float]): Not used.
+            It is here only for interface compatibility reasons with "sox_io" backend.
+        format (str, optional): Override the audio format.
+            When ``filepath`` argument is path-like object, audio format is
+            inferred from file extension. If the file extension is missing or
+            different, you can specify the correct format with this argument.
+
+            When ``filepath`` argument is file-like object,
+            this argument is required.
+
+            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
+            ``"flac"`` and ``"sph"``.
+        encoding (str, optional): Changes the encoding for supported formats.
+            This argument is effective only for supported formats, such as
+            ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:
+
+                - ``"PCM_S"`` (signed integer Linear PCM)
+                - ``"PCM_U"`` (unsigned integer Linear PCM)
+                - ``"PCM_F"`` (floating point PCM)
+                - ``"ULAW"`` (mu-law)
+                - ``"ALAW"`` (a-law)
+
+        bits_per_sample (int, optional): Changes the bit depth for the
+            supported formats.
+            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
+            you can change the bit depth.
+            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.
+
+    Supported formats/encodings/bit depth/compression are:
+
+    ``"wav"``
+        - 32-bit floating-point PCM
+        - 32-bit signed integer PCM
+        - 24-bit signed integer PCM
+        - 16-bit signed integer PCM
+        - 8-bit unsigned integer PCM
+        - 8-bit mu-law
+        - 8-bit a-law
+
+        Note: Default encoding/bit depth is determined by the dtype of
+              the input Tensor.
+
+    ``"flac"``
+        - 8-bit
+        - 16-bit
+        - 24-bit (default)
+
+    ``"ogg"``, ``"vorbis"``
+        - Doesn't accept changing configuration.
+
+    ``"sph"``
+        - 8-bit signed integer PCM
+        - 16-bit signed integer PCM
+        - 24-bit signed integer PCM
+        - 32-bit signed integer PCM (default)
+        - 8-bit mu-law
+        - 8-bit a-law
+        - 16-bit a-law
+        - 24-bit a-law
+        - 32-bit a-law
+
+    """
+    if src.ndim != 2:
+        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
+    if compression is not None:
+        warnings.warn(
+            '`save` function of "soundfile" backend does not support "compression" parameter. '
+            "The argument is silently ignored."
+        )
+    if hasattr(filepath, 'write'):
+        if format is None:
+            raise RuntimeError('`format` is required when saving to file object.')
+        ext = format.lower()
+    else:
+        ext = str(filepath).split(".")[-1].lower()
+
+    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
+        raise ValueError("Invalid bits_per_sample.")
+    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)
+
+    # sph is an extension used in TED-LIUM but soundfile does not recognize it as NIST format,
+    # so we extend the extensions manually here
+    if ext in ["nis", "nist", "sph"] and format is None:
+        format = "NIST"
+
+    if channels_first:
+        src = src.t()
+
+    soundfile.write(
+        file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format
+    )
+
+
+@_mod_utils.requires_module("soundfile")
+@_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0")
+def load_wav(
+    filepath: str,
+    frame_offset: int = 0,
+    num_frames: int = -1,
+    channels_first: bool = True,
+) -> Tuple[torch.Tensor, int]:
+    """Load wave file.
+
+    This function is defined only for the purpose of compatibility with other backends
+    for simple use cases, such as ``torchaudio.load_wav(filepath)``.
+    The implementation is the same as :py:func:`load`.
+    """
+    return load(
+        filepath,
+        frame_offset,
+        num_frames,
+        normalize=False,
+        channels_first=channels_first,
+    )
diff --git a/torchaudio/backend/sox_backend.py b/torchaudio/backend/sox_backend.py
deleted file mode 100644
index bf167dd195..0000000000
--- a/torchaudio/backend/sox_backend.py
+++ /dev/null
@@ -1,294 +0,0 @@
-import os.path
-from typing import Any, Optional, Tuple
-
-import torch
-from torch import Tensor
-
-from torchaudio._internal import (
-    module_utils as _mod_utils,
-    misc_ops as _misc_ops,
-)
-from . import common
-from .common import SignalInfo, EncodingInfo
-
-if _mod_utils.is_module_available('torchaudio._torchaudio'):
-    from torchaudio import _torchaudio
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@common._impl_load
-def load(filepath: str,
-         out: Optional[Tensor] = None,
-         normalization: bool = True,
-         channels_first: bool = True,
-         num_frames: int = 0,
-         offset: int = 0,
-         signalinfo: SignalInfo = None,
-         encodinginfo: EncodingInfo = None,
-         filetype: Optional[str] = None) -> Tuple[Tensor, int]:
-    r"""See torchaudio.load"""
-
-    # stringify if `pathlib.Path` (noop if already `str`)
-    filepath = str(filepath)
-    # check if valid file
-    if not os.path.isfile(filepath):
-        raise OSError("{} not found or is a directory".format(filepath))
-
-    # initialize output tensor
-    if out is not None:
-        _misc_ops.check_input(out)
-    else:
-        out = torch.FloatTensor()
-
-    if num_frames < -1:
-        raise ValueError("Expected value for num_samples -1 (entire file) or >=0")
-    if offset < 0:
-        raise ValueError("Expected positive offset value")
-
-    sample_rate = _torchaudio.read_audio_file(
-        filepath,
-        out,
-        channels_first,
-        num_frames,
-        offset,
-        signalinfo,
-        encodinginfo,
-        filetype
-    )
-
-    # normalize if needed
-    _misc_ops.normalize_audio(out, normalization)
-
-    return out, sample_rate
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated('Please use "torchaudio.load".', '0.9.0')
-@common._impl_load_wav
-def load_wav(filepath, **kwargs):
-    kwargs['normalization'] = 1 << 16
-    return load(filepath, **kwargs)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@common._impl_save
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
-    r"""See torchaudio.save"""
-
-    si = sox_signalinfo_t()
-    ch_idx = 0 if channels_first else 1
-    si.rate = sample_rate
-    si.channels = 1 if src.dim() == 1 else src.size(ch_idx)
-    si.length = src.numel()
-    si.precision = precision
-    return save_encinfo(filepath, src, channels_first, si)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@common._impl_info
-def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
-    r"""See torchaudio.info"""
-    return _torchaudio.get_info(filepath)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def save_encinfo(filepath: str,
-                 src: Tensor,
-                 channels_first: bool = True,
-                 signalinfo: Optional[SignalInfo] = None,
-                 encodinginfo: Optional[EncodingInfo] = None,
-                 filetype: Optional[str] = None) -> None:
-    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.
-
-    Args:
-        filepath (str): Path to audio file
-        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
-            the number of audio frames, C is the number of channels
-        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
-        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
-            audio type cannot be automatically determined (Default: ``None``).
-        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
-            audio type cannot be automatically determined (Default: ``None``).
-        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
-            automatically. (Default: ``None``)
-
-    Example
-        >>> data, sample_rate = torchaudio.load('foo.mp3')
-        >>> torchaudio.save('foo.wav', data, sample_rate)
-
-    """
-    ch_idx, len_idx = (0, 1) if channels_first else (1, 0)
-
-    # check if save directory exists
-    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
-    if not os.path.isdir(abs_dirpath):
-        raise OSError("Directory does not exist: {}".format(abs_dirpath))
-    # check that src is a CPU tensor
-    _misc_ops.check_input(src)
-    # Check/Fix shape of source data
-    if src.dim() == 1:
-        # 1d tensors as assumed to be mono signals
-        src.unsqueeze_(ch_idx)
-    elif src.dim() > 2 or src.size(ch_idx) > 16:
-        # assumes num_channels < 16
-        raise ValueError(
-            "Expected format where C < 16, but found {}".format(src.size()))
-    # sox stores the sample rate as a float, though practically sample rates are almost always integers
-    # convert integers to floats
-    if signalinfo:
-        if signalinfo.rate and not isinstance(signalinfo.rate, float):
-            if float(signalinfo.rate) == signalinfo.rate:
-                signalinfo.rate = float(signalinfo.rate)
-            else:
-                raise TypeError('Sample rate should be a float or int')
-        # check if the bit precision (i.e. bits per sample) is an integer
-        if signalinfo.precision and not isinstance(signalinfo.precision, int):
-            if int(signalinfo.precision) == signalinfo.precision:
-                signalinfo.precision = int(signalinfo.precision)
-            else:
-                raise TypeError('Bit precision should be an integer')
-    # programs such as librosa normalize the signal, unnormalize if detected
-    if src.min() >= -1.0 and src.max() <= 1.0:
-        src = src * (1 << 31)
-        src = src.long()
-    # set filetype and allow for files with no extensions
-    extension = os.path.splitext(filepath)[1]
-    filetype = extension[1:] if len(extension) > 0 else filetype
-    # transpose from C x L -> L x C
-    if channels_first:
-        src = src.transpose(1, 0)
-    # save data to file
-    src = src.contiguous()
-    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def sox_signalinfo_t() -> SignalInfo:
-    r"""Create a sox_signalinfo_t object. This object can be used to set the sample
-    rate, number of channels, length, bit precision and headroom multiplier
-    primarily for effects
-
-    Returns: sox_signalinfo_t(object)
-        - rate (float), sample rate as a float, practically will likely be an integer float
-        - channel (int), number of audio channels
-        - precision (int), bit precision
-        - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown
-        - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier
-
-    Example
-        >>> si = torchaudio.sox_signalinfo_t()
-        >>> si.channels = 1
-        >>> si.rate = 16000.
-        >>> si.precision = 16
-        >>> si.length = 0
-    """
-    return _torchaudio.sox_signalinfo_t()
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def sox_encodinginfo_t() -> EncodingInfo:
-    r"""Create a sox_encodinginfo_t object.  This object can be used to set the encoding
-    type, bit precision, compression factor, reverse bytes, reverse nibbles,
-    reverse bits and endianness.  This can be used in an effects chain to encode the
-    final output or to save a file with a specific encoding.  For example, one could
-    use the sox ulaw encoding to do 8-bit ulaw encoding.  Note in a tensor output
-    the result will be a 32-bit number, but number of unique values will be determined by
-    the bit precision.
-
-    Returns: sox_encodinginfo_t(object)
-        - encoding (sox_encoding_t), output encoding
-        - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t
-        - compression (float), compression for lossy formats, 0.0 for default compression
-        - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default
-        - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default
-        - reverse_bits (sox_option_t), reverse bytes, use sox_option_default
-        - opposite_endian (sox_bool), change endianness, use sox_false
-
-    Example
-        >>> ei = torchaudio.sox_encodinginfo_t()
-        >>> ei.encoding = torchaudio.get_sox_encoding_t(1)
-        >>> ei.bits_per_sample = 16
-        >>> ei.compression = 0
-        >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2)
-        >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2)
-        >>> ei.reverse_bits = torchaudio.get_sox_option_t(2)
-        >>> ei.opposite_endian = torchaudio.get_sox_bool(0)
-
-    """
-    ei = _torchaudio.sox_encodinginfo_t()
-    sdo = get_sox_option_t(2)  # sox_default_option
-    ei.reverse_bytes = sdo
-    ei.reverse_nibbles = sdo
-    ei.reverse_bits = sdo
-    return ei
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def get_sox_encoding_t(i: int = None) -> EncodingInfo:
-    r"""Get enum of sox_encoding_t for sox encodings.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified. (Default: ``None``)
-
-    Returns:
-        sox_encoding_t: A sox_encoding_t type for output encoding
-    """
-    if i is None:
-        # one can see all possible values using the .__members__ attribute
-        return _torchaudio.sox_encoding_t
-    else:
-        return _torchaudio.sox_encoding_t(i)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def get_sox_option_t(i: int = 2) -> Any:
-    r"""Get enum of sox_option_t for sox encodinginfo options.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified.
-            (Default: ``sox_option_default`` or ``2``)
-    Returns:
-        sox_option_t: A sox_option_t type
-    """
-    if i is None:
-        return _torchaudio.sox_option_t
-    else:
-        return _torchaudio.sox_option_t(i)
-
-
-@_mod_utils.requires_module('torchaudio._torchaudio')
-@_mod_utils.deprecated(
-    'Please migrate to "sox_io" backend. See https://github.com/pytorch/audio/issues/903 for the detail',
-    '0.9.0')
-def get_sox_bool(i: int = 0) -> Any:
-    r"""Get enum of sox_bool for sox encodinginfo options.
-
-    Args:
-        i (int, optional): Choose type or get a dict with all possible options
-            use ``__members__`` to see all options when not specified. (Default:
-            ``sox_false`` or ``0``)
-
-    Returns:
-        sox_bool: A sox_bool type
-    """
-    if i is None:
-        return _torchaudio.sox_bool
-    else:
-        return _torchaudio.sox_bool(i)
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index ecfd5ebd42..8b65f9020f 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -9,6 +9,9 @@
 import torchaudio
 from .common import AudioMetaData
 
+if _mod_utils.is_module_available('torchaudio._torchaudio'):
+    from torchaudio import _torchaudio
+
 
 @_mod_utils.requires_module('torchaudio._torchaudio')
 def info(
diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py
index b9f6b13edf..a5562d718d 100644
--- a/torchaudio/backend/utils.py
+++ b/torchaudio/backend/utils.py
@@ -6,10 +6,8 @@
 from torchaudio._internal.module_utils import is_module_available
 from . import (
     no_backend,
-    sox_backend,
     sox_io_backend,
     soundfile_backend,
-    _soundfile_backend,
 )
 
 __all__ = [
@@ -29,7 +27,6 @@ def list_audio_backends() -> List[str]:
     if is_module_available('soundfile'):
         backends.append('soundfile')
     if is_module_available('torchaudio._torchaudio'):
-        backends.append('sox')
         backends.append('sox_io')
     return backends
 
@@ -39,15 +36,9 @@ def set_audio_backend(backend: Optional[str]):
 
     Args:
         backend (Optional[str]): Name of the backend.
-            One of ``"sox"``, ``"sox_io"`` or ``"soundfile"`` based on availability
+            One of ``"sox_io"`` or ``"soundfile"`` based on availability
             of the system. If ``None`` is provided the  current backend is unassigned.
     """
-    if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE is not None:
-        warnings.warn(
-            '"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is deprecated and will be removed in 0.9.0. '
-            'Please remove the use of flag.'
-        )
-
     if backend is not None and backend not in list_audio_backends():
         raise RuntimeError(
             f'Backend "{backend}" is not one of '
@@ -55,23 +46,10 @@ def set_audio_backend(backend: Optional[str]):
 
     if backend is None:
         module = no_backend
-    elif backend == 'sox':
-        warnings.warn(
-            '"sox" backend is deprecated and will be removed in 0.9.0. '
-            'Please use "sox_io" backend.'
-        )
-        module = sox_backend
     elif backend == 'sox_io':
         module = sox_io_backend
     elif backend == 'soundfile':
-        if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE:
-            warnings.warn(
-                'The legacy interface of "soundfile" backend is deprecated and will be removed in 0.9.0. '
-                'Please migrate to the new interface.'
-            )
-            module = soundfile_backend
-        else:
-            module = _soundfile_backend
+        module = soundfile_backend
     else:
         raise NotImplementedError(f'Unexpected backend "{backend}"')
 
@@ -98,10 +76,8 @@ def get_audio_backend() -> Optional[str]:
     """
     if torchaudio.load == no_backend.load:
         return None
-    if torchaudio.load == sox_backend.load:
-        return 'sox'
     if torchaudio.load == sox_io_backend.load:
         return 'sox_io'
-    if torchaudio.load in [soundfile_backend.load, _soundfile_backend.load]:
+    if torchaudio.load == soundfile_backend.load:
         return 'soundfile'
     raise ValueError('Unknown backend.')
diff --git a/torchaudio/csrc/CMakeLists.txt b/torchaudio/csrc/CMakeLists.txt
index 1bab67be5a..7d4d4e7741 100644
--- a/torchaudio/csrc/CMakeLists.txt
+++ b/torchaudio/csrc/CMakeLists.txt
@@ -60,7 +60,6 @@ if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
     _torchaudio
     SHARED
     pybind.cpp
-    sox/legacy.cpp
     ${LIBTORCHAUDIO_SOURCES}
     )
 
diff --git a/torchaudio/csrc/pybind.cpp b/torchaudio/csrc/pybind.cpp
index 373b9d0d96..fc17f7da5a 100644
--- a/torchaudio/csrc/pybind.cpp
+++ b/torchaudio/csrc/pybind.cpp
@@ -1,106 +1,8 @@
 #include <torch/extension.h>
 #include <torchaudio/csrc/sox/effects.h>
 #include <torchaudio/csrc/sox/io.h>
-#include <torchaudio/csrc/sox/legacy.h>
 
 PYBIND11_MODULE(_torchaudio, m) {
-  py::class_<sox_signalinfo_t>(m, "sox_signalinfo_t")
-      .def(py::init<>())
-      .def(
-          "__repr__",
-          [](const sox_signalinfo_t& self) {
-            std::stringstream ss;
-            ss << "sox_signalinfo_t {\n"
-               << "  rate-> " << self.rate << "\n"
-               << "  channels-> " << self.channels << "\n"
-               << "  precision-> " << self.precision << "\n"
-               << "  length-> " << self.length << "\n"
-               << "  mult-> " << self.mult << "\n"
-               << "}\n";
-            return ss.str();
-          })
-      .def_readwrite("rate", &sox_signalinfo_t::rate)
-      .def_readwrite("channels", &sox_signalinfo_t::channels)
-      .def_readwrite("precision", &sox_signalinfo_t::precision)
-      .def_readwrite("length", &sox_signalinfo_t::length)
-      .def_readwrite("mult", &sox_signalinfo_t::mult);
-  py::class_<sox_encodinginfo_t>(m, "sox_encodinginfo_t")
-      .def(py::init<>())
-      .def(
-          "__repr__",
-          [](const sox_encodinginfo_t& self) {
-            std::stringstream ss;
-            ss << "sox_encodinginfo_t {\n"
-               << "  encoding-> " << self.encoding << "\n"
-               << "  bits_per_sample-> " << self.bits_per_sample << "\n"
-               << "  compression-> " << self.compression << "\n"
-               << "  reverse_bytes-> " << self.reverse_bytes << "\n"
-               << "  reverse_nibbles-> " << self.reverse_nibbles << "\n"
-               << "  reverse_bits-> " << self.reverse_bits << "\n"
-               << "  opposite_endian-> " << self.opposite_endian << "\n"
-               << "}\n";
-            return ss.str();
-          })
-      .def_readwrite("encoding", &sox_encodinginfo_t::encoding)
-      .def_readwrite("bits_per_sample", &sox_encodinginfo_t::bits_per_sample)
-      .def_readwrite("compression", &sox_encodinginfo_t::compression)
-      .def_readwrite("reverse_bytes", &sox_encodinginfo_t::reverse_bytes)
-      .def_readwrite("reverse_nibbles", &sox_encodinginfo_t::reverse_nibbles)
-      .def_readwrite("reverse_bits", &sox_encodinginfo_t::reverse_bits)
-      .def_readwrite("opposite_endian", &sox_encodinginfo_t::opposite_endian);
-  py::enum_<sox_encoding_t>(m, "sox_encoding_t")
-      .value("SOX_ENCODING_UNKNOWN", sox_encoding_t::SOX_ENCODING_UNKNOWN)
-      .value("SOX_ENCODING_SIGN2", sox_encoding_t::SOX_ENCODING_SIGN2)
-      .value("SOX_ENCODING_UNSIGNED", sox_encoding_t::SOX_ENCODING_UNSIGNED)
-      .value("SOX_ENCODING_FLOAT", sox_encoding_t::SOX_ENCODING_FLOAT)
-      .value("SOX_ENCODING_FLOAT_TEXT", sox_encoding_t::SOX_ENCODING_FLOAT_TEXT)
-      .value("SOX_ENCODING_FLAC", sox_encoding_t::SOX_ENCODING_FLAC)
-      .value("SOX_ENCODING_HCOM", sox_encoding_t::SOX_ENCODING_HCOM)
-      .value("SOX_ENCODING_WAVPACK", sox_encoding_t::SOX_ENCODING_WAVPACK)
-      .value("SOX_ENCODING_WAVPACKF", sox_encoding_t::SOX_ENCODING_WAVPACKF)
-      .value("SOX_ENCODING_ULAW", sox_encoding_t::SOX_ENCODING_ULAW)
-      .value("SOX_ENCODING_ALAW", sox_encoding_t::SOX_ENCODING_ALAW)
-      .value("SOX_ENCODING_G721", sox_encoding_t::SOX_ENCODING_G721)
-      .value("SOX_ENCODING_G723", sox_encoding_t::SOX_ENCODING_G723)
-      .value("SOX_ENCODING_CL_ADPCM", sox_encoding_t::SOX_ENCODING_CL_ADPCM)
-      .value("SOX_ENCODING_CL_ADPCM16", sox_encoding_t::SOX_ENCODING_CL_ADPCM16)
-      .value("SOX_ENCODING_MS_ADPCM", sox_encoding_t::SOX_ENCODING_MS_ADPCM)
-      .value("SOX_ENCODING_IMA_ADPCM", sox_encoding_t::SOX_ENCODING_IMA_ADPCM)
-      .value("SOX_ENCODING_OKI_ADPCM", sox_encoding_t::SOX_ENCODING_OKI_ADPCM)
-      .value("SOX_ENCODING_DPCM", sox_encoding_t::SOX_ENCODING_DPCM)
-      .value("SOX_ENCODING_DWVW", sox_encoding_t::SOX_ENCODING_DWVW)
-      .value("SOX_ENCODING_DWVWN", sox_encoding_t::SOX_ENCODING_DWVWN)
-      .value("SOX_ENCODING_GSM", sox_encoding_t::SOX_ENCODING_GSM)
-      .value("SOX_ENCODING_MP3", sox_encoding_t::SOX_ENCODING_MP3)
-      .value("SOX_ENCODING_VORBIS", sox_encoding_t::SOX_ENCODING_VORBIS)
-      .value("SOX_ENCODING_AMR_WB", sox_encoding_t::SOX_ENCODING_AMR_WB)
-      .value("SOX_ENCODING_AMR_NB", sox_encoding_t::SOX_ENCODING_AMR_NB)
-      .value("SOX_ENCODING_LPC10", sox_encoding_t::SOX_ENCODING_LPC10)
-      //.value("SOX_ENCODING_OPUS", sox_encoding_t::SOX_ENCODING_OPUS)  //
-      // creates a compile error
-      .value("SOX_ENCODINGS", sox_encoding_t::SOX_ENCODINGS)
-      .export_values();
-  py::enum_<sox_option_t>(m, "sox_option_t")
-      .value("sox_option_no", sox_option_t::sox_option_no)
-      .value("sox_option_yes", sox_option_t::sox_option_yes)
-      .value("sox_option_default", sox_option_t::sox_option_default)
-      .export_values();
-  py::enum_<sox_bool>(m, "sox_bool")
-      .value("sox_false", sox_bool::sox_false)
-      .value("sox_true", sox_bool::sox_true)
-      .export_values();
-  m.def(
-      "read_audio_file",
-      &torch::audio::read_audio_file,
-      "Reads an audio file into a tensor");
-  m.def(
-      "write_audio_file",
-      &torch::audio::write_audio_file,
-      "Writes data from a tensor into an audio file");
-  m.def(
-      "get_info",
-      &torch::audio::get_info,
-      "Gets information about an audio file");
   m.def(
       "get_info_fileobj",
       &torchaudio::sox_io::get_info_fileobj,
diff --git a/torchaudio/csrc/sox/legacy.cpp b/torchaudio/csrc/sox/legacy.cpp
deleted file mode 100644
index 858ad15149..0000000000
--- a/torchaudio/csrc/sox/legacy.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-#include <torchaudio/csrc/sox/legacy.h>
-
-namespace torch {
-namespace audio {
-namespace {
-/// Helper struct to safely close the sox_format_t descriptor.
-struct SoxDescriptor {
-  explicit SoxDescriptor(sox_format_t* fd) noexcept : fd_(fd) {}
-  SoxDescriptor(const SoxDescriptor& other) = delete;
-  SoxDescriptor(SoxDescriptor&& other) = delete;
-  SoxDescriptor& operator=(const SoxDescriptor& other) = delete;
-  SoxDescriptor& operator=(SoxDescriptor&& other) = delete;
-  ~SoxDescriptor() {
-    if (fd_ != nullptr) {
-      sox_close(fd_);
-    }
-  }
-  sox_format_t* operator->() noexcept {
-    return fd_;
-  }
-  sox_format_t* get() noexcept {
-    return fd_;
-  }
-
- private:
-  sox_format_t* fd_;
-};
-
-int64_t write_audio(SoxDescriptor& fd, at::Tensor tensor) {
-  std::vector<sox_sample_t> buffer(tensor.numel());
-
-  AT_DISPATCH_ALL_TYPES(tensor.scalar_type(), "write_audio_buffer", [&] {
-    auto* data = tensor.data_ptr<scalar_t>();
-    std::copy(data, data + tensor.numel(), buffer.begin());
-  });
-
-  const auto samples_written =
-      sox_write(fd.get(), buffer.data(), buffer.size());
-
-  return samples_written;
-}
-
-void read_audio(SoxDescriptor& fd, at::Tensor output, int64_t buffer_length) {
-  std::vector<sox_sample_t> buffer(buffer_length);
-
-  int number_of_channels = fd->signal.channels;
-  const int64_t samples_read = sox_read(fd.get(), buffer.data(), buffer_length);
-  if (samples_read == 0) {
-    throw std::runtime_error(
-        "Error reading audio file: empty file or read failed in sox_read");
-  }
-
-  output.resize_({samples_read / number_of_channels, number_of_channels});
-  output = output.contiguous();
-
-  AT_DISPATCH_ALL_TYPES(output.scalar_type(), "read_audio_buffer", [&] {
-    auto* data = output.data_ptr<scalar_t>();
-    std::copy(buffer.begin(), buffer.begin() + samples_read, data);
-  });
-}
-} // namespace
-
-std::tuple<sox_signalinfo_t, sox_encodinginfo_t> get_info(
-    const std::string& file_name) {
-  SoxDescriptor fd(sox_open_read(
-      file_name.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/nullptr));
-  if (fd.get() == nullptr) {
-    throw std::runtime_error("Error opening audio file");
-  }
-  return std::make_tuple(fd->signal, fd->encoding);
-}
-
-int read_audio_file(
-    const std::string& file_name,
-    at::Tensor output,
-    bool ch_first,
-    int64_t nframes,
-    int64_t offset,
-    sox_signalinfo_t* si,
-    sox_encodinginfo_t* ei,
-    const char* ft) {
-  SoxDescriptor fd(sox_open_read(file_name.c_str(), si, ei, ft));
-  if (fd.get() == nullptr) {
-    throw std::runtime_error("Error opening audio file");
-  }
-
-  // signal info
-
-  const int number_of_channels = fd->signal.channels;
-  const int sample_rate = fd->signal.rate;
-  const int64_t total_length = fd->signal.length;
-
-  // multiply offset and number of frames by number of channels
-  offset *= number_of_channels;
-  nframes *= number_of_channels;
-
-  if (total_length == 0) {
-    throw std::runtime_error("Error reading audio file: unknown length");
-  }
-  if (offset > total_length) {
-    throw std::runtime_error("Offset past EOF");
-  }
-
-  // calculate buffer length
-  int64_t buffer_length = total_length;
-  if (offset > 0) {
-    buffer_length -= offset;
-  }
-  if (nframes > 0 && buffer_length > nframes) {
-    buffer_length = nframes;
-  }
-
-  // seek to offset point before reading data
-  if (sox_seek(fd.get(), offset, 0) == SOX_EOF) {
-    throw std::runtime_error(
-        "sox_seek reached EOF, try reducing offset or num_samples");
-  }
-
-  // read data and fill output tensor
-  read_audio(fd, output, buffer_length);
-
-  // L x C -> C x L, if desired
-  if (ch_first) {
-    output.transpose_(1, 0);
-  }
-
-  return sample_rate;
-}
-
-void write_audio_file(
-    const std::string& file_name,
-    const at::Tensor& tensor,
-    sox_signalinfo_t* si,
-    sox_encodinginfo_t* ei,
-    const char* file_type) {
-  if (!tensor.is_contiguous()) {
-    throw std::runtime_error(
-        "Error writing audio file: input tensor must be contiguous");
-  }
-
-#if SOX_LIB_VERSION_CODE >= 918272 // >= 14.3.0
-  si->mult = nullptr;
-#endif
-
-  SoxDescriptor fd(sox_open_write(
-      file_name.c_str(),
-      si,
-      ei,
-      file_type,
-      /*oob=*/nullptr,
-      /*overwrite=*/nullptr));
-
-  if (fd.get() == nullptr) {
-    throw std::runtime_error(
-        "Error writing audio file: could not open file for writing");
-  }
-
-  const auto samples_written = write_audio(fd, tensor);
-
-  if (samples_written != tensor.numel()) {
-    throw std::runtime_error(
-        "Error writing audio file: could not write entire buffer");
-  }
-}
-
-} // namespace audio
-} // namespace torch
diff --git a/torchaudio/csrc/sox/legacy.h b/torchaudio/csrc/sox/legacy.h
deleted file mode 100644
index 5869695bfe..0000000000
--- a/torchaudio/csrc/sox/legacy.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <sox.h>
-#include <torch/torch.h>
-
-namespace torch {
-namespace audio {
-
-/// Reads an audio file from the given `path` into the `output` `Tensor` and
-/// returns the sample rate of the audio file.
-/// Throws `std::runtime_error` if the audio file could not be opened, or an
-/// error occurred during reading of the audio data.
-int read_audio_file(
-    const std::string& file_name,
-    at::Tensor output,
-    bool ch_first,
-    int64_t nframes,
-    int64_t offset,
-    sox_signalinfo_t* si,
-    sox_encodinginfo_t* ei,
-    const char* ft);
-
-/// Writes the data of a `Tensor` into an audio file at the given `path`, with
-/// a certain extension (e.g. `wav`or `mp3`) and sample rate.
-/// Throws `std::runtime_error` when the audio file could not be opened for
-/// writing, or an error occurred during writing of the audio data.
-void write_audio_file(
-    const std::string& file_name,
-    const at::Tensor& tensor,
-    sox_signalinfo_t* si,
-    sox_encodinginfo_t* ei,
-    const char* file_type);
-
-/// Reads an audio file from the given `path` and returns a tuple of
-/// sox_signalinfo_t and sox_encodinginfo_t, which contain information about
-/// the audio file such as sample rate, length, bit precision, encoding and
-/// more. Throws `std::runtime_error` if the audio file could not be opened, or
-/// an error occurred during reading of the audio data.
-std::tuple<sox_signalinfo_t, sox_encodinginfo_t> get_info(
-    const std::string& file_name);
-} // namespace audio
-} // namespace torch
diff --git a/torchaudio/datasets/tedlium.py b/torchaudio/datasets/tedlium.py
index e8d1d1cefc..fb96ca5789 100644
--- a/torchaudio/datasets/tedlium.py
+++ b/torchaudio/datasets/tedlium.py
@@ -153,11 +153,7 @@ def _load_audio(self, path: str, start_time: float, end_time: float, sample_rate
         start_time = int(float(start_time) * sample_rate)
         end_time = int(float(end_time) * sample_rate)
 
-        backend = torchaudio.get_audio_backend()
-        if backend == "sox" or (backend == "soundfile" and torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE):
-            kwargs = {"offset": start_time, "num_frames": end_time - start_time}
-        else:
-            kwargs = {"frame_offset": start_time, "num_frames": end_time - start_time}
+        kwargs = {"frame_offset": start_time, "num_frames": end_time - start_time}
 
         return torchaudio.load(path, **kwargs)
 

From e44a1e03f468d93a515c97e065b446c675451417 Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Wed, 24 Feb 2021 19:09:17 +0000
Subject: [PATCH 2/4] Ignore flake8 error for import

---
 torchaudio/backend/sox_io_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 8b65f9020f..6849f87691 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -10,7 +10,7 @@
 from .common import AudioMetaData
 
 if _mod_utils.is_module_available('torchaudio._torchaudio'):
-    from torchaudio import _torchaudio
+    from torchaudio import _torchaudio  # noqa
 
 
 @_mod_utils.requires_module('torchaudio._torchaudio')

From 10e16e252e12343aab43b09c45755b95450fe624 Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Wed, 24 Feb 2021 19:50:18 +0000
Subject: [PATCH 3/4] Addressed review comments.

---
 docs/source/backend.rst              | 8 --------
 torchaudio/backend/sox_io_backend.py | 3 ---
 torchaudio/extension/extension.py    | 1 +
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/docs/source/backend.rst b/docs/source/backend.rst
index 9ce1f071b1..cfecf2e8e4 100644
--- a/docs/source/backend.rst
+++ b/docs/source/backend.rst
@@ -23,14 +23,6 @@ Availability
 
 ``"soundfile"`` backend requires ``SoundFile``. Please refer to `the SoundFile documentation <https://pysoundfile.readthedocs.io/en/latest/>`_ for the installation.
 
- +--------------------+-----------------------+------------------------+
- | **Backend**        | **0.8.0**             | **0.9.0**              |
- +====================+=======================+========================+
- | ``"sox_io"``       | Default on Linx/macOS | Default on Linux/macOS |
- +--------------------+-----------------------+------------------------+
- | ``"soundfile"``    | Default on Windows    | Default on Windows     |
- +--------------------+-----------------------+------------------------+
-
 Common Data Structure
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 6849f87691..ecfd5ebd42 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -9,9 +9,6 @@
 import torchaudio
 from .common import AudioMetaData
 
-if _mod_utils.is_module_available('torchaudio._torchaudio'):
-    from torchaudio import _torchaudio  # noqa
-
 
 @_mod_utils.requires_module('torchaudio._torchaudio')
 def info(
diff --git a/torchaudio/extension/extension.py b/torchaudio/extension/extension.py
index 5875f41023..5af62f19be 100644
--- a/torchaudio/extension/extension.py
+++ b/torchaudio/extension/extension.py
@@ -9,6 +9,7 @@ def _init_extension():
     ext = 'torchaudio._torchaudio'
     if _mod_utils.is_module_available(ext):
         _init_script_module(ext)
+        import torchaudio._torchaudio  # noqa
     else:
         warnings.warn('torchaudio C++ extension is not available.')
 

From d24cfacac939d2ba160a575b686dfb6cee14b0c1 Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Wed, 24 Feb 2021 20:29:12 +0000
Subject: [PATCH 4/4] Fixed failure in build doc.

---
 docs/source/backend.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/backend.rst b/docs/source/backend.rst
index cfecf2e8e4..f185d171b0 100644
--- a/docs/source/backend.rst
+++ b/docs/source/backend.rst
@@ -66,6 +66,8 @@ save
 
 .. autofunction:: torchaudio.backend.sox_io_backend.save
 
+.. _soundfile_backend:
+
 Soundfile Backend
 ~~~~~~~~~~~~~~~~~