pytorch · vincentqb · Apr 20, 2020 · Apr 17, 2020 · Apr 17, 2020 · Apr 17, 2020
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
@@ -128,6 +128,11 @@ Functions to perform common audio operations.
 
 .. autofunction:: contrast
 
+:hidden:`dcshift`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: dcshift
+
 :hidden:`mask_along_axis`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/test/test_batch_consistency.py b/test/test_batch_consistency.py
@@ -69,6 +69,10 @@ def test_contrast(self):
         waveform = torch.rand(2, 100) - 0.5
         _test_batch(F.contrast, waveform, enhancement_amount=80.)
 
+    def test_dcshift(self):
+        """Run the module's batch check helper on `F.dcshift` with the limiter enabled."""
+        _test_batch(F.dcshift, torch.rand(2, 100) - 0.5, shift=0.5, limiter_gain=0.05)
+
 
 class TestTransforms(unittest.TestCase):
     """Test suite for classes defined in `transforms` module"""

diff --git a/test/test_sox_compatibility.py b/test/test_sox_compatibility.py
@@ -78,16 +78,16 @@ def test_lowpass(self):
         Test biquad lowpass filter, compare to SoX implementation
         """
 
-        CUTOFF_FREQ = 3000
+        cutoff_freq = 3000
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("lowpass", [CUTOFF_FREQ])
+        E.append_effect_to_chain("lowpass", [cutoff_freq])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.lowpass_biquad(waveform, sample_rate, CUTOFF_FREQ)
+        output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -98,16 +98,16 @@ def test_highpass(self):
         Test biquad highpass filter, compare to SoX implementation
         """
 
-        CUTOFF_FREQ = 2000
+        cutoff_freq = 2000
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("highpass", [CUTOFF_FREQ])
+        E.append_effect_to_chain("highpass", [cutoff_freq])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.highpass_biquad(waveform, sample_rate, CUTOFF_FREQ)
+        output_waveform = F.highpass_biquad(waveform, sample_rate, cutoff_freq)
 
         # TBD - this fails at the 1e-4 level, debug why
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-3, rtol=1e-5)
@@ -119,17 +119,17 @@ def test_allpass(self):
         Test biquad allpass filter, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
+        central_freq = 1000
+        q = 0.707
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("allpass", [CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.allpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)
+        output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -140,18 +140,18 @@ def test_bandpass_with_csg(self):
         Test biquad bandpass filter, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
-        CONST_SKIRT_GAIN = True
+        central_freq = 1000
+        q = 0.707
+        const_skirt_gain = True
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("bandpass", ["-c", CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)
+        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -162,18 +162,18 @@ def test_bandpass_without_csg(self):
         Test biquad bandpass filter, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
-        CONST_SKIRT_GAIN = False
+        central_freq = 1000
+        q = 0.707
+        const_skirt_gain = False
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("bandpass", [CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)
+        output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -184,17 +184,17 @@ def test_bandreject(self):
         Test biquad bandreject filter, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
+        central_freq = 1000
+        q = 0.707
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("bandreject", [CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.bandreject_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)
+        output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -205,18 +205,18 @@ def test_band_with_noise(self):
         Test biquad band filter with noise mode, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
-        NOISE = True
+        central_freq = 1000
+        q = 0.707
+        noise = True
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("band", ["-n", CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)
+        output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -227,18 +227,18 @@ def test_band_without_noise(self):
         Test biquad band filter without noise mode, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
-        NOISE = False
+        central_freq = 1000
+        q = 0.707
+        noise = False
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("band", [CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("band", [central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)
+        output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -249,18 +249,18 @@ def test_treble(self):
         Test biquad treble filter, compare to SoX implementation
         """
 
-        CENTRAL_FREQ = 1000
-        Q = 0.707
-        GAIN = 40
+        central_freq = 1000
+        q = 0.707
+        gain = 40
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("treble", [GAIN, CENTRAL_FREQ, str(Q) + 'q'])
+        E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q'])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.treble_biquad(waveform, sample_rate, GAIN, CENTRAL_FREQ, Q)
+        output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
@@ -318,25 +318,62 @@ def test_contrast(self):
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 
+    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
+    @AudioBackendScope("sox")
+    def test_dcshift_with_limiter(self):
+        """
+        Apply dcshift with a limiter gain and compare the result to SoX
+        """
+        shift, limiter_gain = 0.5, 0.05
+        wav_path = common_utils.get_asset_path('whitenoise.wav')
+
+        # Reference output: the same effect rendered through the SoX effects chain
+        chain = torchaudio.sox_effects.SoxEffectsChain()
+        chain.set_input_file(wav_path)
+        chain.append_effect_to_chain("dcshift", [shift, limiter_gain])
+        expected, _ = chain.sox_build_flow_effects()
+
+        loaded, _ = torchaudio.load(wav_path, normalization=True)
+        actual = F.dcshift(loaded, shift, limiter_gain)
+        torch.testing.assert_allclose(actual, expected, atol=1e-4, rtol=1e-5)
+
+    @unittest.skipIf("sox" not in BACKENDS, "sox not available")
+    @AudioBackendScope("sox")
+    def test_dcshift_without_limiter(self):
+        """
+        Apply dcshift with no limiter gain and compare the result to SoX
+        """
+        shift = 0.6
+        wav_path = common_utils.get_asset_path('whitenoise.wav')
+
+        # Reference output: the same shift rendered through the SoX effects chain
+        chain = torchaudio.sox_effects.SoxEffectsChain()
+        chain.set_input_file(wav_path)
+        chain.append_effect_to_chain("dcshift", [shift])
+        expected, _ = chain.sox_build_flow_effects()
+
+        loaded, _ = torchaudio.load(wav_path, normalization=True)
+        torch.testing.assert_allclose(F.dcshift(loaded, shift), expected, atol=1e-4, rtol=1e-5)
+
     @unittest.skipIf("sox" not in BACKENDS, "sox not available")
     @AudioBackendScope("sox")
     def test_equalizer(self):
         """
         Test biquad peaking equalizer filter, compare to SoX implementation
         """
 
-        CENTER_FREQ = 300
-        Q = 0.707
-        GAIN = 1
+        center_freq = 300
+        q = 0.707
+        gain = 1
 
         noise_filepath = common_utils.get_asset_path('whitenoise.wav')
         E = torchaudio.sox_effects.SoxEffectsChain()
         E.set_input_file(noise_filepath)
-        E.append_effect_to_chain("equalizer", [CENTER_FREQ, Q, GAIN])
+        E.append_effect_to_chain("equalizer", [center_freq, q, gain])
         sox_output_waveform, sr = E.sox_build_flow_effects()
 
         waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
-        output_waveform = F.equalizer_biquad(waveform, sample_rate, CENTER_FREQ, GAIN, Q)
+        output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)
 
         torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)
 

diff --git a/test/test_torchscript_consistency.py b/test/test_torchscript_consistency.py
@@ -416,6 +416,17 @@ def func(tensor):
 
         self._assert_consistency(func, waveform)
 
+    def test_dcshift(self):
+        """Run the suite's consistency assertion on `F.dcshift` using the white-noise asset."""
+        asset = common_utils.get_asset_path("whitenoise.wav")
+        audio, _ = torchaudio.load(asset, normalization=True)
+
+        def func(tensor):
+            # positive shift (0.5) combined with a non-zero limiter gain (0.05)
+            return F.dcshift(tensor, 0.5, 0.05)
+
+        self._assert_consistency(func, audio)
+
 
 class _TransformsTestMixin:
     """Implements test for Transforms that are performed for different devices"""

diff --git a/torchaudio/functional.py b/torchaudio/functional.py
@@ -1193,6 +1193,52 @@ def contrast(
     return output_waveform
 
 
+def dcshift(
+        waveform: Tensor,
+        shift: float,
+        limiter_gain: float = 0.
+) -> Tensor:
+    r"""Apply a DC shift to the audio. Similar to SoX implementation.
+    This can be useful to remove a DC offset
+    (caused perhaps by a hardware problem in the recording chain) from the audio.
+    The input tensor is left unmodified; a new tensor is returned.
+
+    Args:
+        waveform (Tensor): audio waveform of dimension of `(..., time)`
+        shift (float): indicates the amount to shift the audio
+            Allowed range of values for shift : -2.0 to +2.0
+        limiter_gain (float): It is used only on peaks to prevent clipping
+            It should have a value much less than 1 (e.g. 0.05 or 0.02).
+            The default ``0.`` disables the limiter.
+
+    Returns:
+        Tensor: Waveform of dimension of `(..., time)`
+
+    References:
+        http://sox.sourceforge.net/sox.html
+    """
+    # No limiter (or nothing to shift): plain offset followed by a hard clip to [-1, 1].
+    if limiter_gain == 0.0 or shift == 0.0:
+        return (waveform + shift).clamp(min=-1, max=+1)
+
+    # Samples past this level, in the direction of the shift, are treated as peaks.
+    limiter_threshold = 1.0 - (abs(shift) - limiter_gain)
+    # Clone so the masked writes below never modify the caller's tensor in place.
+    output_waveform = waveform.clone()
+
+    if shift > 0:
+        mask = waveform > limiter_threshold
+        temp = (waveform[mask] - limiter_threshold) * limiter_gain / (1 - limiter_threshold)
+        output_waveform[mask] = (temp + limiter_threshold + shift).clamp(max=limiter_threshold)
+    else:
+        mask = waveform < -limiter_threshold
+        temp = (waveform[mask] + limiter_threshold) * limiter_gain / (1 - limiter_threshold)
+        output_waveform[mask] = (temp - limiter_threshold + shift).clamp(min=-limiter_threshold)
+    output_waveform[~mask] = (waveform[~mask] + shift).clamp(min=-1, max=+1)
+
+    return output_waveform
+
+
 def mask_along_axis_iid(
         specgrams: Tensor,
         mask_param: int,