Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dcshift to functional #558

Merged
merged 6 commits into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/source/functional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ Functions to perform common audio operations.

.. autofunction:: contrast

:hidden:`dcshift`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: dcshift

:hidden:`mask_along_axis`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
4 changes: 4 additions & 0 deletions test/test_batch_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def test_contrast(self):
waveform = torch.rand(2, 100) - 0.5
_test_batch(F.contrast, waveform, enhancement_amount=80.)

def test_dcshift(self):
    """Run the shared ``_test_batch`` helper against ``F.dcshift``"""
    # Uniform noise moved from [0, 1) to [-0.5, 0.5) so it has both signs.
    centered_noise = torch.rand(2, 100) - 0.5
    _test_batch(F.dcshift, centered_noise, shift=0.5, limiter_gain=0.05)


class TestTransforms(unittest.TestCase):
"""Test suite for classes defined in `transforms` module"""
Expand Down
125 changes: 81 additions & 44 deletions test/test_sox_compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,16 @@ def test_lowpass(self):
Test biquad lowpass filter, compare to SoX implementation
"""

CUTOFF_FREQ = 3000
mthrok marked this conversation as resolved.
Show resolved Hide resolved
cutoff_freq = 3000

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("lowpass", [CUTOFF_FREQ])
E.append_effect_to_chain("lowpass", [cutoff_freq])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.lowpass_biquad(waveform, sample_rate, CUTOFF_FREQ)
output_waveform = F.lowpass_biquad(waveform, sample_rate, cutoff_freq)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -98,16 +98,16 @@ def test_highpass(self):
Test biquad highpass filter, compare to SoX implementation
"""

CUTOFF_FREQ = 2000
cutoff_freq = 2000

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("highpass", [CUTOFF_FREQ])
E.append_effect_to_chain("highpass", [cutoff_freq])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.highpass_biquad(waveform, sample_rate, CUTOFF_FREQ)
output_waveform = F.highpass_biquad(waveform, sample_rate, cutoff_freq)

# TBD - this fails at the 1e-4 level, debug why
torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-3, rtol=1e-5)
Expand All @@ -119,17 +119,17 @@ def test_allpass(self):
Test biquad allpass filter, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
central_freq = 1000
q = 0.707

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("allpass", [CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("allpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.allpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)
output_waveform = F.allpass_biquad(waveform, sample_rate, central_freq, q)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -140,18 +140,18 @@ def test_bandpass_with_csg(self):
Test biquad bandpass filter, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
CONST_SKIRT_GAIN = True
central_freq = 1000
q = 0.707
const_skirt_gain = True

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandpass", ["-c", CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("bandpass", ["-c", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -162,18 +162,18 @@ def test_bandpass_without_csg(self):
Test biquad bandpass filter, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
CONST_SKIRT_GAIN = False
central_freq = 1000
q = 0.707
const_skirt_gain = False

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandpass", [CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("bandpass", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandpass_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, CONST_SKIRT_GAIN)
output_waveform = F.bandpass_biquad(waveform, sample_rate, central_freq, q, const_skirt_gain)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -184,17 +184,17 @@ def test_bandreject(self):
Test biquad bandreject filter, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
central_freq = 1000
q = 0.707

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("bandreject", [CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("bandreject", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.bandreject_biquad(waveform, sample_rate, CENTRAL_FREQ, Q)
output_waveform = F.bandreject_biquad(waveform, sample_rate, central_freq, q)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -205,18 +205,18 @@ def test_band_with_noise(self):
Test biquad band filter with noise mode, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
NOISE = True
central_freq = 1000
q = 0.707
noise = True

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("band", ["-n", CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("band", ["-n", central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)
output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -227,18 +227,18 @@ def test_band_without_noise(self):
Test biquad band filter without noise mode, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
NOISE = False
central_freq = 1000
q = 0.707
noise = False

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("band", [CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("band", [central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.band_biquad(waveform, sample_rate, CENTRAL_FREQ, Q, NOISE)
output_waveform = F.band_biquad(waveform, sample_rate, central_freq, q, noise)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand All @@ -249,18 +249,18 @@ def test_treble(self):
Test biquad treble filter, compare to SoX implementation
"""

CENTRAL_FREQ = 1000
Q = 0.707
GAIN = 40
central_freq = 1000
q = 0.707
gain = 40

noise_filepath = common_utils.get_asset_path('whitenoise.wav')
E = torchaudio.sox_effects.SoxEffectsChain()
E.set_input_file(noise_filepath)
E.append_effect_to_chain("treble", [GAIN, CENTRAL_FREQ, str(Q) + 'q'])
E.append_effect_to_chain("treble", [gain, central_freq, str(q) + 'q'])
sox_output_waveform, sr = E.sox_build_flow_effects()

waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
output_waveform = F.treble_biquad(waveform, sample_rate, GAIN, CENTRAL_FREQ, Q)
output_waveform = F.treble_biquad(waveform, sample_rate, gain, central_freq, q)

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand Down Expand Up @@ -318,25 +318,62 @@ def test_contrast(self):

torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

@unittest.skipIf("sox" not in BACKENDS, "sox not available")
@AudioBackendScope("sox")
def test_dcshift_with_limiter(self):
    """
    Compare `F.dcshift` with the SoX "dcshift" effect when a limiter gain is given
    """
    dc_offset, gain = 0.5, 0.05
    asset_path = common_utils.get_asset_path('whitenoise.wav')

    # Reference result: the same effect applied through the SoX effects chain.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("dcshift", [dc_offset, gain])
    expected, _ = chain.sox_build_flow_effects()

    # Result under test: the functional implementation on the same file.
    samples, _ = torchaudio.load(asset_path, normalization=True)
    actual = F.dcshift(samples, dc_offset, gain)

    torch.testing.assert_allclose(actual, expected, atol=1e-4, rtol=1e-5)

@unittest.skipIf("sox" not in BACKENDS, "sox not available")
@AudioBackendScope("sox")
def test_dcshift_without_limiter(self):
    """
    Compare `F.dcshift` with the SoX "dcshift" effect when only a shift is given
    """
    dc_offset = 0.6
    asset_path = common_utils.get_asset_path('whitenoise.wav')

    # Reference result: SoX is given the shift alone, with no limiter gain argument.
    chain = torchaudio.sox_effects.SoxEffectsChain()
    chain.set_input_file(asset_path)
    chain.append_effect_to_chain("dcshift", [dc_offset])
    expected, _ = chain.sox_build_flow_effects()

    # Result under test: `limiter_gain` is left at its default.
    samples, _ = torchaudio.load(asset_path, normalization=True)
    actual = F.dcshift(samples, dc_offset)

    torch.testing.assert_allclose(actual, expected, atol=1e-4, rtol=1e-5)

@unittest.skipIf("sox" not in BACKENDS, "sox not available")
@AudioBackendScope("sox")
def test_equalizer(self):
    """
    Test biquad peaking equalizer filter, compare to SoX implementation
    """

    center_freq = 300
    q = 0.707
    gain = 1

    noise_filepath = common_utils.get_asset_path('whitenoise.wav')
    E = torchaudio.sox_effects.SoxEffectsChain()
    E.set_input_file(noise_filepath)
    # The effect is appended exactly once. The SoX argument list is ordered
    # (center_freq, q, gain), whereas the functional call below passes
    # (center_freq, gain, q).
    E.append_effect_to_chain("equalizer", [center_freq, q, gain])
    sox_output_waveform, sr = E.sox_build_flow_effects()

    waveform, sample_rate = torchaudio.load(noise_filepath, normalization=True)
    output_waveform = F.equalizer_biquad(waveform, sample_rate, center_freq, gain, q)

    torch.testing.assert_allclose(output_waveform, sox_output_waveform, atol=1e-4, rtol=1e-5)

Expand Down
11 changes: 11 additions & 0 deletions test/test_torchscript_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,17 @@ def func(tensor):

self._assert_consistency(func, waveform)

def test_dcshift(self):
    """Pass a `F.dcshift` wrapper and a loaded waveform to `_assert_consistency`"""
    asset_path = common_utils.get_asset_path("whitenoise.wav")
    noise, _ = torchaudio.load(asset_path, normalization=True)

    def func(tensor):
        # Arguments are fixed here so the wrapper takes the tensor alone.
        shift = 0.5
        limiter_gain = 0.05
        return F.dcshift(tensor, shift, limiter_gain)

    self._assert_consistency(func, noise)


class _TransformsTestMixin:
"""Implements test for Transforms that are performed for different devices"""
Expand Down
46 changes: 46 additions & 0 deletions torchaudio/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,52 @@ def contrast(
return output_waveform


def dcshift(
        waveform: Tensor,
        shift: float,
        limiter_gain: float = 0.
) -> Tensor:
    r"""Apply a DC shift to the audio. Similar to SoX implementation.
    This can be useful to remove a DC offset
    (caused perhaps by a hardware problem in the recording chain) from the audio.

    The input tensor is never modified; the result is always a new tensor.

    Args:
        waveform (Tensor): audio waveform of dimension of `(..., time)`
        shift (float): indicates the amount to shift the audio
            Allowed range of values for shift : -2.0 to +2.0
            (the range is not validated by this function)
        limiter_gain (float): It is used only on peaks to prevent clipping
            It should have a value much less than 1 (e.g. 0.05 or 0.02).
            The default value ``0.`` disables the limiter.

    Returns:
        Tensor: Waveform of dimension of `(..., time)`

    References:
        http://sox.sourceforge.net/sox.html
    """
    # Start from the plain shift, hard-clipped to [-1, 1]. ``waveform + shift``
    # allocates a new tensor, so the masked writes below cannot leak into the
    # caller's ``waveform``.
    output_waveform = (waveform + shift).clamp(min=-1, max=+1)

    # The limiter only applies when a gain is given and the shift is non-zero.
    if limiter_gain != 0.0:
        limiter_threshold = 1.0 - (abs(shift) - limiter_gain)

        if shift > 0:
            # Positive shift: rewrite only the samples above the threshold.
            mask = waveform > limiter_threshold
            temp = (waveform[mask] - limiter_threshold) * limiter_gain / (1 - limiter_threshold)
            output_waveform[mask] = (temp + limiter_threshold + shift).clamp(max=limiter_threshold)
        elif shift < 0:
            # Negative shift: mirror image, for samples below -threshold.
            mask = waveform < -limiter_threshold
            temp = (waveform[mask] + limiter_threshold) * limiter_gain / (1 - limiter_threshold)
            output_waveform[mask] = (temp - limiter_threshold + shift).clamp(min=-limiter_threshold)

    return output_waveform


def mask_along_axis_iid(
specgrams: Tensor,
mask_param: int,
Expand Down