Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename SpectrogramToDB to AmplitudeToDB #170

Merged
merged 3 commits into from Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 5 additions & 5 deletions test/test_jit.py
Expand Up @@ -78,11 +78,11 @@ def test_scriptmodule_MelScale(self):

self._test_script_module(spec_f, transforms.MelScale)

def test_torchscript_spectrogram_to_DB(self):
def test_torchscript_amplitude_to_DB(self):
@torch.jit.script
def jit_method(spec, multiplier, amin, db_multiplier, top_db):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
return F.spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db)
return F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)

spec = torch.rand((6, 201))
multiplier = 10.
Expand All @@ -91,15 +91,15 @@ def jit_method(spec, multiplier, amin, db_multiplier, top_db):
top_db = 80.

jit_out = jit_method(spec, multiplier, amin, db_multiplier, top_db)
py_out = F.spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db)
py_out = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)

self.assertTrue(torch.allclose(jit_out, py_out))

@unittest.skipIf(not RUN_CUDA, "no CUDA")
def test_scriptmodule_SpectrogramToDB(self):
def test_scriptmodule_AmplitudeToDB(self):
spec = torch.rand((6, 201), device="cuda")

self._test_script_module(spec, transforms.SpectrogramToDB)
self._test_script_module(spec, transforms.AmplitudeToDB)

def test_torchscript_create_dct(self):
@torch.jit.script
Expand Down
4 changes: 2 additions & 2 deletions test/test_transforms.py
Expand Up @@ -52,7 +52,7 @@ def test_mu_law_companding(self):

def test_mel2(self):
top_db = 80.
s2db = transforms.SpectrogramToDB('power', top_db)
s2db = transforms.AmplitudeToDB('power', top_db)

waveform = self.waveform.clone() # (1, 16000)
waveform_scaled = self.scale(waveform) # (1, 16000)
Expand Down Expand Up @@ -155,7 +155,7 @@ def _test_librosa_consistency_helper(n_fft, hop_length, power, n_mels, n_mfcc, s
self.assertTrue(torch.allclose(torch_mel.type(librosa_mel_tensor.dtype), librosa_mel_tensor, atol=5e-3))

# test s2db
db_transform = torchaudio.transforms.SpectrogramToDB('power', 80.)
db_transform = torchaudio.transforms.AmplitudeToDB('power', 80.)
db_torch = db_transform(spect_transform(sound)).squeeze().cpu()
db_librosa = librosa.core.spectrum.power_to_db(out_librosa)
self.assertTrue(torch.allclose(db_torch, torch.from_numpy(db_librosa), atol=5e-3))
Expand Down
26 changes: 13 additions & 13 deletions torchaudio/functional.py
Expand Up @@ -5,7 +5,7 @@
__all__ = [
'istft',
'spectrogram',
'spectrogram_to_DB',
'amplitude_to_DB',
'create_fb_matrix',
'create_dct',
'mu_law_encoding',
Expand Down Expand Up @@ -207,34 +207,34 @@ def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, nor


@torch.jit.script
def spectrogram_to_DB(specgram, multiplier, amin, db_multiplier, top_db=None):
def amplitude_to_DB(x, multiplier, amin, db_multiplier, top_db=None):
# type: (Tensor, float, float, float, Optional[float]) -> Tensor
r"""Turns a spectrogram from the power/amplitude scale to the decibel scale.
r"""Turns a tensor from the power/amplitude scale to the decibel scale.

This output depends on the maximum value in the input spectrogram, and so
This output depends on the maximum value in the input tensor, and so
may return different values for an audio clip split into snippets vs. a
a full clip.

Args:
specgram (torch.Tensor): Normal STFT of size (c, f, t)
x (torch.Tensor): Input tensor before being converted to decibel scale
multiplier (float): Use 10. for power and 20. for amplitude
amin (float): Number to clamp specgram
amin (float): Number to clamp ``x``
db_multiplier (float): Log10(max(reference value and amin))
top_db (Optional[float]): Minimum negative cut-off in decibels. A reasonable number
is 80.

Returns:
torch.Tensor: Spectrogram in DB of size (c, f, t)
torch.Tensor: Output tensor in decibel scale
"""
specgram_db = multiplier * torch.log10(torch.clamp(specgram, min=amin))
specgram_db -= multiplier * db_multiplier
x_db = multiplier * torch.log10(torch.clamp(x, min=amin))
x_db -= multiplier * db_multiplier

if top_db is not None:
new_spec_db_max = torch.tensor(float(specgram_db.max()) - top_db,
dtype=specgram_db.dtype, device=specgram_db.device)
specgram_db = torch.max(specgram_db, new_spec_db_max)
new_x_db_max = torch.tensor(float(x_db.max()) - top_db,
dtype=x_db.dtype, device=x_db.device)
x_db = torch.max(x_db, new_x_db_max)

return specgram_db
return x_db


@torch.jit.script
Expand Down
24 changes: 12 additions & 12 deletions torchaudio/transforms.py
Expand Up @@ -9,7 +9,7 @@

__all__ = [
'Spectrogram',
'SpectrogramToDB',
'AmplitudeToDB',
'MelScale',
'MelSpectrogram',
'MFCC',
Expand Down Expand Up @@ -67,23 +67,23 @@ def forward(self, waveform):
self.win_length, self.power, self.normalized)


class SpectrogramToDB(torch.jit.ScriptModule):
r"""Turns a spectrogram from the power/amplitude scale to the decibel scale.
class AmplitudeToDB(torch.jit.ScriptModule):
r"""Turns a tensor from the power/amplitude scale to the decibel scale.

This output depends on the maximum value in the input spectrogram, and so
This output depends on the maximum value in the input tensor, and so
may return different values for an audio clip split into snippets vs. a
a full clip.

Args:
stype (str): scale of input spectrogram ('power' or 'magnitude'). The
stype (str): scale of input tensor ('power' or 'magnitude'). The
power being the elementwise square of the magnitude. (Default: 'power')
top_db (float, optional): minimum negative cut-off in decibels. A reasonable number
is 80.
"""
__constants__ = ['multiplier', 'amin', 'ref_value', 'db_multiplier']

def __init__(self, stype='power', top_db=None):
super(SpectrogramToDB, self).__init__()
super(AmplitudeToDB, self).__init__()
self.stype = torch.jit.Attribute(stype, str)
if top_db is not None and top_db < 0:
raise ValueError('top_db must be positive value')
Expand All @@ -94,17 +94,17 @@ def __init__(self, stype='power', top_db=None):
self.db_multiplier = math.log10(max(self.amin, self.ref_value))

@torch.jit.script_method
def forward(self, specgram):
def forward(self, x):
r"""Numerically stable implementation from Librosa
https://librosa.github.io/librosa/_modules/librosa/core/spectrum.html

Args:
specgram (torch.Tensor): STFT of size (c, f, t)
x (torch.Tensor): Input tensor before being converted to decibel scale

Returns:
torch.Tensor: STFT after changing scale of size (c, f, t)
torch.Tensor: Output tensor in decibel scale
"""
return F.spectrogram_to_DB(specgram, self.multiplier, self.amin, self.db_multiplier, self.top_db)
return F.amplitude_to_DB(x, self.multiplier, self.amin, self.db_multiplier, self.top_db)


class MelScale(torch.jit.ScriptModule):
Expand Down Expand Up @@ -246,7 +246,7 @@ def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm='ortho', log_m
self.dct_type = dct_type
self.norm = torch.jit.Attribute(norm, Optional[str])
self.top_db = 80.0
self.spectrogram_to_DB = SpectrogramToDB('power', self.top_db)
self.amplitude_to_DB = AmplitudeToDB('power', self.top_db)

if melkwargs is not None:
self.MelSpectrogram = MelSpectrogram(sample_rate=self.sample_rate, **melkwargs)
Expand All @@ -273,7 +273,7 @@ def forward(self, waveform):
log_offset = 1e-6
mel_specgram = torch.log(mel_specgram + log_offset)
else:
mel_specgram = self.spectrogram_to_DB(mel_specgram)
mel_specgram = self.amplitude_to_DB(mel_specgram)
# (c, `n_mels`, t).tranpose(...) dot (`n_mels`, `n_mfcc`) -> (c, t, `n_mfcc`).tranpose(...)
mfcc = torch.matmul(mel_specgram.transpose(1, 2), self.dct_mat).transpose(1, 2)
return mfcc
Expand Down