Rename SpectrogramToDB to AmplitudeToDB (#170)

pytorch · Jul 26, 2019 · 873af31 · 873af31
1 parent d3fe2a7
commit 873af31
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 32 deletions.
diff --git a/test/test_jit.py b/test/test_jit.py
@@ -78,11 +78,11 @@ def test_scriptmodule_MelScale(self):
 
         self._test_script_module(spec_f, transforms.MelScale)
 
-    def test_torchscript_spectrogram_to_DB(self):
+    def test_torchscript_amplitude_to_DB(self):
         @torch.jit.script
         def jit_method(spec, multiplier, amin, db_multiplier, top_db):
             # type: (Tensor, float, float, float, Optional[float]) -> Tensor
-            return F.spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db)
+            return F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
 
         spec = torch.rand((6, 201))
         multiplier = 10.
@@ -91,15 +91,15 @@ def jit_method(spec, multiplier, amin, db_multiplier, top_db):
         top_db = 80.
 
         jit_out = jit_method(spec, multiplier, amin, db_multiplier, top_db)
-        py_out = F.spectrogram_to_DB(spec, multiplier, amin, db_multiplier, top_db)
+        py_out = F.amplitude_to_DB(spec, multiplier, amin, db_multiplier, top_db)
 
         self.assertTrue(torch.allclose(jit_out, py_out))
 
     @unittest.skipIf(not RUN_CUDA, "no CUDA")
-    def test_scriptmodule_SpectrogramToDB(self):
+    def test_scriptmodule_AmplitudeToDB(self):
         spec = torch.rand((6, 201), device="cuda")
 
-        self._test_script_module(spec, transforms.SpectrogramToDB)
+        self._test_script_module(spec, transforms.AmplitudeToDB)
 
     def test_torchscript_create_dct(self):
         @torch.jit.script

diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -52,7 +52,7 @@ def test_mu_law_companding(self):
 
     def test_mel2(self):
         top_db = 80.
-        s2db = transforms.SpectrogramToDB('power', top_db)
+        s2db = transforms.AmplitudeToDB('power', top_db)
 
         waveform = self.waveform.clone()  # (1, 16000)
         waveform_scaled = self.scale(waveform)  # (1, 16000)
@@ -155,7 +155,7 @@ def _test_librosa_consistency_helper(n_fft, hop_length, power, n_mels, n_mfcc, s
             self.assertTrue(torch.allclose(torch_mel.type(librosa_mel_tensor.dtype), librosa_mel_tensor, atol=5e-3))
 
             # test s2db
-            db_transform = torchaudio.transforms.SpectrogramToDB('power', 80.)
+            db_transform = torchaudio.transforms.AmplitudeToDB('power', 80.)
             db_torch = db_transform(spect_transform(sound)).squeeze().cpu()
             db_librosa = librosa.core.spectrum.power_to_db(out_librosa)
             self.assertTrue(torch.allclose(db_torch, torch.from_numpy(db_librosa), atol=5e-3))

diff --git a/torchaudio/functional.py b/torchaudio/functional.py
@@ -5,7 +5,7 @@
 __all__ = [
     'istft',
     'spectrogram',
-    'spectrogram_to_DB',
+    'amplitude_to_DB',
     'create_fb_matrix',
     'create_dct',
     'mu_law_encoding',
@@ -207,34 +207,34 @@ def spectrogram(waveform, pad, window, n_fft, hop_length, win_length, power, nor
 
 
 @torch.jit.script
-def spectrogram_to_DB(specgram, multiplier, amin, db_multiplier, top_db=None):
+def amplitude_to_DB(x, multiplier, amin, db_multiplier, top_db=None):
     # type: (Tensor, float, float, float, Optional[float]) -> Tensor
-    r"""Turns a spectrogram from the power/amplitude scale to the decibel scale.
+    r"""Turns a tensor from the power/amplitude scale to the decibel scale.
 
-    This output depends on the maximum value in the input spectrogram, and so
+    This output depends on the maximum value in the input tensor, and so
     may return different values for an audio clip split into snippets vs.
     a full clip.
 
     Args:
-        specgram (torch.Tensor): Normal STFT of size (c, f, t)
+        x (torch.Tensor): Input tensor before being converted to decibel scale
         multiplier (float): Use 10. for power and 20. for amplitude
-        amin (float): Number to clamp specgram
+        amin (float): Number to clamp ``x``
         db_multiplier (float): Log10(max(reference value and amin))
         top_db (Optional[float]): Minimum negative cut-off in decibels. A reasonable number
             is 80.
 
     Returns:
-        torch.Tensor: Spectrogram in DB of size (c, f, t)
+        torch.Tensor: Output tensor in decibel scale
     """
-    specgram_db = multiplier * torch.log10(torch.clamp(specgram, min=amin))
-    specgram_db -= multiplier * db_multiplier
+    x_db = multiplier * torch.log10(torch.clamp(x, min=amin))
+    x_db -= multiplier * db_multiplier
 
     if top_db is not None:
-        new_spec_db_max = torch.tensor(float(specgram_db.max()) - top_db,
-                                       dtype=specgram_db.dtype, device=specgram_db.device)
-        specgram_db = torch.max(specgram_db, new_spec_db_max)
+        new_x_db_max = torch.tensor(float(x_db.max()) - top_db,
+                                    dtype=x_db.dtype, device=x_db.device)
+        x_db = torch.max(x_db, new_x_db_max)
 
-    return specgram_db
+    return x_db
 
 
 @torch.jit.script

diff --git a/torchaudio/transforms.py b/torchaudio/transforms.py
@@ -9,7 +9,7 @@
 
 __all__ = [
     'Spectrogram',
-    'SpectrogramToDB',
+    'AmplitudeToDB',
     'MelScale',
     'MelSpectrogram',
     'MFCC',
@@ -67,23 +67,23 @@ def forward(self, waveform):
                              self.win_length, self.power, self.normalized)
 
 
-class SpectrogramToDB(torch.jit.ScriptModule):
-    r"""Turns a spectrogram from the power/amplitude scale to the decibel scale.
+class AmplitudeToDB(torch.jit.ScriptModule):
+    r"""Turns a tensor from the power/amplitude scale to the decibel scale.
 
-    This output depends on the maximum value in the input spectrogram, and so
+    This output depends on the maximum value in the input tensor, and so
     may return different values for an audio clip split into snippets vs.
     a full clip.
 
     Args:
-        stype (str): scale of input spectrogram ('power' or 'magnitude'). The
+        stype (str): scale of input tensor ('power' or 'magnitude'). The
             power being the elementwise square of the magnitude. (Default: 'power')
         top_db (float, optional): minimum negative cut-off in decibels.  A reasonable number
             is 80.
     """
     __constants__ = ['multiplier', 'amin', 'ref_value', 'db_multiplier']
 
     def __init__(self, stype='power', top_db=None):
-        super(SpectrogramToDB, self).__init__()
+        super(AmplitudeToDB, self).__init__()
         self.stype = torch.jit.Attribute(stype, str)
         if top_db is not None and top_db < 0:
             raise ValueError('top_db must be positive value')
@@ -94,17 +94,17 @@ def __init__(self, stype='power', top_db=None):
         self.db_multiplier = math.log10(max(self.amin, self.ref_value))
 
     @torch.jit.script_method
-    def forward(self, specgram):
+    def forward(self, x):
         r"""Numerically stable implementation from Librosa
         https://librosa.github.io/librosa/_modules/librosa/core/spectrum.html
 
         Args:
-            specgram (torch.Tensor): STFT of size (c, f, t)
+            x (torch.Tensor): Input tensor before being converted to decibel scale
 
         Returns:
-            torch.Tensor: STFT after changing scale of size (c, f, t)
+            torch.Tensor: Output tensor in decibel scale
         """
-        return F.spectrogram_to_DB(specgram, self.multiplier, self.amin, self.db_multiplier, self.top_db)
+        return F.amplitude_to_DB(x, self.multiplier, self.amin, self.db_multiplier, self.top_db)
 
 
 class MelScale(torch.jit.ScriptModule):
@@ -246,7 +246,7 @@ def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm='ortho', log_m
         self.dct_type = dct_type
         self.norm = torch.jit.Attribute(norm, Optional[str])
         self.top_db = 80.0
-        self.spectrogram_to_DB = SpectrogramToDB('power', self.top_db)
+        self.amplitude_to_DB = AmplitudeToDB('power', self.top_db)
 
         if melkwargs is not None:
             self.MelSpectrogram = MelSpectrogram(sample_rate=self.sample_rate, **melkwargs)
@@ -273,7 +273,7 @@ def forward(self, waveform):
             log_offset = 1e-6
             mel_specgram = torch.log(mel_specgram + log_offset)
         else:
-            mel_specgram = self.spectrogram_to_DB(mel_specgram)
+            mel_specgram = self.amplitude_to_DB(mel_specgram)
         # (c, `n_mels`, t).transpose(...) dot (`n_mels`, `n_mfcc`) -> (c, t, `n_mfcc`).transpose(...)
         mfcc = torch.matmul(mel_specgram.transpose(1, 2), self.dct_mat).transpose(1, 2)
         return mfcc