From e0a24405e72758dfe1a6413f2a19a8cd85d6aea3 Mon Sep 17 00:00:00 2001 From: Evgeniy Shabalin <36159472+treacker@users.noreply.github.com> Date: Thu, 22 Dec 2022 01:05:09 +0400 Subject: [PATCH] Fix preprocessing bug --- hw4_nv/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw4_nv/README.md b/hw4_nv/README.md index fe9c8b7..0263f66 100644 --- a/hw4_nv/README.md +++ b/hw4_nv/README.md @@ -42,7 +42,6 @@ class MelSpectrogram(nn.Module): super(MelSpectrogram, self).__init__() self.config = config - self.mel_spectrogram = torchaudio.transforms.MelSpectrogram( sample_rate=config.sr, win_length=config.win_length, @@ -50,7 +49,8 @@ class MelSpectrogram(nn.Module): n_fft=config.n_fft, f_min=config.f_min, f_max=config.f_max, - n_mels=config.n_mels + n_mels=config.n_mels, + center=False, ) # The is no way to set power in constructor in 0.5.0 version. @@ -72,7 +72,8 @@ class MelSpectrogram(nn.Module): :param audio: Expected shape is [B, T] :return: Shape is [B, n_mels, T'] """ - + audio = torch.nn.functional.pad(audio.unsqueeze(1), (int((self.config.n_fft-self.config.hop_length)/2), int((self.config.n_fft-self.config.hop_length)/2)), mode='reflect') + audio = audio.squeeze(1) mel = self.mel_spectrogram(audio) \ .clamp_(min=1e-5) \ .log_()