# 【PyTorch/librosa】Pythonで音声/音楽データをリサンプリングする
https://take-tech-engineer.com/pytorch-librosa-resample/

In [1]:
import os
import requests

# Where the sample clip is fetched from and where it is stored locally.
_SAMPLE_DIR = "_sample_data"
SAMPLE_WAV_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.wav"
SAMPLE_WAV_PATH = os.path.join(_SAMPLE_DIR, "steam.wav")

os.makedirs(_SAMPLE_DIR, exist_ok=True)

# Download first and fail loudly on an HTTP error, so that an error page (or
# an empty file from an aborted request) is never saved as "steam.wav".
# The timeout keeps the cell from hanging forever on a stalled connection.
_response = requests.get(SAMPLE_WAV_URL, timeout=30)
_response.raise_for_status()

with open(SAMPLE_WAV_PATH, 'wb') as f:
    f.write(_response.content)

In [2]:
import librosa

# Load the clip as-is: sr=None keeps the file's own sampling rate and
# mono=False keeps the channels separate (the recorded run below shows a
# 2-channel array at 44100 Hz).
y, sr = librosa.load(
    SAMPLE_WAV_PATH,
    sr=None,
    mono=False,
)
print(y.shape)
# (2, 109368)
print(sr)
# 44100

# Resample from the original rate down to 8 kHz using librosa's default method.
re_sr = 8000
y_8k = librosa.resample(
    y=y,
    orig_sr=sr,
    target_sr=re_sr,
)
print(y_8k.shape)
# (2, 19840)

(2, 109368)
44100
(2, 19840)


In [3]:
# Same 8 kHz resampling, but with the resampling method chosen explicitly
# through res_type instead of relying on librosa's default.
# NOTE(review): 'kaiser_fast' presumably needs the optional resampy backend in
# recent librosa releases - confirm it is installed.
y_8k_kaiser_fast = librosa.resample(
    y=y,
    orig_sr=sr,
    target_sr=re_sr,
    res_type='kaiser_fast',
)

In [4]:
import torch
import torchaudio

# Load the clip with torchaudio. The path is passed positionally because the
# first parameter's name differs between torchaudio releases (`filepath` in
# older ones, `uri` in newer ones); a keyword argument would break on one side.
waveform, sample_rate = torchaudio.load(SAMPLE_WAV_PATH)

print(waveform.shape)
# torch.Size([2, 109368])
print(sample_rate)
# 44100

torch.Size([2, 109368])
44100


In [5]:
import torchaudio.functional as F

# Target sampling rate for every torchaudio example that follows.
re_sample_rate = 8000

# One-shot functional resampling with torchaudio's default method.
resampled_waveform = F.resample(
    waveform=waveform,
    orig_freq=sample_rate,
    new_freq=re_sample_rate,
)

print(resampled_waveform.shape)
# torch.Size([2, 19840])

torch.Size([2, 19840])


In [6]:
# Functional resampling with a Kaiser-windowed sinc filter.
# The method is spelled 'sinc_interp_kaiser' in current torchaudio; the older
# spelling 'kaiser_window' was deprecated in 2.0 and later removed, so it is
# only needed when running a release older than 2.0.
resampled_waveform_kaiser = F.resample(
    waveform=waveform,
    orig_freq=sample_rate,
    new_freq=re_sample_rate,
    resampling_method='sinc_interp_kaiser',
)

In [7]:
import torchaudio.transforms as T

# Build a reusable resampling transform, then apply it to the waveform.
# The method is spelled 'sinc_interp_kaiser' in current torchaudio; the older
# spelling 'kaiser_window' was deprecated in 2.0 and later removed, so it is
# only needed when running a release older than 2.0.
resampler = T.Resample(
    orig_freq=sample_rate,
    new_freq=re_sample_rate,
    resampling_method='sinc_interp_kaiser',
)
resampled_waveform = resampler(waveform)
print(resampled_waveform.shape)
# torch.Size([2, 19840])

torch.Size([2, 19840])


In [8]:
%%timeit

# Benchmark body: ten back-to-back functional resamples per timed run.
# The method is spelled 'sinc_interp_kaiser' in current torchaudio; the older
# spelling 'kaiser_window' was deprecated in 2.0 and later removed.
for _ in range(10):
    resampled_waveform_kaiser = F.resample(
        waveform=waveform,
        orig_freq=sample_rate,
        new_freq=re_sample_rate,
        resampling_method='sinc_interp_kaiser',
    )
# Timing noted by the author on an earlier run: 10 loops, best of 5: 198 ms per loop

212 ms ± 25.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit

# Benchmark body: apply the already-constructed `resampler` transform ten
# times per timed run, for comparison with the functional F.resample loop.
# NOTE(review): the large speed-up is presumably because the transform builds
# its filter once at construction time - confirm against the torchaudio docs.
for _ in range(10):
    resampled_waveform = resampler(waveform)
# Timing noted by the author on an earlier run: 100 loops, best of 5: 11.7 ms per loop

11.9 ms ± 369 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
