In [1]:
import numpy as np
import librosa
import os
from tqdm import tqdm

In [2]:
# --- Signal / STFT settings -------------------------------------------------
sample_rate = 22050          # Hz; target rate handed to librosa.load
preemphasis = 0.97           # coefficient in y[t] - preemphasis * y[t - 1]
fourier_window_size = 2048   # n_fft for the STFT and for the mel filterbank
frame_shift = 0.0125         # seconds between the starts of successive frames
frame_length = 0.05          # seconds spanned by one analysis window
hop_length = int(sample_rate * frame_shift)    # frame shift in samples
win_length = int(sample_rate * frame_length)   # window length in samples

# --- Feature settings -------------------------------------------------------
n_mels = 80                  # number of mel bands produced per frame
resampled = 5                # consecutive mel frames stacked per row by load_file

# Decibel normalisation used by get_spectrogram:
#   (db - ref_db + max_db) / max_db, clipped to [1e-8, 1]
ref_db = 20
max_db = 100

def get_spectrogram(audio_file):
    """Compute normalised mel and linear magnitude spectrograms for one file.

    Steps: load the audio at ``sample_rate``, trim leading/trailing silence
    with ``librosa.effects.trim``, apply pre-emphasis, take the STFT
    magnitude, project it onto ``n_mels`` mel bands, convert both to
    decibels and rescale them with ``ref_db`` / ``max_db`` into ``[1e-8, 1]``.

    Parameters
    ----------
    audio_file : str
        Path of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    mel : np.ndarray, float32
        Transposed mel spectrogram (time is the first axis, ``n_mels`` the
        second).
    mag : np.ndarray, float32
        Transposed linear magnitude spectrogram (time is the first axis).
    """
    # The sample rate returned by librosa.load equals `sample_rate`, so it is
    # not needed afterwards.
    y, _ = librosa.load(audio_file, sr = sample_rate)
    y, _ = librosa.effects.trim(y)

    # Pre-emphasis: keep the first sample, then y[t] - preemphasis * y[t - 1].
    y = np.append(y[0], y[1:] - preemphasis * y[:-1])

    linear = librosa.stft(
        y = y,
        n_fft = fourier_window_size,
        hop_length = hop_length,
        win_length = win_length,
    )
    mag = np.abs(linear)

    # Keyword arguments are required here: librosa >= 0.10 made the
    # parameters of filters.mel keyword-only, so the positional form raises
    # TypeError. Older releases accept the keyword form as well.
    mel_basis = librosa.filters.mel(
        sr = sample_rate,
        n_fft = fourier_window_size,
        n_mels = n_mels,
    )
    mel = np.dot(mel_basis, mag)

    # Amplitude -> decibels; the 1e-5 floor avoids log10(0).
    mel = 20 * np.log10(np.maximum(1e-5, mel))
    mag = 20 * np.log10(np.maximum(1e-5, mag))

    # Shift by ref_db and scale by max_db, then clip into [1e-8, 1].
    mel = np.clip((mel - ref_db + max_db) / max_db, 1e-8, 1)
    mag = np.clip((mag - ref_db + max_db) / max_db, 1e-8, 1)
    return mel.T.astype(np.float32), mag.T.astype(np.float32)

def load_file(path):
    """Build the arrays for one audio file.

    Calls ``get_spectrogram(path)``, zero-pads both spectrograms at the end
    of the first (time) axis so that the number of frames is a multiple of
    ``resampled``, and reshapes the mel spectrogram so that each row holds
    ``resampled`` consecutive frames.

    Returns
    -------
    tuple
        ``(fname, mel, mag)`` where ``fname`` is the base name of ``path``,
        ``mel`` has ``n_mels * resampled`` columns and ``mag`` is the padded
        linear spectrogram.
    """
    base_name = os.path.basename(path)
    mel_frames, mag_frames = get_spectrogram(path)

    # Frames still missing to reach the next multiple of `resampled`
    # (0 when the frame count already is one).
    missing = (-mel_frames.shape[0]) % resampled
    pad_spec = [[0, missing], [0, 0]]

    mel_frames = np.pad(mel_frames, pad_spec, mode = 'constant')
    mag_frames = np.pad(mag_frames, pad_spec, mode = 'constant')

    stacked_mel = mel_frames.reshape((-1, n_mels * resampled))
    return base_name, stacked_mel, mag_frames

In [3]:
# Convert every .wav file in 'old' into a mel array (saved under 'mel_old')
# and a linear-magnitude array (saved under 'mag_old').
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs('mel_old', exist_ok = True)
os.makedirs('mag_old', exist_ok = True)

# os.listdir returns entries in arbitrary order; sort for a deterministic run.
wav_files = sorted(f for f in os.listdir('old') if f.endswith('.wav'))

for fpath in tqdm(wav_files):
    fname, mel, mag = load_file(os.path.join('old', fpath))
    # Swap only the extension. str.replace('wav', 'npy') would also rewrite
    # any 'wav' inside the stem (e.g. 'wave_01.wav' -> 'npye_01.npy').
    npy_name = os.path.splitext(fname)[0] + '.npy'
    np.save(os.path.join('mel_old', npy_name), mel)
    np.save(os.path.join('mag_old', npy_name), mag)

100%|██████████| 200/200 [00:25<00:00,  7.88it/s]


In [4]:
# Convert every .wav file in 'young' into a mel array (saved under
# 'mel_young') and a linear-magnitude array (saved under 'mag_young').
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs('mel_young', exist_ok = True)
os.makedirs('mag_young', exist_ok = True)

# os.listdir returns entries in arbitrary order; sort for a deterministic run.
wav_files = sorted(f for f in os.listdir('young') if f.endswith('.wav'))

for fpath in tqdm(wav_files):
    fname, mel, mag = load_file(os.path.join('young', fpath))
    # Swap only the extension. str.replace('wav', 'npy') would also rewrite
    # any 'wav' inside the stem (e.g. 'wave_01.wav' -> 'npye_01.npy').
    npy_name = os.path.splitext(fname)[0] + '.npy'
    np.save(os.path.join('mel_young', npy_name), mel)
    np.save(os.path.join('mag_young', npy_name), mag)

100%|██████████| 200/200 [00:25<00:00,  7.98it/s]
