In [1]:
import os
import pickle
import numpy as np
import soundfile as sf
import librosa
from scipy import signal
from scipy.signal import get_window
from librosa.filters import mel
from librosa.util import normalize
from numpy.random import RandomState

In [2]:
import torch
import IPython.display as ipd

In [3]:
# Input location: folder containing the source audio files
rootDir = "./train_data_librispeech"
# Output location: folder that receives the computed spectrograms
targetDir = "./train_spmel_librispeech"

In [4]:
# Only the first os.walk() entry is consumed: the root path itself, the names
# of its immediate sub-directories, and (discarded) the files directly in it.
walker = os.walk(rootDir)
dirName, subdirList, _ = next(walker)
print('Found directory: %s' % dirName)

Found directory: ./train_data_librispeech


In [5]:
def mel_gan_handler(x, fft_length = 1024, hop_length = 256, sr = 22050, n_mels = 80):
    """Compute a log10 mel-magnitude spectrogram of a waveform.

    Parameters
    ----------
    x : np.ndarray
        Audio samples. Presumably a mono 1-D float waveform -- TODO confirm
        against callers; the (p, p) padding below only fits 1-D input.
    fft_length : int
        FFT size, used for BOTH the STFT and the mel filterbank so the two
        always have matching frequency-bin counts.
    hop_length : int
        Number of samples between successive STFT frames.
    sr : int
        Sampling rate of ``x``; used to build the mel filterbank.
    n_mels : int
        Number of mel bands in the output.

    Returns
    -------
    np.ndarray
        float32 array with ``n_mels`` rows (one column per STFT frame for 1-D
        input) holding log10 of the mel magnitudes, floored at 1e-5 (i.e. -5.0).
    """
    # Peak-normalize the waveform (librosa.util.normalize).
    wav = normalize(x)
    # The STFT below runs with center=False, so librosa adds no padding of its
    # own; reflect-pad (fft_length - hop_length) // 2 samples on each side here.
    p = (fft_length - hop_length) // 2
    wav = np.squeeze(np.pad(wav, (p, p), "reflect"))
    fft = librosa.stft(
                       wav,
                       n_fft = fft_length,
                       hop_length = hop_length,
                       window = 'hann',
                       center = False
                     )
    # Complex magnitude: sqrt(real**2 + imag**2)
    mag = np.abs(fft)
    # Build the filterbank from fft_length (previously a hard-coded 1024, which
    # broke the matrix product for any other FFT size). Keyword arguments are
    # used because newer librosa releases no longer accept them positionally.
    mel_basis = mel(sr = sr, n_fft = fft_length, n_mels = n_mels, fmin = 0.0, fmax = None)
    mel_output = np.dot(mel_basis, mag)
    # Clamp before the log so silent bins map to -5 instead of -inf.
    log_mel_spec = np.log10(np.maximum(1e-5, mel_output)).astype(np.float32)
    return log_mel_spec

In [6]:
# Sampling rate (Hz) the audio is resampled to before spectrogram extraction
target_fs = 22050

In [7]:
# Convert up to 50 audio files per sub-directory of rootDir into log-mel
# spectrograms, mirroring the sub-directory layout under targetDir.
for subdir in sorted(subdirList):
    print(subdir)
    srcSubdir = os.path.join(dirName, subdir)
    dstSubdir = os.path.join(targetDir, subdir)
    # exist_ok makes the cell safely re-runnable without a check-then-create race
    os.makedirs(dstSubdir, exist_ok=True)
    _, _, fileList = next(os.walk(srcSubdir))
    # Sort BEFORE slicing: os.walk returns files in arbitrary order, so slicing
    # first would pick a filesystem-dependent subset of 50 files.
    for fileName in sorted(fileList)[:50]:
        # Read the audio file and let librosa resample it to target_fs directly;
        # a separate librosa.resample call afterwards would be a no-op.
        x, fs = librosa.load(os.path.join(srcSubdir, fileName), sr=target_fs)
        # Transpose so that frames run along the first axis
        S = mel_gan_handler(x, sr=target_fs).T
        # Strip the real extension whatever its length (".wav", ".flac", ...);
        # np.save appends ".npy" itself. The old fileName[:-3] slice kept the
        # trailing dot, producing names such as "clip..npy".
        stem = os.path.splitext(fileName)[0]
        # save spect
        np.save(os.path.join(dstSubdir, stem), S.astype(np.float32), allow_pickle=False)

100
1001
101
1012
1018
1025
1027
1028
1031
1046
1050
1052
1053
1054
1058
1060
1061
1066
1079
1093
1100
1112
112
1121
114
115
1160
1165
1175
1182
119
1195
1212
122
1222
1224
1226
1241
1259
126
1264
1265
1271
1283
1289
1290
1296
1311
1313
1316
1322
1323
1335
1336
1337
1343
1348
1349
1365
1379
1382
1383
1387
1390
1392
14
1401
1413
1417
1422
1425
1445
1446
1448
1456
1460
1463
1472
1473
1482
1487
1498
1509
1513
1535
1536
154
1547
1552
1556
157
1571
159
16
1603
1607
1629
1634
1638
1639
1641
1649
166
1668
1678
17
1705
1724
1731
1734
1740
1748
175
1752
1754
176
1769
1776
1777
1779
1789
1800
1801
1806
1811
1825
1826
1827
1845
1849
1851
1859
1874
188
1885
1903
1913
1914
192
1923
1933
1943
1944
1958
1961
1974
1987
2004
2010
2012
203
2039
204
2045
205
2053
2056
2060
2061
207
2074
208
2085
209
2093
210
2110
2113
2127
2137
2149
2156
216
2162
2167
217
2194
22
2201
2204
2229
2230
2238
224
2240
225
2254
2256
2269
227
2272
2285
2294
2299
23
231
2319
2334
2348
2364
2368
2388
2393
2397
240
2401
2404
2411
