In [23]:
import pickle, os, librosa, parmap, torchaudio
from glob import glob
import numpy as np
from multiprocessing import cpu_count
from concurrent.futures import ThreadPoolExecutor
import IPython.display as ipd
from scipy.io import loadmat


# Expose no CUDA devices to this process (presumably so everything runs on CPU).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Root directory of the dataset; expected to contain 'train/' and 'test/'
# sub-directories with .wav files and a 'metadata_wavs.mat' file each.
datapath = '/root/datasets/ai_challenge/interspeech20'
# Alternative dataset root (disabled):
# datapath = '/root/datasets/DCASE2020/mic_dev'
# Target sampling rate that every clip is resampled to in loading().
SR = 16000
# STFT window size, hop size and number of mel bands used for the
# mel spectrograms computed in loading().
n_fft = 512
hop_length = 256
n_mels = 120

In [24]:
def _log_mel(signal):
    """Return the log-power (dB) mel spectrogram of a 1-D signal sampled at SR."""
    mel = librosa.feature.melspectrogram(
        y=signal, sr=SR, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    # Each spectrogram is normalised to its own maximum (0 dB peak).
    return librosa.power_to_db(mel, ref=np.max)


def loading(_path):
    """Load one two-channel clip and turn it into a 4-channel log-mel feature map.

    The clip is resampled to ``SR``, each of the first two channels is split
    into harmonic and percussive components (HPSS), and a log-mel spectrogram
    is computed for every component.

    Parameters
    ----------
    _path : str
        Path to a wav file whose name ends with a numeric index just before
        the extension (the last six characters of the stem are parsed).

    Returns
    -------
    dict
        ``'data'``: array of shape ``(frames, n_mels, 4)`` with the feature
        channels ordered as harmonic-1, percussive-1, harmonic-2, percussive-2.
        ``'index'``: the integer parsed from the file name.

    Raises
    ------
    ValueError
        If the file name does not carry a numeric index, or the file has
        fewer than two audio channels.
    """
    # Parse the index first so a badly named file fails before any heavy work.
    num = int(_path.split('.')[-2][-6:])

    data, sr = librosa.load(_path, sr=None, mono=False)
    if data.ndim != 2 or data.shape[0] < 2:
        raise ValueError('expected at least two audio channels in %s' % _path)

    # Keyword arguments work on both old and new librosa releases
    # (newer releases no longer accept the sample rates positionally).
    data = librosa.resample(data, orig_sr=sr, target_sr=SR)

    features = []
    for channel in data[:2]:
        # Each channel contributes its OWN harmonic and percussive parts.
        harmonic, percussive = librosa.effects.hpss(channel)
        features.append(_log_mel(harmonic))
        features.append(_log_mel(percussive))

    # (n_mels, frames, 4) -> (frames, n_mels, 4)
    stacked = np.stack(features, axis=-1)
    return {'data': np.transpose(stacked, [1, 0, 2]), 'index': num}

In [25]:

# Extract features for every train / test clip with a pool of worker threads.
# Use half of the cores, but never fewer than one worker: ThreadPoolExecutor
# rejects max_workers=0, which cpu_count() // 2 yields on a single-core host.
n_workers = max(1, cpu_count() // 2)
train_wavs = glob(os.path.join(datapath, 'train/*.wav'))
test_wavs = glob(os.path.join(datapath, 'test/*.wav'))
with ThreadPoolExecutor(max_workers=n_workers) as pool:
    # glob order is arbitrary; each result carries the index parsed from its
    # file name so the clips can be put back in order afterwards.
    _trainset = list(pool.map(loading, train_wavs))
    _testset = list(pool.map(loading, test_wavs))

# Labels: first row of the 'phi' entry of each split's metadata file.
trainlabel = loadmat(os.path.join(datapath, 'train/metadata_wavs.mat'))['phi'][0]
testlabel = loadmat(os.path.join(datapath, 'test/metadata_wavs.mat'))['phi'][0]

# NOTE: from here on `datapath` points at the OUTPUT directory for the pickles.
# Re-running this cell without first re-running the config cell would therefore
# glob inside the output directory instead of the raw dataset.
datapath = '/root/datasets/ai_challenge/interspeech20/acoustic'

In [26]:
# Pre-allocate one empty slot per extracted clip; the slots are filled in
# file-index order afterwards.
trainset = [None] * len(_trainset)
testset = [None] * len(_testset)

In [27]:
# Place every clip's features at the position given by its 1-based file index,
# so that the feature lists are ordered by file number.
for _extracted, _ordered in ((_trainset, trainset), (_testset, testset)):
    for sample in _extracted:
        position = sample['index'] - 1
        _ordered[position] = sample['data']

In [28]:
# Persist features and labels into `datapath`. Each file is written inside a
# `with` block so the handle is flushed and closed deterministically (the bare
# open(...) calls used before left all four handles unclosed).
for _filename, _payload in (
    ('trainset_x.pickle', trainset),
    ('trainset_y.pickle', trainlabel),
    ('testset_x.pickle', testset),
    ('testset_y.pickle', testlabel),
):
    with open(os.path.join(datapath, _filename), 'wb') as _fh:
        pickle.dump(_payload, _fh)

In [22]:
# List the entries of the directory `datapath` currently points to.
# NOTE(review): this cell's execution count (22) is lower than that of the
# cells above, so the displayed output may come from an earlier run.
os.listdir(datapath)

['testset_y.pickle',
 'trainset_x.pickle',
 'testset_x.pickle',
 'trainset_y.pickle']