In [1]:
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import numpy as np
from pytorch_nsynth_lib.nsynth import NSynth
from IPython.display import Audio

import librosa
import librosa.display
import phase_operation
from tqdm import tqdm
import h5py

In [2]:
import spec_ops as spec_ops
import phase_operation as phase_op
import spectrograms_helper as spec_helper

In [3]:
# Output HDF5 container for the extracted features. Mode 'w' creates the file
# and truncates any existing file at this path, so re-running this cell
# discards previously written datasets.
train_data = h5py.File(name='../data/Nsynth_melspec_IF_pitch.hdf5', mode='w')


In [4]:
def _int16_to_unit_float(x):
    """Scale raw int16 audio by the int16 maximum, giving floats in roughly [-1, 1]."""
    return x / np.iinfo(np.int16).max


# Audio samples are loaded as an int16 numpy array; this transform rescales
# them to floating point.
toFloat = transforms.Lambda(_int16_to_unit_float)

# NSynth training split. Entries matching "string" are blacklisted (string
# instruments), and instrument_family and pitch are requested as the
# categorical fields.
dataset = NSynth(
    "../data/nsynth/nsynth-train",
    transform=toFloat,
    blacklist_pattern=["string"],
    categorical_field_list=["instrument_family", "pitch"],
)

# One clip per batch, visited in random order.
loader = data.DataLoader(dataset, shuffle=True, batch_size=1)


In [5]:
def expand(mat, frame_index=125, repeats=2):
    """Pad a 2-D array on the right by repeating one of its columns.

    With the default arguments column 125 is appended twice, so a 126-column
    input becomes 128 columns wide.

    Args:
        mat: 2-D numpy array laid out as (rows, columns).
        frame_index: Index of the column to replicate. Negative values count
            from the end, e.g. -1 selects the last column.
        repeats: Non-negative number of copies of that column to append.

    Returns:
        A new array of shape (rows, columns + repeats). ``mat`` itself is not
        modified.

    Raises:
        IndexError: If ``frame_index`` is out of range for ``mat``.
    """
    # Keep the selected column 2-D (rows x 1) so it can be tiled and stacked.
    column = np.expand_dims(mat[:, frame_index], axis=1)
    padding = np.repeat(column, repeats, axis=1)
    return np.hstack((mat, padding))

In [None]:
# Accumulators for the per-sample features collected by the loop below.
# spec_list and IF_list are not filled by this loop; they are still defined
# so that any other cell referring to them keeps working.
spec_list = []
pitch_list = []
IF_list = []
mel_spec_list = []
mel_IF_list = []

pitch_set = set()  # distinct pitch values among the kept samples
count = 0          # number of samples kept so far

# The loader yields four items per batch. The third one is unpacked as
# `pitch_label` and not used: the pitch that gets stored is read from
# `targets` instead.
for samples, instrument_family, pitch_label, targets in tqdm(loader, desc="extracting features"):
    pitch = targets['pitch'].data.numpy()[0]

    # Keep only samples whose pitch lies in [24, 84].
    if pitch < 24 or pitch > 84:
        continue

    sample = samples.data.numpy().squeeze()
    spec = librosa.stft(sample, n_fft=2048, hop_length=512)

    # Log-magnitude (the epsilon avoids log(0)) and phase angle of the STFT;
    # only the first 1024 rows of each are kept.
    magnitude = np.log(np.abs(spec) + 1.0e-6)[:1024]
    angle = np.angle(spec)
    IF = phase_operation.instantaneous_frequency(angle, time_axis=1)[:1024]

    # Pad two extra columns onto each array.
    # NOTE(review): assumes every clip yields 126 STFT frames so the padded
    # width is 128 -- the assertions below enforce this.
    magnitude = expand(magnitude)
    IF = expand(IF)

    # Validate shapes before the mel conversion so a malformed sample fails
    # here, with the offending shape reported, rather than inside the helper.
    assert magnitude.shape == (1024, 128), magnitude.shape
    assert IF.shape == (1024, 128), IF.shape

    logmelmag2, mel_p = spec_helper.specgrams_to_melspecgrams(magnitude, IF)

    pitch_list.append(pitch)
    mel_spec_list.append(logmelmag2)
    mel_IF_list.append(mel_p)
    pitch_set.add(pitch)

    count += 1
    if count % 10000 == 0:
        # tqdm.write prints the kept-sample milestone without breaking the bar.
        tqdm.write(str(count))

In [None]:
# Write the collected features to the HDF5 file. The "Spec" and "IF" datasets
# are currently not written; only the pitch and mel-scaled features are stored.
# The try/finally guarantees the file handle is closed even if one of the
# writes raises, which avoids leaving the file open.
try:
    train_data.create_dataset("pitch", data=pitch_list)
    train_data.create_dataset("mel_Spec", data=mel_spec_list)
    train_data.create_dataset("mel_IF", data=mel_IF_list)
finally:
    train_data.close()