In [1]:
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import numpy as np
from IPython.display import Audio
import librosa
import librosa.display
from tqdm import tqdm
import h5py
import os

from pytorch_nsynth_lib.nsynth import NSynth

from GANsynth_pytorch import phase_operation

In [2]:
from GANsynth_pytorch import spec_ops as spec_ops
from GANsynth_pytorch import phase_operation as phase_op
from GANsynth_pytorch import spectrograms_helper as spec_helper

In [3]:
# Root directory of the NSynth validation split. The default is the original,
# machine-specific location; set the NSYNTH_VALID_DIR environment variable to
# point the notebook at another copy without editing this cell.
base_path = os.environ.get('NSYNTH_VALID_DIR',
                           '/media/theis/DATA/datasets/nsynth/valid/')

In [4]:
def _int16_to_float(x):
    """Rescale int16 audio samples to floats in approximately [-1, 1]."""
    return x / np.iinfo(np.int16).max


# Audio samples are loaded as int16 numpy arrays; this transform rescales them.
toFloat = transforms.Lambda(_int16_to_float)

# Build the dataset from the "nsynth-valid/" folder under base_path. The
# optional filters below are left disabled, so the library defaults apply.
nsynth_root = os.path.join(base_path, "nsynth-valid/")
dataset = NSynth(
    nsynth_root,
    transform=toFloat,
    #  blacklist_pattern=["string"],  # blacklist string instrument
    #  categorical_field_list=["instrument_family","pitch"]
)

# Iterate over the dataset one item at a time, in dataset order.
loader = data.DataLoader(dataset, shuffle=False, batch_size=1)

In [5]:
# Peek at the first dataset item; per the printed output it is a list of
# [waveform array, integer target, metadata dict].
first_item = dataset[0]
print(first_item)

[array([0.        , 0.00015259, 0.00045778, ..., 0.        , 0.        ,
       0.        ]), 4, {'note_str': 'keyboard_acoustic_004-021-050', 'sample_rate': 16000, 'qualities_str': ['dark', 'reverb'], 'instrument_source': 0, 'instrument_family_str': 'keyboard', 'instrument_family': 4, 'note': 212634, 'instrument_source_str': 'acoustic', 'qualities': [0, 1, 0, 0, 0, 0, 0, 0, 1, 0], 'pitch': 21, 'instrument_str': 'keyboard_acoustic_004', 'instrument': 327, 'velocity': 50}]


In [6]:
def expand(mat, n_extra=2):
    """Pad a 2-D array on the right by repeating its last column.

    In this notebook it is applied to arrays with 126 columns so that the
    result has 128 columns (the shape asserted by the conversion loop).
    The previous implementation always copied column index 125, which is only
    the last column for 126-column inputs; it raised IndexError on narrower
    inputs and duplicated a middle column on wider ones.

    Args:
        mat: 2-D array-like with at least one column.
        n_extra: number of copies of the last column to append (default 2,
            matching the original behaviour).

    Returns:
        A new array of shape (mat.shape[0], mat.shape[1] + n_extra) with the
        same dtype as ``mat``.
    """
    # mode='edge' repeats the edge values, i.e. the last column, n_extra times
    # on the right of axis 1; axis 0 is left untouched.
    return np.pad(mat, ((0, 0), (0, n_extra)), mode='edge')

In [7]:
# Convert every note whose pitch lies in [24, 84] into log-magnitude and
# instantaneous-frequency features (linear and mel) and write one HDF5 file
# per note.
pitch_set = set()  # distinct pitches that were actually written
count = 0          # number of files written

# Create the output folder up front so h5py.File(..., 'w') cannot fail on a
# missing directory.
out_dir = os.path.join(base_path, 'hdf5/no_target/')
os.makedirs(out_dir, exist_ok=True)

# Each batch is (waveform, categorical target, metadata dict). The second
# element is not used here; the pitch is read from the metadata instead.
for samples, _target, targets in tqdm(loader):

    pitch = targets['pitch'].data.numpy()[0]

    # Skip notes outside the kept pitch range.
    if pitch < 24 or pitch > 84:
        continue

    sample = samples.data.numpy().squeeze()
    spec = librosa.stft(sample, n_fft=2048, hop_length=512)

    # Log-magnitude with a small offset to avoid log(0); only the first 1024
    # rows of the STFT output are kept.
    magnitude = np.log(np.abs(spec) + 1.0e-6)[:1024]
    angle = np.angle(spec)

    IF = phase_operation.instantaneous_frequency(angle, time_axis=1)[:1024]

    # Pad the second axis of both feature maps with expand().
    magnitude = expand(magnitude)
    IF = expand(IF)

    # Validate the shapes before the mel conversion consumes the arrays, so
    # an unexpected clip length fails here rather than downstream.
    assert magnitude.shape == (1024, 128)
    assert IF.shape == (1024, 128)

    logmelmag2, mel_p = spec_helper.specgrams_to_melspecgrams(magnitude, IF)

    pitch_set.add(pitch)

    sample_name = targets['note_str'][0]
    with h5py.File(os.path.join(out_dir, f'{sample_name}.h5'), 'w') as sample_file:
        sample_file.create_dataset("Spec", data=magnitude)
        sample_file.create_dataset("IF", data=IF)
        sample_file.create_dataset("mel_Spec", data=logmelmag2)
        sample_file.create_dataset("mel_IF", data=mel_p)
        sample_file.attrs.create("pitch", data=pitch)

    count += 1

100%|██████████| 12678/12678 [14:50<00:00, 14.23it/s] 


In [None]:
# Sanity check: reopen one of the written files and show its stored pitch.
# `sample_name` is whatever an earlier cell left behind (presumably the last
# note written by the conversion loop).
last_file_path = os.path.join(base_path, 'hdf5/no_target/' f'{sample_name}.h5')
with h5py.File(last_file_path, 'r') as sample_file:
    print(sample_file.attrs["pitch"])

In [None]:
# Write one HDF5 file per dataset item, holding its pitch and mel features.
# NOTE(review): pitch_list, mel_spec_list and mel_IF_list are not defined in
# any cell visible in this notebook, so this cell raises NameError on a fresh
# kernel. They are indexed by dataset position, so they presumably must hold
# one entry per dataset item, in dataset order. Confirm before running.
# The output folder is derived from base_path (same location as before)
# instead of repeating the absolute path.
hdf5_dir = os.path.join(base_path, 'hdf5')
for sample_index, sample in enumerate(dataset):
    _, _, sample_details = sample
    sample_name = sample_details['note_str']

    with h5py.File(os.path.join(hdf5_dir, f'{sample_name}.h5'), 'w') as sample_file:
#         sample_file.create_dataset("Spec", data=spec_list[sample_index])
#         sample_file.create_dataset("IF", data=IF_list[sample_index])
        sample_file.create_dataset("pitch", data=pitch_list[sample_index])
        sample_file.create_dataset("mel_Spec", data=mel_spec_list[sample_index])
        sample_file.create_dataset("mel_IF", data=mel_IF_list[sample_index])

In [None]:
# Display the number of entries in mel_IF_list.
# NOTE(review): mel_IF_list is not defined in any cell visible in this
# notebook. Confirm where it is built before relying on this check.
len(mel_IF_list)

In [None]:
# Save the first 50 entries of each feature list into one small HDF5 file.
# NOTE(review): spec_list, IF_list, pitch_list, mel_spec_list and mel_IF_list
# are not defined in any cell visible in this notebook. Confirm where they
# are built before running this cell.
subset_path = os.path.join(base_path, 'Nsynth_melspec_IF_pitch-subset-50_first.hdf5')

# The context manager closes the file even if one of the writes raises, which
# the previous explicit close() at the end of the cell did not guarantee.
with h5py.File(subset_path, 'w') as subset_data:
    subset_data.create_dataset("Spec", data=spec_list[:50])
    subset_data.create_dataset("IF", data=IF_list[:50])
    subset_data.create_dataset("pitch", data=pitch_list[:50])
    subset_data.create_dataset("mel_Spec", data=mel_spec_list[:50])
    subset_data.create_dataset("mel_IF", data=mel_IF_list[:50])