In [1]:
import numpy as np
import os
import glob

import librosa
from librosa import load
from librosa import cqt
from librosa.display import specshow

from tqdm import tqdm

In [2]:
# Root folder of the SOL audio dataset. Set the SOL_DATA_DIR environment
# variable to point at a different location; when it is unset the original
# hard-coded path is used, so existing behaviour is unchanged.
data_dir = os.environ.get('SOL_DATA_DIR', '/Users/sripathisridhar/Desktop/SOL')
# Hop between analysis frames, in samples (used for both the CQT and the RMS).
hop_size= 512
# Number of CQT bins per octave.
q= 24

In [3]:
# Collect the .wav files in deterministic (sorted) order.
# NOTE(review): glob is called without recursive=True, so '**' behaves like
# '*' and only files exactly one directory below data_dir are matched --
# confirm this is the intended depth.
wav_pattern = os.path.join(data_dir, '**', '*.wav')
file_paths = sorted(glob.glob(wav_pattern))

# Bare file names (directory part stripped), in the same order as file_paths.
file_names = [os.path.basename(path) for path in file_paths]

In [4]:
# Maps each audio file's base name to one CQT power frame (a 1-D array with
# q*7 entries): the frame at which the signal's RMS energy peaks.
features_dict= {}

for file_path in tqdm(file_paths, disable=False):

    # Read audio files (sr=None keeps each file's native sampling rate)
    waveform, sample_rate= load(file_path, sr=None)

    # Compute CQTs: q bins per octave, q*7 bins in total
    cqt_complex= cqt(y=waveform,
                    sr=sample_rate,
                    hop_length=hop_size,
                    bins_per_octave=q,
                    n_bins=q*7,
                    sparsity=1e-6,
                    )

    # Find frame with maximum RMS value (same hop as the CQT so that the
    # index can be used to pick a CQT column)
    rms= librosa.feature.rms(y=waveform, hop_length=hop_size)
    rms_argmax= np.argmax(rms)

    # Squared magnitude of that single frame only. Slicing before np.abs
    # avoids squaring the whole matrix, and the result is a fresh small
    # array rather than a view, so the full transform can be freed instead
    # of being kept alive by every entry of features_dict.
    feature= np.abs(cqt_complex[:, rms_argmax])**2

    # Stack in dict, keyed by base name. Files that share a base name in
    # different folders overwrite one another here.
    feature_key= os.path.basename(file_path)
    features_dict[feature_key]= feature

100%|██████████| 1212/1212 [05:01<00:00,  4.62it/s]


In [5]:
import h5py

# Persist every feature vector as its own HDF5 dataset, named after its
# dictionary key. Mode "w" creates SOL.h5, replacing any existing file.
with h5py.File("SOL.h5", "w") as h5_out:
    for name, vector in features_dict.items():
        h5_out[name] = vector

In [6]:
# Load the saved features back into a plain dict. Indexing a dataset with
# [()] reads its whole contents into memory, so the values remain usable
# after the file is closed.
with h5py.File("SOL.h5", "r") as h5_in:
    my_dict = {name: dataset[()] for name, dataset in h5_in.items()}

In [8]:
# Peek at ten of the stored keys (positions 500-509) as a sanity check.
list(my_dict.keys())[500:510]

['Fl-ord-B4-ff.wav',
 'Fl-ord-B4-mf.wav',
 'Fl-ord-B4-pp.wav',
 'Fl-ord-B5-ff.wav',
 'Fl-ord-B5-mf.wav',
 'Fl-ord-B5-pp.wav',
 'Fl-ord-B6-ff.wav',
 'Fl-ord-B6-mf.wav',
 'Fl-ord-B6-pp.wav',
 'Fl-ord-C#4-ff.wav']