In [1]:
import glob
import os

import h5py
import librosa
import numpy as np
from librosa import cqt
from librosa import load
from librosa.display import specshow
from tqdm import tqdm

In [2]:
# Folder containing the .wav recordings. Set the NTVOW_DIR environment variable
# to point at another location; the default keeps the original path.
data_dir = os.environ.get('NTVOW_DIR', '/Users/sripathisridhar/Desktop/NTVOW/')
hop_size = 512  # passed to cqt() as hop_length
q = 24  # passed to cqt() as bins_per_octave

In [3]:
# Count every entry in data_dir, whatever its type or extension; this can
# exceed the number of '*.wav' files that the glob in the next cell matches.
len(os.listdir(data_dir))

3192

In [4]:
# Sorted full paths of every .wav file directly inside data_dir.
file_paths = sorted(glob.glob(os.path.join(data_dir, '*.wav')))

# Bare file names (directory stripped), in the same order as file_paths.
file_names = [os.path.basename(path) for path in file_paths]

In [5]:
# Build a mapping from file name to the middle frame of that recording's
# power scalogram (squared CQT magnitude).
features_dict = {}
feature_key = ''

for file_path in tqdm(file_paths):
    file_name = os.path.basename(file_path)

    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, sample_rate = load(file_path, sr=None)

    # Constant-Q transform: q bins per octave, q*7 bins in total (7 octaves).
    cqt_complex = cqt(
        y=waveform,
        sr=sample_rate,
        hop_length=hop_size,
        bins_per_octave=q,
        n_bins=q * 7,
        sparsity=1e-6,
    )
    scalogram = np.abs(cqt_complex) ** 2

    # Keep only the column at the temporal midpoint of the scalogram.
    middle_frame = scalogram.shape[1] // 2
    feature = scalogram[:, middle_frame]

    # Store the feature vector under the recording's file name.
    feature_key = f'{file_name}'
    features_dict[feature_key] = feature

100%|██████████| 3190/3190 [02:18<00:00, 22.98it/s]


In [6]:
# Persist the features to NTVow.h5, one dataset per recording, named after the
# file. Mode "w" creates the file and overwrites any existing one.
with h5py.File("NTVow.h5", "w") as f:
    for key, vector in features_dict.items():
        f[key] = vector

In [7]:
# Read every dataset in NTVow.h5 back into memory as {name: array};
# indexing a dataset with [()] loads its full contents.
with h5py.File("NTVow.h5", "r") as f:
    my_dict = {name: dataset[()] for name, dataset in f.items()}

In [8]:
# Show the keys at positions 500-509 as a quick check of the reloaded dict.
list(my_dict)[500:510]

['k7brii02.wav',
 'k7brii03.wav',
 'k7brii04.wav',
 'k7brii05.wav',
 'k7brii06.wav',
 'k7bril01.wav',
 'k7bril02.wav',
 'k7bril03.wav',
 'k7bril04.wav',
 'k7bril05.wav']