In [6]:
import os
from pathlib import Path
import numpy as np
from tqdm import tqdm
import librosa
from librosa import feature

In [16]:
def get_audio(sample_path, sr=16000, num_samples=16384):
    """
    Load an audio file and return it as a fixed-length waveform.

    The file is decoded with ``librosa.load`` at the requested sample rate and
    is then truncated, or right-padded with zeros, so that the result always
    holds exactly ``num_samples`` values. No spectral features are computed
    here; the raw waveform is returned.

    Parameters
    ----------
    sample_path : str or path-like
        Location of the audio file to load.
    sr : int, optional
        Sample rate handed to ``librosa.load`` (default 16000).
    num_samples : int, optional
        Length of the returned buffer (default 16384).

    Returns
    -------
    numpy.ndarray
        ``float32`` array of length ``num_samples``.
    """
    audio, _ = librosa.load(sample_path, sr=sr)

    buffer = np.zeros(num_samples, dtype=np.float32)

    # Copy as much audio as fits; whatever remains of the buffer stays zero,
    # which covers both the "too short" and the "too long" case in one step.
    n_copy = min(audio.shape[0], num_samples)
    buffer[:n_copy] = audio[:n_copy]

    return buffer

In [17]:
def find_samples(search_folder, sample_type):
    """
    Collect WAV files stored directly inside folders named ``sample_type``
    that live anywhere beneath ``search_folder``.

    Parameters
    ----------
    search_folder : str or path-like
        Root directory that is searched recursively.
    sample_type : str
        Folder name to look for. It is passed to ``Path.rglob``, so glob
        wildcards in it are honoured.

    Returns
    -------
    list of pathlib.Path
        Sorted paths of the regular files with a ``.wav`` extension
        (case-insensitive) found in the matching folders. Empty when
        nothing matches.
    """
    files = []
    for path in Path(search_folder).rglob(sample_type):
        # rglob matches any entry with this name; a regular file called
        # e.g. "kick" cannot be iterated, so skip everything but directories.
        if not path.is_dir():
            continue
        files.extend(
            entry for entry in path.iterdir()
            # Compare the real extension so names like "fakewav" or
            # sub-directories called "x.wav" are not picked up.
            if entry.is_file() and entry.suffix.lower() == '.wav'
        )

    # Directory listing order is filesystem-dependent; sort for reproducibility.
    return sorted(files)

In [18]:
# Gather the WAV file paths for each drum category found under ./drum_data
samples = dict(
    kicks=find_samples('./drum_data', 'kick'),
    snares=find_samples('./drum_data', 'snare'),
)

# Report how many files were discovered per category
print("Loaded %s kick drum samples" % (len(samples['kicks']),))
print("Loaded %s snare drum samples" % (len(samples['snares']),))

Loaded 2027 kick drum samples
Loaded 2210 snare drum samples


In [23]:
# Load every sample of the selected category through get_audio,
# showing a progress bar while the files are read
sample_type = 'snares'
dataset = [get_audio(sample) for sample in tqdm(samples[sample_type])]

100%|██████████| 2210/2210 [00:36<00:00, 59.86it/s]


In [24]:
# Stack the per-sample buffers into a single float64 numpy array.
# Note: no mean/variance standardization is applied at this step.
# np.float (an alias of the builtin float) was removed in NumPy 1.24;
# np.float64 is the dtype it resolved to, so the result is unchanged.
dataset = np.array(dataset, dtype=np.float64)

In [25]:
# Display the array dimensions as a sanity check
np.shape(dataset)

(2210, 16384)

In [26]:
# Derive the file name from sample_type so that switching the category
# cannot silently overwrite another category's saved array.
np.save('%s.npy' % sample_type, dataset)