In [1]:
%matplotlib inline
import librosa
import numpy as np
import pandas
import gzip
import os
import glob
import matplotlib.pyplot as plt
import scipy
import sklearn
import subprocess
import json
from tqdm import tqdm
print("librosa: {:s}".format(librosa.__version__))
print("numpy: {:s}".format(np.__version__))
print("scipy: {:s}".format(scipy.__version__))
print("sklearn: {:s}".format(sklearn.__version__))
print("librosa: {:s}".format(sklearn.__version__))
sp = subprocess.run("which python", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
print("conda kernel: {:s}".format(str(sp.stdout).split('/')[-3]))

librosa: 0.5.0
numpy: 1.12.1
scipy: 0.19.0
sklearn: 0.18.1
librosa: 0.18.1
conda kernel: py35


In [2]:
# ANALYSIS PARAMETERS 
FS = 44100
FRAMESIZE = 1764
HOPSIZE = 882
FFTSIZE = 2048
WINDOWTYPE = 'hann'

# Mel band parameters
MEL_numberBands = 128

metadata = {
    'fs': FS,
    'frame_size': FRAMESIZE,
    'hop_size': HOPSIZE,
    'window_type': WINDOWTYPE,
    'mel_numberBands': MEL_numberBands,
    'hpc_server': 'prince',
    'conda_env': str(sp.stdout).split('/')[-3],
    'librosa': librosa.__version__,
    'numpy': np.__version__,
    'scipy': scipy.__version__,
    'sklearn': sklearn.__version__
}

In [3]:
def extract_logmelspec_librosa(audiofile, outputfile):
    '''
    Given the path to a file, use librosa to extract the log mel spectrogram and
    save it to outputfile.
    '''
#     print('--> Extracting Mel spectrogram for {:s} (bands={:d})'.format(os.path.basename(audiofile), MEL_numberBands))

    audio, sr = librosa.load(audiofile, sr=FS, mono=True)
    stft = librosa.stft(audio, n_fft=FFTSIZE, win_length=FRAMESIZE,
                        hop_length=HOPSIZE, window=WINDOWTYPE)
    stft = np.abs(stft)**2
    melspec = librosa.feature.melspectrogram(
        y=None, S=stft, sr=FS, n_fft=FFTSIZE, hop_length=HOPSIZE,
        n_mels=MEL_numberBands, htk=True, fmin=0.0, fmax=FS/2.0)

    logmelspec = librosa.core.logamplitude(melspec, ref=1.0)
    logmelspec = logmelspec.astype(np.float32) # downcast to float32

    if not os.path.isdir(os.path.split(outputfile)[0]):
        os.mkdir(os.path.split(outputfile)[0])

    f = gzip.open(outputfile, 'wb')
    logmelspec.dump(f)
    f.close()

In [4]:
# TRAIN
splits = ['train', 'validate', 'test']

for split in splits:
    print(split)
    audio_folder = '/scratch/js7561/datasets/scaper_waspaa2017/urban-sed/audio/' + split
    features_folder = '/scratch/js7561/datasets/scaper_waspaa2017/urban-sed/features/logmelspec1764_128/' + split

    if not os.path.isdir(features_folder):
        os.mkdir(features_folder)

    # Create folder
    outfolder = features_folder
    if not os.path.isdir(outfolder):
        os.mkdir(outfolder)

    # Save metadata
    metadata_file = os.path.join(outfolder, '_metadata.json')
    json.dump(metadata, open(metadata_file, 'w'), indent=2)

    # Iterate over audio files
    infolder = audio_folder
    audiofiles = glob.glob(os.path.join(infolder, '*.wav'))
    audiofiles = sorted(audiofiles)
    for af in tqdm(audiofiles):
        outfile = os.path.join(outfolder, os.path.basename(af).replace(".wav", ".npy.gz"))
        extract_logmelspec_librosa(af, outfile)

  0%|          | 0/6000 [00:00<?, ?it/s]

train


100%|██████████| 6000/6000 [23:51<00:00,  6.28it/s]
  0%|          | 1/2000 [00:00<05:38,  5.90it/s]

validate


100%|██████████| 2000/2000 [05:17<00:00,  5.97it/s]
  0%|          | 1/2000 [00:00<05:38,  5.90it/s]

test


100%|██████████| 2000/2000 [05:15<00:00,  5.96it/s]
