In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import time
%matplotlib inline
# Start from the ggplot style sheet, then override typography below.
plt.style.use('ggplot')

# Font family and per-element text sizes, applied in a single update.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [3]:
# Additive floor applied before np.log so that zero-energy mel bins do not
# produce -inf. Note that 10e-10 is numerically 1e-9.
EPSILON = 10e-10
def extract_feature(file_name):
    """Compute clip-level (time-averaged) features for one audio file.

    Parameters
    ----------
    file_name : str
        Path passed to ``librosa.load``.

    Returns
    -------
    mfccs : ndarray
        Mean over axis 0 of the transposed 40-coefficient MFCC matrix.
    logMel : ndarray
        Mean over axis 0 of ``log(mel spectrogram + EPSILON)`` (transposed).
    mel : ndarray
        Mean over axis 0 of the transposed mel spectrogram.

    Notes
    -----
    Only MFCC and mel features are computed. The chroma, spectral-contrast
    and tonnetz features were already disabled in this function, so the STFT
    they relied on is no longer computed either.
    """
    X, sample_rate = librosa.load(file_name)
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    # Pass the signal by keyword, matching the mfcc call above; a positional
    # signal argument is not accepted by every librosa release.
    raw_mel = librosa.feature.melspectrogram(y=X, sr=sample_rate).T
    mel = np.mean(raw_mel, axis=0)
    # Shift by EPSILON before the log to avoid log(0).
    processed_mel = raw_mel + EPSILON
    logMel = np.mean(np.log(processed_mel),axis=0)
    return mfccs,logMel, mel

def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
    """Extract one clip-level feature row per audio file under the given folders.

    NOTE: a second ``parse_audio_files`` defined later in this notebook
    rebinds this name; this version is the one in effect until that cell runs.

    Parameters
    ----------
    parent_dir : str
        Directory containing the sub-directories to scan.
    sub_dirs : iterable of str
        Sub-directory names joined onto ``parent_dir``.
    file_ext : str
        Glob pattern selecting the files inside each sub-directory.

    Returns
    -------
    features : ndarray
        One row per file: ``[mfccs, logmel, mel]`` concatenated. An empty
        ``(0, 296)`` array is returned when no file matches.
    labels : ndarray of int
        Second ``-``-separated field of each file's base name.
    namePath : ndarray of bytes
        Path of each processed file, in the same order as the rows.
    """
    # Collect into Python lists and convert once at the end: repeated
    # np.vstack/np.append re-copies everything on every file (quadratic) and
    # float-seeded arrays force string/float dtype promotion.
    feature_rows, label_values, paths = [], [], []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            mfccs, logmel, mel = extract_feature(fn)
            feature_rows.append(np.hstack([mfccs,logmel, mel]))
            # basename handles whichever path separator glob produced.
            label_values.append(int(os.path.basename(fn).split('-')[1]))
            paths.append(fn)
    if feature_rows:
        features = np.vstack(feature_rows)
    else:
        features = np.empty((0,296))
    # Builtin int and the 'S' dtype code replace the np.int / np.string_
    # aliases, which newer NumPy releases no longer provide.
    return features, np.array(label_values, dtype=int), np.array(paths, dtype='S')

def one_hot_encode(labels, n_classes=None):
    """Turn integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : array-like of int
        Class index of each sample.
    n_classes : int, optional
        Number of columns to produce. When omitted, the width is the larger
        of the number of distinct labels and ``max(labels) + 1``, so a label
        set with gaps (e.g. ``[0, 2]``) no longer indexes out of bounds.
        Pass it explicitly to get the same width for every data split.

    Returns
    -------
    ndarray of float, shape (len(labels), width)
        Row ``i`` has a 1 in column ``labels[i]`` and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``n_classes`` is too small to hold the largest label.
    """
    labels = np.asarray(labels)
    n_labels = len(labels)
    if n_labels == 0:
        # No samples: keep the historical (0, 0) shape unless a width is given.
        return np.zeros((0, n_classes if n_classes is not None else 0))
    largest = int(labels.max())
    if n_classes is None:
        n_classes = max(len(np.unique(labels)), largest + 1)
    elif largest >= n_classes:
        raise ValueError(
            "n_classes={0} cannot hold label {1}".format(n_classes, largest))
    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

def one_hot_decode(labels):
    """Recover class indices from a 2-D one-hot matrix.

    For each row, returns the index of the first column equal to 1, or 0
    when the row contains no 1. The result is a float array with one entry
    per row.
    """
    row_count, column_count = np.shape(labels)
    if column_count == 0:
        # Nothing to search in any row; every entry stays at its default 0.
        return np.zeros((row_count))
    # argmax on the boolean mask yields the first True per row, and 0 for
    # rows without any True.
    is_hot = labels == 1
    return np.argmax(is_hot, axis=1).astype(np.float64)

In [4]:
# Run the extractor on one clip and record the length of each returned
# feature vector (mfccs, logMel, mel, in that order).
x = extract_feature("../../data/UrbanSound8K/audio/fold1/7061-6-0-0.wav")
nfMfccs, nfLogMel, nfMel = (np.shape(part)[0] for part in x)

In [5]:
# Parenthesised single-argument print works as both the Python 2 statement
# and the Python 3 function, and prints the same text in each.
print("Number of features")
print("* Mfcc: {0}".format(nfMfccs))
print("* logMel {0}".format(nfLogMel))
print("* Mel {0}".format(nfMel))

Number of features
* Mfcc: 40
* logMel 128
* Mel 128


In [7]:
# Extract clip-level features for fold 1 and report the wall-clock time.
parent_dir = '../../data/UrbanSound8K/audio/'
sub_dirs = ['fold1']
# Parenthesised single-argument print is valid on both Python 2 and 3.
print("---Extracting features.... ---")
start_time = time.time()
features, labels, file_names = parse_audio_files(parent_dir,sub_dirs)
print("---Loading time: {0} seconds ---".format(time.time() - start_time))

---Extracting features.... ---
---Loading time: 192.633636951 seconds ---


In [8]:
# One-hot encode the integer class labels produced by the extraction cell above.
one_hot_labels = one_hot_encode(labels)

### Shingling

In [54]:
# Shingling configuration, read as globals by extract_shingled_features:
# frames per shingle, and frames advanced between consecutive shingles.
window_length, hop_length = 10, 5

In [183]:
def shingle_features(features, window_length, hop_length, label):
    """Stack overlapping runs of consecutive frames into flat sample vectors.

    Window ``k`` covers frames ``k*hop_length`` up to (but excluding)
    ``k*hop_length + window_length``; its rows are concatenated in frame
    order into one vector.

    Parameters
    ----------
    features : array-like, shape (nFrames, nFeatures)
        Per-frame feature matrix.
    window_length : int
        Number of frames per shingle; must be positive.
    hop_length : int
        Number of frames between the starts of consecutive shingles; must be
        positive.
    label : number or numeric str
        Class label, converted with ``float`` and replicated for every shingle.

    Returns
    -------
    new_samples : ndarray of float, shape (nWindows, nFeatures * window_length)
    labels : ndarray of float, shape (nWindows,)
        Both are ``None`` when the input has fewer than ``window_length``
        frames, i.e. when not even one full window fits.

    Raises
    ------
    ValueError
        If ``window_length`` or ``hop_length`` is not positive.
    """
    if window_length <= 0 or hop_length <= 0:
        raise ValueError("window_length and hop_length must be positive")
    features = np.asarray(features)
    nFrames, nFeatures = features.shape
    if nFrames < window_length:
        return None, None
    # Integer count of full windows. The previous float-valued formula
    # (nFrames - hop) / (window - hop) only agreed with this when
    # window_length == 2 * hop_length, and could not be used as an array size.
    nMovingWindow = (nFrames - window_length) // hop_length + 1
    new_samples = np.zeros((nMovingWindow, nFeatures * window_length))
    for window in range(nMovingWindow):
        start = window * hop_length
        # Row-major flattening of the slice equals concatenating its rows.
        new_samples[window, :] = features[start:start + window_length].reshape(-1)
    labels = np.full(nMovingWindow, float(label))
    return new_samples, labels

In [184]:
def extract_shingled_features(file_name):
    """Compute frame-level log-mel + MFCC features for a file and shingle them.

    The class label is the second ``-``-separated field of the file's base
    name. Window and hop sizes come from the module-level ``window_length``
    and ``hop_length`` variables.

    Parameters
    ----------
    file_name : str
        Path passed to ``librosa.load``.

    Returns
    -------
    (shingled, labels)
        Whatever ``shingle_features`` returns for this file's frame matrix,
        including ``(None, None)`` when it reports the clip as too short.
    """
    X, sample_rate = librosa.load(file_name)
    # basename handles whichever path separator the caller's path uses.
    label = os.path.basename(file_name).split('-')[1]
    raw_mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T
    # Pass the signal by keyword, matching the mfcc call above; a positional
    # signal argument is not accepted by every librosa release.
    raw_mel = librosa.feature.melspectrogram(y=X, sr=sample_rate).T
    # Shift by EPSILON before the log to avoid log(0).
    processed_mel = raw_mel + EPSILON
    logmel = np.log(processed_mel)
    # Per-frame row: log-mel bands followed by MFCCs.
    features = np.hstack([logmel, raw_mfccs])
    shingled, labels = shingle_features(features, window_length=window_length, hop_length=hop_length, label=label)
    return shingled, labels

In [208]:
def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav', shingling=False):
    """Extract features for every audio file under the given folders.

    NOTE: this rebinds the ``parse_audio_files`` defined earlier in the
    notebook; after this cell runs, only this version is callable.

    Parameters
    ----------
    parent_dir : str
        Directory containing the sub-directories to scan.
    sub_dirs : iterable of str
        Sub-directory names joined onto ``parent_dir``.
    file_ext : str
        Glob pattern selecting the files inside each sub-directory.
    shingling : bool
        If True, each file contributes one row per shingle (via
        ``extract_shingled_features``); files for which that helper returns
        ``None`` contribute no rows. If False, each file contributes a single
        ``[logmel, mfccs]`` clip-level row (via ``extract_feature``).

    Returns
    -------
    features : ndarray
        Stacked feature rows. When nothing was extracted, an empty array of
        shape ``(0, 1680)`` (shingling) or ``(0, 168)`` (clip-level).
    labels : ndarray of int
        One label per feature row.
    namePath : ndarray of bytes
        One entry per globbed file, including files that yielded no
        shingles. In shingling mode this is therefore NOT aligned row-for-row
        with ``features``/``labels``.
    """
    # Collect into Python lists and convert once at the end: repeated
    # np.vstack/np.append re-copies everything on every file (quadratic).
    feature_blocks, label_values, paths = [], [], []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            if shingling:
                shingled, shingle_labels = extract_shingled_features(fn)
                # Identity test: `!= None` on an ndarray compares elementwise
                # and is ambiguous inside an `if`.
                if shingle_labels is not None:
                    label_values.extend(shingle_labels)
                    feature_blocks.append(shingled)
            else:
                # Previously this branch re-created `features` for every file
                # (discarding earlier rows) with a width that did not match
                # the 168-wide [logmel, mfccs] vector.
                mfccs, logmel, mel = extract_feature(fn)
                label_values.append(int(os.path.basename(fn).split('-')[1]))
                feature_blocks.append(np.hstack([logmel, mfccs]))
            paths.append(fn)
    if feature_blocks:
        features = np.vstack(feature_blocks)
    else:
        features = np.empty((0, 1680 if shingling else 168))
    # Builtin int and the 'S' dtype code replace the np.int / np.string_
    # aliases, which newer NumPy releases no longer provide.
    return features, np.array(label_values, dtype=int), np.array(paths, dtype='S')

In [196]:
# Re-run extraction in shingling mode using the current parent_dir/sub_dirs.
# This rebinds features, labels and file_names from the earlier clip-level run.
features, labels, file_names = parse_audio_files(parent_dir,sub_dirs, shingling=True)



../../data/UrbanSound8K/audio/fold1/180937-7-3-11.wav
../../data/UrbanSound8K/audio/fold1/193394-3-0-4.wav
../../data/UrbanSound8K/audio/fold1/180937-7-1-3.wav
../../data/UrbanSound8K/audio/fold1/97317-2-0-23.wav
../../data/UrbanSound8K/audio/fold1/155202-9-0-42.wav
../../data/UrbanSound8K/audio/fold1/78360-4-0-6.wav
../../data/UrbanSound8K/audio/fold1/30823-8-0-0.wav
../../data/UrbanSound8K/audio/fold1/97317-2-0-34.wav
../../data/UrbanSound8K/audio/fold1/103074-7-3-2.wav
../../data/UrbanSound8K/audio/fold1/15564-2-0-2.wav
../../data/UrbanSound8K/audio/fold1/102106-3-0-0.wav
../../data/UrbanSound8K/audio/fold1/124489-9-0-18.wav
../../data/UrbanSound8K/audio/fold1/7383-3-1-0.wav
../../data/UrbanSound8K/audio/fold1/180937-7-3-33.wav
../../data/UrbanSound8K/audio/fold1/180937-7-2-5.wav
../../data/UrbanSound8K/audio/fold1/182800-2-2-1.wav
../../data/UrbanSound8K/audio/fold1/59277-0-0-0.wav
../../data/UrbanSound8K/audio/fold1/9031-3-4-0.wav
../../data/UrbanSound8K/audio/fold1/138031-2-0-7.w

In [192]:
# Quick check of the stacked feature matrix: (rows, values per row).
features.shape

(33, 1680)

In [197]:
# Persist the fold 1 arrays; np.save appends the ".npy" suffix to each name.
np.save(file="nn_shingled_features_fold_1", arr=features, allow_pickle=True)
np.save(file="nn_labels_fold_1", arr=labels, allow_pickle=True)
np.save(file="nn_file_names_fold_1", arr=file_names, allow_pickle=True)

### Test set

In [209]:
# Extract shingled features for the held-out test set and time the run.
parent_dir = '../../data/'
sub_dirs = ['test_set_unfolded']
# Parenthesised single-argument print is valid on both Python 2 and 3.
print("---Extracting features.... ---")
start_time = time.time()
test_features, test_labels, test_file_names = parse_audio_files(parent_dir,sub_dirs, shingling=True)
print("---Loading time: {0} seconds ---".format(time.time() - start_time))

---Extracting features.... ---




---Loading time: 23.9566111565 seconds ---


In [211]:
# Number of test-set labels (one per shingle).
test_labels.shape

(5419,)

In [214]:
# Persist the test-set arrays; np.save appends the ".npy" suffix to each name.
np.save(file="nn_shingled_features_test", arr=test_features, allow_pickle=True)
np.save(file="nn_labels_test", arr=test_labels, allow_pickle=True)
np.save(file="nn_file_names_test", arr=test_file_names, allow_pickle=True)

### Fold 2

In [216]:
# Extract shingled features for fold 2 and time the run.
parent_dir = '../../data/UrbanSound8K/audio/'
sub_dirs = ['fold2']
# Parenthesised single-argument print is valid on both Python 2 and 3.
print("---Extracting features.... ---")
start_time = time.time()
features_f2, labels_f2, file_names_f2 = parse_audio_files(parent_dir,sub_dirs, shingling=True)
print("---Loading time: {0} seconds ---".format(time.time() - start_time))

---Extracting features.... ---




---Loading time: 202.471961021 seconds ---


In [218]:
# Persist the fold 2 arrays; np.save appends the ".npy" suffix to each name.
np.save(file="nn_shingled_features_fold_2", arr=features_f2, allow_pickle=True)
np.save(file="nn_labels_fold_2", arr=labels_f2, allow_pickle=True)
np.save(file="nn_file_names_fold_2", arr=file_names_f2, allow_pickle=True)