In [7]:
from glob import glob
import librosa
import numpy as np

from multiprocessing import Pool


# Collect the audio file paths for the two classes
# (original note: "bird_classifier/bird and bird_classifier/background").
# NOTE: glob() is called without recursive=True, so "**" behaves like "*"
# here and matches exactly one directory level below each dataset root.
# `background` holds the UrbanSound8K .wav paths, `birds` the birdsong .mp3 paths.
background = glob("/mnt/md0/urbansounddb/UrbanSound8K/audio/**/*wav")
birds = glob('/mnt/md0/birdsong-recognition/train_audio/**/*mp3')




In [39]:
# Worker function executed in each Pool process (processes, not threads).
def doIt(f):
    """Load one audio file and cut it into non-overlapping 3-second clips.

    Parameters
    ----------
    f : str
        Path to an audio file. A path ending in ".wav" (case-insensitive)
        is labelled 0; any other file is labelled 1.

    Returns
    -------
    tuple(list, list)
        ``x_train``: 1-D sample arrays, each exactly ``3 * sr`` samples long
        (a trailing partial clip is dropped).
        ``y_train``: the matching class label for every clip.
        Both lists are empty when the file could not be loaded.
    """
    x_train = []
    y_train = []

    # Decide the label from the file extension. A plain substring test on the
    # whole path ("wav" in f) would mislabel any mp3 that lives under a
    # directory whose name happens to contain "wav".
    theclass = 0 if f.lower().endswith(".wav") else 1

    try:
        # Resample to half of librosa's default 22050 Hz rate.
        mp3, sr = librosa.load(f, sr=22050 // 2)
        clip_len = sr * 3  # samples in one 3-second clip
        for i in range(0, mp3.shape[0], clip_len):
            x = mp3[i:i + clip_len]
            # Keep only full-length clips so every row has the same size.
            if x.shape[0] == clip_len:
                x_train.append(x)
                y_train.append(theclass)
    except Exception as e:
        # Best effort: an unreadable file is reported and skipped so that one
        # bad file does not abort the whole pool run.
        print("Skipping %s: %s" % (f, e))
    return x_train, y_train

if __name__ == '__main__':
    # Start 30 worker processes. The context manager terminates the pool on
    # exit, including when a map() call raises, so no workers are left behind
    # to cause problems with the kernel.
    with Pool(30) as p:
        # Every 50th bird recording is processed; map() returns one
        # (clips, labels) tuple per input file, in input order.
        pooled_birds = p.map(doIt, birds[::50])
        # All background recordings are processed.
        pooled_background = p.map(doIt, background)



In [41]:
# Pool.map returns one result per input file, so these are the numbers of
# bird files and background files that were submitted for processing.
print(len(pooled_birds), len(pooled_background))

428 8732


In [42]:
def unpoolData(pooled):
    """Flatten per-file worker results into two NumPy arrays.

    ``pooled`` is an iterable of ``(clips, labels)`` pairs. All clips are
    concatenated (in order) into the first returned array and all labels
    into the second.
    """
    clips = []
    labels = []
    for result in pooled:
        clips.extend(result[0])
        labels.extend(result[1])
    return np.array(clips), np.array(labels)
 

In [43]:
# Flatten the per-file results into arrays. Bird results come first and
# background results second, so the rows are ordered by class (not shuffled).
x_train, y_train = unpoolData(pooled_birds + pooled_background)

In [44]:
# Only the last expression of a cell is displayed, so the output below is
# x_train.shape; the tuple on the first line is evaluated but never shown.
len(x_train), len(y_train)
x_train.shape

(16342, 33075)

In [45]:
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import Adam, Adagrad

# 1-D convolutional network that classifies a raw-audio clip as
# background (0) or bird (1).
model = Sequential()

# Number of samples in one clip. Read it from the data instead of hard-coding
# 33075 so the model follows any change to the clip length or sample rate.
# The name `sr` is kept from the original cell, but note that this is a clip
# length in samples, not a sample rate.
sr = x_train.shape[1]

# Feature extractor: Conv1D + MaxPooling1D stages that progressively shorten
# the time axis while increasing the number of channels.
model.add(Conv1D(16, 3, input_shape=(sr, 1), activation='relu'))
model.add(MaxPooling1D(2))
model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling1D(4))
model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling1D(4))
model.add(Conv1D(64, 3, activation='relu'))
model.add(MaxPooling1D(4))
model.add(Conv1D(128, 3, activation='relu'))
model.add(Dropout(.1))
model.add(MaxPooling1D(3))
model.add(Conv1D(256, 3, activation='relu'))
model.add(Dropout(.1))
model.add(MaxPooling1D(3))

# Classifier head: two dense layers with light dropout.
model.add(Flatten())
model.add(Dense(300, activation='relu'))
model.add(Dropout(.1))
model.add(Dense(300, activation='relu'))
model.add(Dropout(.1))
# Binary classifier: a single sigmoid unit outputs the probability of class 1.
# (A multiclass classifier would use a softmax layer instead.)
model.add(Dense(1, activation='sigmoid'))

# A single sigmoid output pairs with binary cross-entropy
# (softmax would pair with categorical cross-entropy).
model.compile(loss='binary_crossentropy', optimizer=Adagrad(), metrics=['acc'])
model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_13 (Conv1D)           (None, 33073, 16)         64        
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 16536, 16)         0         
_________________________________________________________________
conv1d_14 (Conv1D)           (None, 16536, 32)         1568      
_________________________________________________________________
max_pooling1d_14 (MaxPooling (None, 4134, 32)          0         
_________________________________________________________________
conv1d_15 (Conv1D)           (None, 4134, 32)          3104      
_________________________________________________________________
max_pooling1d_15 (MaxPooling (None, 1033, 32)          0         
_________________________________________________________________
conv1d_16 (Conv1D)           (None, 1031, 64)         

In [50]:
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. x_train is all bird clips followed by all background clips, so
# without shuffling the validation set would contain background clips only and
# val_acc would be meaningless. Shuffle both arrays with one fixed-seed
# permutation so the split contains both classes and the run is reproducible.
# Note: fancy indexing builds a shuffled copy, so peak memory roughly doubles.
shuffle_idx = np.random.RandomState(0).permutation(x_train.shape[0])
model.fit(x_train[shuffle_idx], y_train[shuffle_idx], validation_split=.1, epochs=10)

Train on 14707 samples, validate on 1635 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f78ec3f4510>

In [47]:
# Class balance: labels are 0 (.wav / background) or 1 (everything else / bird),
# so this is the fraction of clips labelled 1.
np.sum(y_train)/y_train.shape[0]

0.5430791824746053