In [68]:
import glob

import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import IPython

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

In [69]:
import opendatasets as od

# SECURITY: a Kaggle API token was previously pasted into this cell as a comment.
# Credentials must never live in the notebook source; treat that token as leaked
# and revoke it from the Kaggle account settings. Supply credentials at run time
# instead (opendatasets prompts for them; a local `kaggle.json` kept out of
# version control is the usual alternative -- confirm against the library docs).
DATASET_URL = "https://www.kaggle.com/vinayshanbhag/bird-song-data-set"

# Downloads into ./bird-song-data-set; skipped when the folder already exists.
od.download(DATASET_URL)

Skipping, found downloaded files in "./bird-song-data-set" (use force=True to force download)


In [70]:
# Load the per-clip metadata table shipped with the dataset and preview five
# random rows to see which columns are available.
metadata_path = 'bird-song-data-set/bird_songs_metadata.csv'
df = pd.read_csv(metadata_path)
df.sample(n=5)

Unnamed: 0,id,genus,species,subspecies,name,recordist,country,location,latitude,longitude,altitude,sound_type,source_url,license,time,date,remarks,filename
3273,480032,Melospiza,melodia,,Song Sparrow,Thomas G. Graves,United States,"Montana de Oro State Park, Los Osos, San Luis ...",35.2557,-120.8872,30,song,//www.xeno-canto.org/480032,//creativecommons.org/licenses/by-nc-sa/4.0/,7:21,2019-06-06,"Bird about 10 feet up on shrub/tree, about 30 ...",480032-4.wav
345,332644,Thryomanes,bewickii,,Bewick's Wren,Aidan Place,United States,"Pinnacles National Park, San Benito, California",36.4836,-121.1659,320,song,//www.xeno-canto.org/332644,//creativecommons.org/licenses/by-nc-sa/4.0/,10:00,2016-08-13,,332644-5.wav
3539,363141,Melospiza,melodia,,Song Sparrow,Frank Lambert,United States,"Yampa River Botanic Park, Routt Co, Colorado",40.4725,-106.8311,2100,song,//www.xeno-canto.org/363141,//creativecommons.org/licenses/by-nc-nd/4.0/,09:55,2015-04-25,,363141-4.wav
5009,233122,Cardinalis,cardinalis,superbus,Northern Cardinal,David Vander Pluym,United States,"Santa Maria River, La Paz County, Arizona",34.3007,-113.4803,380,song,//www.xeno-canto.org/233122,//creativecommons.org/licenses/by-nc-sa/4.0/,07:45,2015-03-18,Bird was singing from mesquite edge of riparia...,233122-0.wav
2823,177384,Turdus,migratorius,,American Robin,Jorge de Leon Cardozo and Susan Hochgraf,United States,"Trask Road, Willington, Tolland County, Connec...",41.8817,-72.2826,160,song,//www.xeno-canto.org/177384,//creativecommons.org/licenses/by-nc-sa/4.0/,18:00,2014-05-05,Bird in top of tall deciduous tree late aftern...,177384-3.wav


In [71]:
# Folder (relative to the notebook) containing the downloaded .wav clips; it is
# prepended to every metadata `filename` so the clips can be opened directly.
directory = 'bird-song-data-set/wavfiles/'

In [72]:
# Keep only the label (`name`) and the clip file name. `.copy()` makes `df` an
# independent frame, so the column assignment below cannot raise pandas'
# SettingWithCopyWarning or silently write to a temporary.
df = df[['name', 'filename']].copy()

# Turn bare file names into paths under `directory`. Rows that already carry the
# prefix are left alone, so re-running this cell does not prepend it twice.
already_prefixed = df['filename'].str.startswith(directory, na=False)
df['filename'] = df['filename'].where(already_prefixed, directory + df['filename'])

In [73]:
# Spot-check five random rows: only `name` and the directory-prefixed `filename` should remain.
df.sample(5)

Unnamed: 0,name,filename
502,Bewick's Wren,bird-song-data-set/wavfiles/217854-10.wav
3600,Song Sparrow,bird-song-data-set/wavfiles/325378-7.wav
1792,Northern Mockingbird,bird-song-data-set/wavfiles/170052-3.wav
3091,American Robin,bird-song-data-set/wavfiles/13602-14.wav
1433,Northern Mockingbird,bird-song-data-set/wavfiles/321932-6.wav


In [74]:
def get_spectrogram(filepath=None, y=None, sr=None):
    """Return the log-scaled (dB) mel spectrogram of an audio clip.

    The audio comes from ``filepath`` when one is given; otherwise the
    already-loaded waveform ``y`` with sampling rate ``sr`` is used.

    Parameters
    ----------
    filepath : str or path-like, optional
        Audio file read with ``librosa.load`` using its default arguments.
        Takes precedence over ``y``/``sr`` when truthy.
    y : array-like, optional
        Waveform samples; used only when ``filepath`` is not given.
    sr : number, optional
        Sampling rate of ``y``; required whenever ``y`` is used.

    Returns
    -------
    np.ndarray
        Mel power spectrogram converted to decibels with ``ref=np.max``,
        i.e. relative to the spectrogram's own maximum.

    Raises
    ------
    ValueError
        If neither ``filepath`` nor ``y`` is supplied, or if ``y`` is
        supplied without ``sr``.
    """
    if filepath:
        y, sr = librosa.load(filepath)
    elif y is None:
        # Fail early with a clear message instead of an opaque error from librosa.
        raise ValueError("Provide either `filepath` or a waveform `y` (with `sr`).")
    elif sr is None:
        raise ValueError("`sr` is required when passing a waveform `y`.")

    mel_power = librosa.feature.melspectrogram(y=y, sr=sr)
    return librosa.power_to_db(mel_power, ref=np.max)

In [75]:
def get_(filepath=None, y=None, sr=None):
    """Alias of ``get_spectrogram``, kept so existing calls to ``get_`` still work.

    This function used to carry a line-for-line copy of ``get_spectrogram``;
    it now forwards to it so the spectrogram logic lives in one place.

    Parameters
    ----------
    filepath : str or path-like, optional
        Audio file to load; takes precedence over ``y``/``sr``.
    y : array-like, optional
        Pre-loaded waveform, used when ``filepath`` is not given.
    sr : number, optional
        Sampling rate that goes with ``y``.

    Returns
    -------
    np.ndarray
        Whatever ``get_spectrogram`` returns for the same arguments
        (a mel spectrogram in decibels).
    """
    return get_spectrogram(filepath=filepath, y=y, sr=sr)

In [76]:
# Compute one log-mel spectrogram per clip (every audio file is read from disk
# here) and store the resulting array next to its row.
df['spectrogram'] = df['filename'].map(get_spectrogram)
df.sample(n=5)

Unnamed: 0,name,filename,spectrogram
2524,American Robin,bird-song-data-set/wavfiles/351863-9.wav,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
1521,Northern Mockingbird,bird-song-data-set/wavfiles/321905-4.wav,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
984,Northern Mockingbird,bird-song-data-set/wavfiles/542156-12.wav,"[[-53.809685, -52.35324, -55.054916, -56.72566..."
414,Bewick's Wren,bird-song-data-set/wavfiles/323974-11.wav,"[[-37.597366, -43.477203, -69.8914, -71.036255..."
5352,Northern Cardinal,bird-song-data-set/wavfiles/18344-7.wav,"[[-21.779522, -20.557974, -23.31921, -26.48536..."


In [79]:
# Features keep the file path alongside each spectrogram so that the clips
# behind individual samples can still be identified after splitting.
X = df.loc[:, ['spectrogram', 'filename']]
# Labels are the species names.
y = df.loc[:, 'name']
# Largest absolute dB value over all spectrograms; used as the scaling divisor.
# NOTE(review): this is computed on the full dataset, before the train/val/test
# split -- confirm that using test-set statistics for scaling is acceptable.
x_max = np.abs(np.array(df['spectrogram'].tolist())).max()

In [168]:
# Step 1: hold out 20% of all rows as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)
# Step 2: take 25% of the remainder (20% of the total) as validation,
# giving roughly a 60/20/20 train/val/test split.
# NOTE: this rebinds x_train/y_train, so re-running only this cell shrinks them again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,
    test_size=0.25,
    random_state=42,
)


In [169]:
def get_important_x_y(x, y):
    """Convert one data split into model-ready features and one-hot labels.

    Parameters
    ----------
    x : pd.Series
        One equally-shaped spectrogram array per row.
    y : array-like
        Class label for each row of ``x``.

    Returns
    -------
    tuple
        ``(features, labels)``: ``features`` is a single ndarray stacking all
        spectrograms, each divided by the module-level ``x_max``; ``labels``
        is the ``pd.get_dummies`` one-hot DataFrame, whose columns are the
        classes present in ``y`` only.
    """
    # Stack the per-row arrays first, then scale the whole block in one operation.
    features = np.array(x.to_list()) / x_max
    labels = pd.get_dummies(np.array(y))
    return features, labels

In [170]:
# Save the test-set file paths now: `x_test` is rebound to a plain array by the
# conversion cell that follows, after which the `filename` column is no longer available.
x_test_files = x_test['filename']

In [171]:
# Replace each split's (DataFrame, Series) pair with scaled feature arrays and
# one-hot labels. The names are rebound, so this cell can only run once after
# the split cell: the resulting arrays no longer have a 'spectrogram' column.
(x_train, y_train), (x_test, y_test), (x_val, y_val) = [
    get_important_x_y(features['spectrogram'], labels)
    for features, labels in ((x_train, y_train), (x_test, y_test), (x_val, y_val))
]
print(x_train.shape, y_train.shape)

(3252, 128, 130) (3252, 5)


In [173]:
# Shape of a single training sample (batch axis dropped); passed to the first
# layer of the network as its input shape.
input_size = np.shape(x_train[0])
input_size

(128, 130)

In [174]:
# 1-D CNN classifier: two Conv1D + MaxPooling1D stages, then a dense head that
# ends in a 5-way softmax (one unit per one-hot label column).
# NOTE(review): Conv1D slides along the first axis of `input_size` and treats
# the second axis as channels -- confirm that this is the intended axis of the
# spectrogram to convolve over.
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_size),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(5, activation='softmax'),
])

# Multi-class setup: categorical cross-entropy on one-hot targets, tracked by accuracy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_7 (Conv1D)           (None, 126, 32)           12512     
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 63, 32)           0         
 1D)                                                             
                                                                 
 conv1d_8 (Conv1D)           (None, 61, 64)            6208      
                                                                 
 max_pooling1d_7 (MaxPooling  (None, 30, 64)           0         
 1D)                                                             
                                                                 
 flatten_3 (Flatten)         (None, 1920)              0         
                                                                 
 dense_6 (Dense)             (None, 128)              

In [175]:
# Train for 20 epochs, evaluating on the validation split after each one.
# The returned History is kept so the per-epoch metrics stay available in
# `history.history` (e.g. for plotting learning curves) instead of being
# discarded as a bare object repr.
history = model.fit(
    x_train, y_train,
    epochs=20,
    batch_size=64,
    validation_data=(x_val, y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x136783e90>