In [36]:
# importing multiple visualization libraries
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mlab
import pylab as pl
import seaborn

In [37]:
# importing libraries to manipulate the data files
import os
from glob import glob

In [38]:
# import a library to read the .aiff format
import aifc

In [39]:
# Collect the paths of all training clips.
# NOTE(review): glob returns matches in arbitrary order; anything that pairs these
# files with separately stored labels depends on that order — verify.
aiff_pattern = os.path.join('whale_data', 'train', '*.aiff')
filenames = glob(aiff_pattern)

In [105]:
# Transform every row of X into a log-scaled (10*log10) Welch power spectrum.
# NOTE(review): X is only assigned further down the notebook (X = np.load('X.npy').T),
# so on a fresh kernel that load has to run before this cell. XX is also rebuilt
# from feature_dict in a later cell, which overwrites the result computed here.
from scipy import signal  # imported here: the notebook's own scipy import sits in a later cell

fs = 2000  # sampling frequency handed to welch
# nperseg=256 yields 256 // 2 + 1 = 129 one-sided frequency bins for real-valued input
XX = np.zeros((X.shape[0], 129)).astype("float32")   # allocate space
for i in range(X.shape[0]):
    # 'hann' is the documented SciPy window name; 'hanning' is a legacy alias
    # that newer SciPy releases may reject. Both describe the same window.
    psd = signal.welch(X[i], fs=fs, window='hann', nperseg=256, noverlap=128+64)[1]
    XX[i] = 10*np.log10(psd)

In [106]:
from scipy import signal

In [107]:
# Read every AIFF clip and store its log-scaled Welch power spectrum, keyed by file path.
feature_dict = {}
fs = 2000  # sampling frequency handed to welch (hard-coded, not read from the file header)
for filename in filenames:
    aiff = aifc.open(filename, 'r')
    try:
        whale_strSig = aiff.readframes(aiff.getnframes())
    finally:
        # Close explicitly so file handles are not leaked while looping over all clips.
        aiff.close()
    # AIFF stores samples big-endian: decode them as big-endian 16-bit integers and
    # convert to native int16, which is correct regardless of the host byte order.
    # assumes 16-bit samples, as the previous np.short decoding did — TODO confirm
    whale_array = np.frombuffer(whale_strSig, dtype='>i2').astype(np.int16)
    # welch returns (frequencies, power); keep the power and express it in dB.
    # 'hann' is the documented SciPy window name ('hanning' is a legacy alias).
    psd = signal.welch(whale_array, fs=fs, window='hann', nperseg=256, noverlap=128+64)[1]
    feature = 10*np.log10(psd)
    feature_dict[filename] = feature

In [108]:
import pandas as pd

# Assemble the per-file spectra into one matrix: each dict entry becomes a column,
# so the result has one row per frequency bin and one column per file.
feature_frame = pd.DataFrame(feature_dict)
XX = np.array(feature_frame)
XX.shape

(129, 30000)

In [109]:

# Deep learning on time domain samples.
from __future__ import division
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.optimizers import SGD
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger

In [110]:
# Load the arrays saved to disk: X is stored transposed relative to how it is used
# here, y holds the labels.
# NOTE(review): the train/test split further down uses XX, not X — confirm X is still needed.
X = np.transpose(np.load('X.npy'))
y = np.load('Y.npy')

In [111]:
# Inspect the label dtype as loaded from disk
y.dtype

dtype('int64')

In [112]:
# Cast the integer labels to 32-bit floats
y = y.astype(np.float32)

In [113]:
from sklearn.model_selection import train_test_split
target_names = ['Upcall', 'NO_Upcall']

# XX is laid out as (frequency bins, files); transpose so each row is one sample,
# then hold out 20% of the samples with a fixed seed.
features_by_sample = XX.T
X_train, X_test, y_train, y_test = train_test_split(
    features_by_sample, y, test_size=0.20, random_state=2018
)

# Labels are left as a single column (no one-hot encoding is applied).

print(X_train.shape)

# Append a trailing channel axis of length 1 to each split
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]

(24000, 129)


In [115]:
# Build the Neural Network: five stages of two 3-wide Conv1D layers followed by
# batch normalization; the last three stages are preceded by 2x max pooling.
model = Sequential()

# (number of filters, whether to max-pool before the stage), in layer order
conv_stages = [(16, False), (32, False), (64, True), (128, True), (256, True)]
for stage_idx, (n_filters, pool_first) in enumerate(conv_stages):
    if pool_first:
        model.add(MaxPooling1D(2))
    if stage_idx == 0:
        # the very first layer declares the input: 129 values with a single channel
        model.add(Conv1D(n_filters, 3, activation='relu', input_shape=(129, 1)))
    else:
        model.add(Conv1D(n_filters, 3, activation='relu'))
    model.add(Conv1D(n_filters, 3, activation='relu'))
    model.add(BatchNormalization())

# Collapse the remaining sequence axis, regularize, and emit one sigmoid output
model.add(GlobalAveragePooling1D())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=SGD(lr=0.01),
    metrics=['accuracy'],
)

# File names for the checkpoint and the training log are derived from the model name
model_name = 'deep_1'
top_weights_path = 'model_{}.h5'.format(model_name)
training_log_path = 'model_{}.log'.format(model_name)

# All callbacks watch validation accuracy: keep the best weights, stop after
# 6 epochs without improvement, cut the learning rate 10x after 3, and log to CSV.
callbacks_list = [
    ModelCheckpoint(top_weights_path, monitor='val_acc', verbose=1,
                    save_best_only=True, save_weights_only=True),
    EarlyStopping(monitor='val_acc', patience=6, verbose=1),
    ReduceLROnPlateau(monitor='val_acc', factor=0.1, patience=3, verbose=1),
    CSVLogger(training_log_path),
]

* Max Pooling / Average Pooling
* Batch normalization
* Epochs
* Adam Optimizer
* Convolutional Layers
* Cross Entropy
* ReLU
* Batch Size
* Learning Rate

In [116]:
# Show the installed TensorFlow version so the environment of this run is recorded
import tensorflow
tensorflow.__version__

'1.9.0'

In [None]:
%%time
# Fitting the Model (this will take a loooooooot of time)
# validation_data is passed as an (inputs, targets) tuple, the form Keras documents;
# some Keras versions do not unpack a list the same way.
model.fit(X_train, y_train, batch_size=128, epochs=100,
          validation_data=(X_test, y_test), callbacks=callbacks_list)

# Reload the weights file written by the ModelCheckpoint callback before scoring
model.load_weights(top_weights_path)
loss, acc = model.evaluate(X_test, y_test, batch_size=16)

# NOTE(review): X_test/y_test also served as the validation data that drove
# checkpointing and early stopping, so this accuracy is likely optimistic.
print('Test accuracy:', acc)

Train on 24000 samples, validate on 6000 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.80400, saving model to model_deep_1.h5
Epoch 2/100

Epoch 00002: val_acc did not improve from 0.80400
Epoch 3/100

Epoch 00003: val_acc improved from 0.80400 to 0.84000, saving model to model_deep_1.h5
Epoch 4/100

Epoch 00004: val_acc improved from 0.84000 to 0.84333, saving model to model_deep_1.h5
Epoch 5/100

Epoch 00005: val_acc did not improve from 0.84333
Epoch 6/100

Epoch 00006: val_acc did not improve from 0.84333
Epoch 7/100

Epoch 00007: val_acc did not improve from 0.84333

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 8/100

Epoch 00008: val_acc improved from 0.84333 to 0.85767, saving model to model_deep_1.h5
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.85767
Epoch 10/100

Epoch 00010: val_acc improved from 0.85767 to 0.85833, saving model to model_deep_1.h5
Epoch 11/100

In [None]:
# Mean label value in the test split (the share of 1-labels if y is binary 0/1 — TODO confirm)
sum(y_test)/len(y_test)

In [None]:
# Mean label value in the training split (the share of 1-labels if y is binary 0/1 — TODO confirm)
sum(y_train)/len(y_train)

In [None]:
# Run one more fit with TensorBoard logging enabled.
# The names used here were never imported in the notebook, so import them locally.
from time import time
from keras.callbacks import TensorBoard

# One log sub-directory per run, named after the current timestamp
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
# X_train (capital X) is the array produced by the train/test split; x_train was undefined.
# No epochs/batch_size are given, so Keras' defaults apply to this run.
model.fit(X_train, y_train, verbose=1, callbacks=[tensorboard])

References:

[Deep Learning Glossary](http://www.wildml.com/deep-learning-glossary/)

[Keras and NN Tutorial](https://indico.cern.ch/event/506145/contributions/2132944/attachments/1258124/1858154/NNinKeras_MPaganini.pdf)

Free GPU usage: Google Colaboratory notebooks.