In [1]:
# imports
from pathlib import Path
import numpy as np
from scipy.signal import resample
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout, BatchNormalization, MaxPooling1D
from keras import regularizers
from keras import initializers
from keras.optimizers import Adam, SGD, Adagrad
from keras.utils import np_utils
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [2]:
## data loading
datapath = Path('../dataset')
xfile = 'X_features_spec.npy'
yfile = 'Y_labels_spec.npy'
def load_waveforms():
    X_list = np.load(str(datapath.joinpath(xfile)))
    Y_list = np.load(str(datapath.joinpath(yfile)))
    return X_list, Y_list

def positve_samples(xlist):
    ## some samples have negative signs
    xl_new = []
    for sample in range(xlist.shape[0]):
        points = xlist[sample]
        for p in range(points.shape[0]):
            point = points[p]
            if np.sum(point) < 0:
                points[p] = -point
        xl_new.append(points)
    return np.array(xl_new)

## apply to loaded dataset
def split_channel(xlist):
    ## input as (n, 2500)
    def standard_resample(arr):
        return resample(arr, 2500)
    ## if some is not with dim 625, resample it
    xl_new = []
    for sample in range(xlist.shape[0]):
        points = xlist[sample]
        if points.shape[1] != 2500:
            print("resample")
            print(points.shape)
            points = np.apply_along_axis(standard_resample, axis=1, arr=points)
        points = points.reshape((points.shape[0], 4, 625))
        xl_new.append(points)
    return np.array(xl_new)

## input is after split
def apply_resample(xlist, outdim):
    ## resample
    def resample_waveform(arr):
        ## arr.shape: (indim, )
        return resample(arr, outdim)
    xl_new = []
    for sample in range(xlist.shape[0]):
        points = xlist[sample]
        points = np.apply_along_axis(resample_waveform, axis=2, arr=points)
        xl_new.append(points)
    return np.array(xl_new)

## input is combined exp. (18000 ,625, 4)
def get_xtrain_mean(x_train):
    ## mean value for each dimension (exp. each of 625 dim)
    m = np.mean(x_train, axis=0)
    ## then we can apply x_train - m for zero mean
    return m

## input is after split
## one variance for each channel
def normalize_waveform():
    ## we don't necessarily need this
    pass

def combine_samples(arrs):
    ## exp. arrs.shape: (20, ?)
    pass

def binary_label(ylist):
    ## 1, 2 --> 1
    ylist_new = []
    for sample in range(ylist.shape[0]):
        labels = ylist[sample]
        labels[labels > 1] = 1
        ylist_new.append(labels)
    return np.array(ylist_new)

def combine_samples(arrs):
    ## exp. arrs.shape: (20, ?)
    if arrs.shape[0] < 1:
        return arrs
    sp = list(arrs[0].shape)
    sp[0] = 0
    combined = np.zeros(sp)
    print("combined", combined.shape)
    for sample in range(arrs.shape[0]):
        arr = arrs[sample]
        combined = np.concatenate((combined, arr), axis=0)
    return combined

In [3]:
## global parameters

### models to test
* VERY DEEP CONVOLUTIONAL NEURAL NETWORKS FOR RAW WAVEFORMS

they have a clearly defined structure, and their data are of similar dimentions

* Raw Waveform-based Audio Classification Using Sample-level CNN Architectures
* SAMPLE-LEVEL DEEP CONVOLUTIONAL NEURAL NETWORKS FOR MUSIC AUTO-TAGGING USING RAW WAVEFORMS

realtively simple arch;

#### test raw waveform input first
* input: 2500 * 1 waveform (normalized, center to 0, variance 1)
* conv layer: with/withour overlapping. In the paper:
    * filter size 3, stride 3, 128 filters
    * filter size 80, stride 4, 256 filters
* batch normalization: after every conv layer
* max pool
    * stride of 3? 4?


### input
or we can make the input as 625 * 4

In [4]:
## model
## 1d conv, size 4 filter, 64 filters, stride 2
## output 1250 * 64
## batch norm
## maxpool 2 * 1
## output 625 * 64
## 1d conv, size 3 filter, stride 2, 128 filters
## maxpool 2 * 1
## output 312 * 64
## 1d conv, size 3 filter, stride 3, 128 filters
## output 104 * 128
## maxpool 2 * 1
## output 52 * 128
## 1d conv, size 3 filter, stride 2, 256 filters

In [5]:
## one time parameter for the model below
## regularizer
## l2
ker_reg = 0.1
act_reg = 0.1
## kernel_initializer
ker_init = initializers.glorot_normal(seed=None)
## shape
in_shape = (648, 4)
## learning rate
opt = Adam()
opt.lr = 0.001
##
OUTPUT_SIZE = 2
## batch size
bsize = 50
##
epochs = 30
## callback
model_callback = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)

In [6]:
## model
## resample data to 648 * 1
model = Sequential()
## 1d conv, size 3 filter, 64 filters, stride 1
## batch norm, batch after activation
## no maxpool
## keras.layers.Conv1D(filters, kernel_size, strides=1, padding='valid', dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None)
model.add(Convolution1D(filters=64, kernel_size=3, strides=1, padding='same', input_shape=in_shape, kernel_initializer=ker_init, activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
## 1d conv, size 3 filter, 128 filters, stride 1
## batch norm, batch after activation
## maxpool 3 --> 216 * 128
model.add(Convolution1D(filters=128, kernel_size=3, strides=1, padding='same', input_shape=in_shape, activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=3))
## 1d conv, size 3 filter, 128 filters, stride 2
## batch norm, batch after activation
## max pool 3 -->  36 * 128
model.add(Convolution1D(filters=128, kernel_size=3, strides=2, padding='same', activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=3))
## 1d conv, size 3 filter, 256 filters, stride 2
## batch norm, batch after activation
## max pool 3 -->  6 * 256
model.add(Convolution1D(filters=256, kernel_size=3, strides=2, padding='same', activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=3))
## 1d conv, size 3 filter, 512 filters, stride 2
## batch norm, batch after activation
## max pool 3 -->  1 * 512
model.add(Convolution1D(filters=512, kernel_size=3, strides=2, padding='same',activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=3))
##
model.add(Flatten())
## fully connected
model.add(Dense(OUTPUT_SIZE))
## softmax
model.add(Activation('softmax'))

In [7]:
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 648, 64)           832       
_________________________________________________________________
batch_normalization_1 (Batch (None, 648, 64)           256       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 648, 128)          24704     
_________________________________________________________________
batch_normalization_2 (Batch (None, 648, 128)          512       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 216, 128)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 108, 128)          49280     
_________________________________________________________________
batch_normalization_3 (Batch (None, 108, 128)          512       
__________

In [16]:
## experiment
## case 12, 14 for test
## case 18 for validation
## other case for traning
x_list, y_list = load_waveforms()
x_list = positve_samples(x_list)
x_list = split_channel(x_list)
x_list = apply_resample(x_list, 648)
y_list = binary_label(y_list)
## apply transpose: 4 * 648 --> 648 * 4
for i in range(x_list.shape[0]):
    x_list[i] = np.transpose(x_list[i], axes=(0, 2, 1))
    print(x_list[i].shape)

resample
(2490, 1920)
resample
(878, 1920)
(2394, 648, 4)
(4144, 648, 4)
(3302, 648, 4)
(1272, 648, 4)
(389, 648, 4)
(2716, 648, 4)
(61, 648, 4)
(628, 648, 4)
(611, 648, 4)
(771, 648, 4)
(201, 648, 4)
(1354, 648, 4)
(2490, 648, 4)
(878, 648, 4)
(2506, 648, 4)
(1688, 648, 4)
(2067, 648, 4)
(1554, 648, 4)
(635, 648, 4)
(1439, 648, 4)


In [18]:
val_idx = [11]
test_idx = [17]
# val_idx = [17]
# test_idx = [11, 13]
train_list_x = []
train_list_y = []
val_list_x = []
val_list_y = []
test_list_x = []
test_list_y = []
for idx in range(x_list.shape[0]):
    if idx not in (val_idx + test_idx):
        train_list_x.append(x_list[idx])
        train_list_y.append(y_list[idx])
        
for idx in val_idx:
    val_list_x.append(x_list[idx])
    val_list_y.append(y_list[idx])
    
for idx in test_idx:
    test_list_x.append(x_list[idx])
    test_list_y.append(y_list[idx])  

train_list_x = np.array(train_list_x)
train_list_y = np.array(train_list_y)
val_list_x = np.array(val_list_x)
val_list_y = np.array(val_list_y)
test_list_x = np.array(test_list_x)
test_list_y = np.array(test_list_y)
train_list_x = combine_samples(train_list_x)
train_list_y = combine_samples(train_list_y)
val_list_x = combine_samples(val_list_x)
val_list_y = combine_samples(val_list_y)
test_list_x = combine_samples(test_list_x)
test_list_y = combine_samples(test_list_y)
train_list_y = np_utils.to_categorical(train_list_y, num_classes=2)
val_list_y = np_utils.to_categorical(val_list_y, num_classes=2)
test_list_y = np_utils.to_categorical(test_list_y, num_classes=2)

combinde (0, 648, 4)
combinde (0,)
combinde (0, 648, 4)
combinde (0,)
combinde (0, 648, 4)
combinde (0,)


In [23]:
model.fit(train_list_x, train_list_y,
          epochs=epochs,
          batch_size=bsize,
          verbose=2,
          validation_data=(test_list_x, test_list_y),
          callbacks=[model_callback])

Train on 28192 samples, validate on 1554 samples
Epoch 1/30
 - 20s - loss: 5.3788 - acc: 0.9106 - val_loss: 0.8365 - val_acc: 0.7175
Epoch 2/30
 - 13s - loss: 0.4778 - acc: 0.9099 - val_loss: 0.7739 - val_acc: 0.7767
Epoch 3/30
 - 13s - loss: 0.4330 - acc: 0.9138 - val_loss: 0.7572 - val_acc: 0.7793
Epoch 4/30
 - 13s - loss: 0.3929 - acc: 0.9194 - val_loss: 0.7626 - val_acc: 0.5772
Epoch 5/30
 - 13s - loss: 0.3799 - acc: 0.9203 - val_loss: 0.6845 - val_acc: 0.7510
Epoch 6/30
 - 13s - loss: 0.3517 - acc: 0.9237 - val_loss: 0.8898 - val_acc: 0.5328
Epoch 7/30
 - 13s - loss: 0.3381 - acc: 0.9237 - val_loss: 1.7303 - val_acc: 0.5328
Epoch 8/30
 - 13s - loss: 0.3198 - acc: 0.9264 - val_loss: 0.5658 - val_acc: 0.8012
Epoch 9/30
 - 13s - loss: 0.3108 - acc: 0.9266 - val_loss: 0.6047 - val_acc: 0.7465
Epoch 10/30
 - 14s - loss: 0.2961 - acc: 0.9272 - val_loss: 0.6102 - val_acc: 0.7690
Epoch 11/30
 - 14s - loss: 0.2807 - acc: 0.9301 - val_loss: 0.6956 - val_acc: 0.6371
Epoch 12/30
 - 13s - loss

<keras.callbacks.History at 0x7fff5e48e8d0>

In [11]:
loss, acc = model.evaluate(test_list_x, test_list_y)
print("loss", loss, "acc", acc)

loss 1.3035755996164438 acc 0.640926640926641
