In [1]:
# imports
from pathlib import Path
import numpy as np
from scipy.signal import resample
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout, BatchNormalization, MaxPooling1D, AveragePooling1D
from keras import regularizers
from keras import initializers
from keras.optimizers import Adam, SGD, Adagrad
from keras.utils import np_utils
from keras.callbacks import TensorBoard

Using TensorFlow backend.


## cnn_1 seems too complex. Make a simpler model.

In [2]:
## data loading
## directory (relative to the working directory) and file names of the
## feature (X) and label (Y) .npy files read by load_waveforms()
datapath = Path('..') / 'dataset'
xfile, yfile = 'X_features_spec.npy', 'Y_labels_spec.npy'
def load_waveforms(data_dir=None, x_name=None, y_name=None):
    """Load the feature and label arrays from disk.

    Parameters
    ----------
    data_dir : path-like, optional
        Directory containing the two ``.npy`` files. Defaults to the
        module-level ``datapath``.
    x_name, y_name : str, optional
        File names of the feature and label arrays. Default to the
        module-level ``xfile`` and ``yfile``.

    Returns
    -------
    tuple
        ``(X_list, Y_list)`` exactly as stored in the files.
    """
    data_dir = Path(datapath if data_dir is None else data_dir)
    x_name = xfile if x_name is None else x_name
    y_name = yfile if y_name is None else y_name
    # The per-case arrays have different lengths, so the files hold object
    # arrays. NumPy >= 1.16.3 refuses to load those unless allow_pickle=True.
    # SECURITY: unpickling can execute arbitrary code -- only load files you
    # produced yourself or otherwise trust.
    X_list = np.load(str(data_dir / x_name), allow_pickle=True)
    Y_list = np.load(str(data_dir / y_name), allow_pickle=True)
    return X_list, Y_list

def positve_samples(xlist):
    """Flip the sign of every waveform whose values sum to a negative number.

    Parameters
    ----------
    xlist : sequence of arrays
        One array per case; the first axis of each array indexes the
        waveforms of that case.

    Returns
    -------
    numpy.ndarray
        One sign-corrected array per case. The inputs are left untouched
        (the work is done on copies). When the per-case arrays have different
        shapes the result is a 1-D object array holding them.
    """
    flipped = []
    for case in xlist:
        points = np.array(case)  # copy, so the caller's data is not modified
        # total of each waveform (sum over every axis except the first)
        totals = points.sum(axis=tuple(range(1, points.ndim)))
        negative = totals < 0
        points[negative] = -points[negative]
        flipped.append(points)
    try:
        return np.array(flipped)
    except ValueError:
        # Cases of different shape: newer NumPy refuses to build a ragged
        # array implicitly, so fill an object array by hand.
        ragged = np.empty(len(flipped), dtype=object)
        for slot, item in enumerate(flipped):
            ragged[slot] = item
        return ragged

## apply to loaded dataset
def split_by_channel(xlist, n_points=625, n_channels=4):
    """Reshape flat waveforms into ``(n_points, n_channels)`` blocks.

    Parameters
    ----------
    xlist : sequence of 2-D arrays
        One array per case, shaped ``(n_waveforms, flat_length)``.
    n_points, n_channels : int, optional
        Target layout. The expected flat length is ``n_points * n_channels``
        (2500 with the defaults).

    Returns
    -------
    numpy.ndarray
        One ``(n_waveforms, n_points, n_channels)`` array per case. When the
        cases hold different numbers of waveforms the result is a 1-D object
        array holding them.

    Notes
    -----
    A case whose flat length differs from ``n_points * n_channels`` is first
    resampled to that length along axis 1 (and reported on stdout).
    """
    flat_len = n_points * n_channels
    split = []
    for case in xlist:
        points = np.asarray(case)
        if points.shape[1] != flat_len:
            print("resample")
            print(points.shape)
            # resample every waveform (row) of the case in one call
            points = resample(points, flat_len, axis=1)
        # NOTE(review): a C-order reshape puts flat element k at
        # (k // n_channels, k % n_channels), i.e. channels are assumed to be
        # interleaved in the flat waveform -- confirm against the data layout.
        split.append(points.reshape((points.shape[0], n_points, n_channels)))
    try:
        return np.array(split)
    except ValueError:
        # Cases of different length: newer NumPy refuses to build a ragged
        # array implicitly, so fill an object array by hand.
        ragged = np.empty(len(split), dtype=object)
        for slot, item in enumerate(split):
            ragged[slot] = item
        return ragged

## input is the output of split_by_channel
def apply_resample(xlist, outdim):
    """Resample every case along axis 1 to ``outdim`` points.

    Parameters
    ----------
    xlist : sequence of arrays
        One array per case; axis 1 of each array is the axis that gets
        resampled.
    outdim : int
        Number of points along axis 1 after resampling.

    Returns
    -------
    numpy.ndarray
        One resampled array per case. When the per-case arrays have different
        shapes the result is a 1-D object array holding them.
    """
    # scipy's resample works along an arbitrary axis, so a whole case is
    # handled in one call instead of one Python call per waveform.
    resampled = [resample(np.asarray(case), outdim, axis=1) for case in xlist]
    try:
        return np.array(resampled)
    except ValueError:
        # Cases of different length: newer NumPy refuses to build a ragged
        # array implicitly, so fill an object array by hand.
        ragged = np.empty(len(resampled), dtype=object)
        for slot, item in enumerate(resampled):
            ragged[slot] = item
        return ragged

## input is the combined training array, e.g. shape (18000, 625, 4)
def get_xtrain_mean(x_train):
    """Return the mean of ``x_train`` taken over its first axis.

    The result has the shape of a single sample, so ``x_train - mean``
    broadcasts and yields zero-mean data.
    """
    return np.mean(x_train, axis=0)

## input is after split
## one variance for each channel
def normalize_waveform():
    """Placeholder: normalization is not implemented (not necessarily needed)."""
    return None

def combine_samples(arrs):
    """Placeholder that does nothing and returns None.

    NOTE: a second ``combine_samples`` defined later in this cell replaces
    this stub, so this version is never the one that gets called.
    """
    ## e.g. arrs.shape: (20, ?)
    return None

def binary_label(ylist):
    """Collapse every label greater than 1 to 1 (e.g. 1, 2 --> 1).

    Parameters
    ----------
    ylist : sequence of arrays
        One label array per case.

    Returns
    -------
    numpy.ndarray
        One relabelled array per case. The inputs are left untouched (the
        work is done on copies). When the per-case arrays have different
        lengths the result is a 1-D object array holding them.
    """
    relabelled = []
    for case in ylist:
        labels = np.array(case)  # copy, so the caller's labels are not modified
        labels[labels > 1] = 1
        relabelled.append(labels)
    try:
        return np.array(relabelled)
    except ValueError:
        # Cases of different length: newer NumPy refuses to build a ragged
        # array implicitly, so fill an object array by hand.
        ragged = np.empty(len(relabelled), dtype=object)
        for slot, item in enumerate(relabelled):
            ragged[slot] = item
        return ragged

def combine_samples(arrs):
    """Concatenate the per-case arrays along their first axis.

    Parameters
    ----------
    arrs : numpy.ndarray
        Array whose first axis indexes the cases, e.g. a 1-D object array of
        20 per-case arrays. All cases must agree on every axis but the first.

    Returns
    -------
    numpy.ndarray
        All cases stacked end to end. An input with no cases is returned
        unchanged.
    """
    if arrs.shape[0] < 1:
        return arrs
    sp = list(arrs[0].shape)
    sp[0] = 0
    # Zero-length float64 seed: keeps the result dtype identical to what
    # concatenating onto an empty np.zeros array gives.
    seed = np.zeros(sp)
    print("combinde", seed.shape)
    # one concatenate call instead of re-copying the accumulator per case
    return np.concatenate([seed] + list(arrs), axis=0)

In [3]:
## global parameters

In [4]:
## one-off parameters for the model below
## regularizer
## l2 strength applied to the conv kernels
ker_reg = 0.1
## NOTE(review): act_reg is not used by the model cell below -- remove or wire it in
act_reg = 0.1
## kernel_initializer
ker_init = initializers.glorot_normal(seed=None)
## input shape of one sample fed to the first conv layer
in_shape = (648, 4)
## learning rate
## pass it to the constructor: assigning opt.lr afterwards overwrites the
## optimizer's own attribute with a plain Python float
opt = Adam(lr=0.0001)
## number of output classes (size of the softmax layer)
OUTPUT_SIZE = 2
## number of training epochs
epochs = 20
## callback
model_callback = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)

This time, start the network with a conv layer that has a larger kernel size and stride. Hypothesis: this makes the model generalize better.

In [5]:
## model
## input: in_shape = (648, 4); the shapes in the comments below follow model.summary()
model = Sequential()
## block 1: 1d conv, kernel size 20, 16 filters, stride 2 --> 324 * 16
## batch norm is applied after the activation
## maxpool 2 --> 162 * 16
## Conv1D signature for reference: keras.layers.Conv1D(filters, kernel_size, strides=1, padding='valid', dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None)
model.add(Convolution1D(filters=16, kernel_size=20, strides=2, padding='same', input_shape=in_shape, kernel_initializer=ker_init, activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
## block 2: 1d conv, kernel size 4, 32 filters, stride 2 --> 81 * 32
## batch norm is applied after the activation
## maxpool 2 --> 40 * 32
## (input_shape is only needed on the first layer, so it is not repeated here)
model.add(Convolution1D(filters=32, kernel_size=4, strides=2, padding='same', activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
## block 3: 1d conv, kernel size 3, 64 filters, stride 2 --> 20 * 64
## batch norm is applied after the activation
## maxpool 2 --> 10 * 64
model.add(Convolution1D(filters=64, kernel_size=3, strides=2, padding='same', activation='relu', kernel_regularizer=regularizers.l2(ker_reg)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
## average pool 10 --> 1 * 64
model.add(AveragePooling1D(pool_size=10))
## flatten --> 64
model.add(Flatten())
## fully connected --> OUTPUT_SIZE
model.add(Dense(OUTPUT_SIZE))
## softmax
model.add(Activation('softmax'))

In [6]:
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
## summary() prints the table itself and returns None, so wrapping it in
## print() only adds a stray "None" line to the output
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 324, 16)           1296      
_________________________________________________________________
batch_normalization_1 (Batch (None, 324, 16)           64        
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 162, 16)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 81, 32)            2080      
_________________________________________________________________
batch_normalization_2 (Batch (None, 81, 32)            128       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 40, 32)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 20, 64)            6208      
__________

In [7]:
## experiment
## NOTE(review): the case numbers below do not match val_idx / test_idx as
## set in the split cell (val = [1, 5], test = [6]) -- confirm which is intended
## case 12, 14 for test
## case 18 for validation
## other cases for training
x_list, y_list = load_waveforms()
x_list = positve_samples(x_list)
x_list = split_by_channel(x_list)
## resample to the model's input length so the data and in_shape cannot drift apart
x_list = apply_resample(x_list, in_shape[0])
y_list = binary_label(y_list)
## one line per case: (number of waveforms, points, channels)
for case in x_list:
    print(case.shape)

resample
(2490, 1920)
resample
(878, 1920)
(2394, 648, 4)
(4144, 648, 4)
(3302, 648, 4)
(1272, 648, 4)
(389, 648, 4)
(2716, 648, 4)
(61, 648, 4)
(628, 648, 4)
(611, 648, 4)
(771, 648, 4)
(201, 648, 4)
(1354, 648, 4)
(2490, 648, 4)
(878, 648, 4)
(2506, 648, 4)
(1688, 648, 4)
(2067, 648, 4)
(1554, 648, 4)
(635, 648, 4)
(1439, 648, 4)


In [8]:
## indices into x_list / y_list of the cases held out for validation and test;
## every other case is used for training
val_idx = [1, 5]
test_idx = [6]
held_out = set(val_idx) | set(test_idx)
train_idx = [idx for idx in range(x_list.shape[0]) if idx not in held_out]


def _gather_cases(case_arrays, case_indices):
    """Pick the given cases and concatenate them with combine_samples.

    The picked cases are placed in a 1-D object array explicitly, because
    np.array() on a list of differently sized arrays fails on newer NumPy.
    """
    picked = np.empty(len(case_indices), dtype=object)
    for slot, case in enumerate(case_indices):
        picked[slot] = case_arrays[case]
    return combine_samples(picked)


train_list_x = _gather_cases(x_list, train_idx)
train_list_y = _gather_cases(y_list, train_idx)
val_list_x = _gather_cases(x_list, val_idx)
val_list_y = _gather_cases(y_list, val_idx)
test_list_x = _gather_cases(x_list, test_idx)
test_list_y = _gather_cases(y_list, test_idx)

## one-hot encode the labels for categorical_crossentropy
train_list_y = np_utils.to_categorical(train_list_y, num_classes=OUTPUT_SIZE)
val_list_y = np_utils.to_categorical(val_list_y, num_classes=OUTPUT_SIZE)
test_list_y = np_utils.to_categorical(test_list_y, num_classes=OUTPUT_SIZE)

combinde (0, 648, 4)
combinde (0,)
combinde (0, 648, 4)
combinde (0,)
combinde (0, 648, 4)
combinde (0,)


In [9]:
## keep the returned History object so the per-epoch curves (history.history)
## can be inspected or plotted later, instead of leaving only its repr as output
history = model.fit(train_list_x, train_list_y,
                    epochs=epochs,
                    verbose=2,
                    validation_data=(val_list_x, val_list_y),
                    callbacks=[model_callback])

Train on 24179 samples, validate on 6860 samples
Epoch 1/20
 - 8s - loss: 4.3996 - acc: 0.8738 - val_loss: 2.5144 - val_acc: 0.7608
Epoch 2/20
 - 7s - loss: 1.5094 - acc: 0.9207 - val_loss: 1.2666 - val_acc: 0.7016
Epoch 3/20
 - 7s - loss: 0.7768 - acc: 0.9279 - val_loss: 0.7952 - val_acc: 0.8751
Epoch 4/20
 - 7s - loss: 0.5520 - acc: 0.9319 - val_loss: 1.1250 - val_acc: 0.4778
Epoch 5/20
 - 7s - loss: 0.4522 - acc: 0.9335 - val_loss: 0.5303 - val_acc: 0.9204
Epoch 6/20
 - 7s - loss: 0.3936 - acc: 0.9355 - val_loss: 0.7719 - val_acc: 0.7045
Epoch 7/20
 - 7s - loss: 0.3550 - acc: 0.9366 - val_loss: 1.1549 - val_acc: 0.6327
Epoch 8/20
 - 7s - loss: 0.3273 - acc: 0.9365 - val_loss: 0.5479 - val_acc: 0.8436
Epoch 9/20
 - 7s - loss: 0.3080 - acc: 0.9381 - val_loss: 0.6187 - val_acc: 0.7475
Epoch 10/20
 - 7s - loss: 0.2918 - acc: 0.9382 - val_loss: 0.4218 - val_acc: 0.8516
Epoch 11/20
 - 7s - loss: 0.2803 - acc: 0.9387 - val_loss: 0.6574 - val_acc: 0.7194
Epoch 12/20
 - 7s - loss: 0.2688 - a

<keras.callbacks.History at 0x7fff62d73d68>

In [11]:
## score the trained model on the held-out test case(s)
## NOTE(review): with test_idx = [6] the test set is a single small case, so
## this accuracy is a very noisy estimate -- interpret with care
loss, acc = model.evaluate(x=test_list_x, y=test_list_y)
print("loss", loss, "acc", acc)

loss 4.813035878978792 acc 0.01639344262295082
