In [1]:
import numpy as np
import h5py
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Permute
from keras.layers import ConvLSTM2D, Conv2D, MaxPooling2D
from keras.layers import MaxPooling1D, Conv1D
from keras.layers import GRU, LSTM, BatchNormalization
from keras.layers import TimeDistributed
from keras.layers import Flatten, Reshape, Dropout
from keras.utils import to_categorical
from keras.layers import Activation
from keras import regularizers

from sklearn.metrics import confusion_matrix

from UtilNNDL import create_window_data
from UtilNNDL import plot_hist
from UtilNNDL import prepare_data
from UtilNNDL import plot_confusion_matrix
from UtilNNDL import bandpass_cnt
from UtilNNDL import exponential_running_standardize

RuntimeError: module compiled against API version 0xb but this version of numpy is 0xa

RuntimeError: module compiled against API version 0xb but this version of numpy is 0xa

Using TensorFlow backend.


In [2]:
#Prepare the data by taking out nans and dividing into test and train
# prepare_data (from UtilNNDL) does the loading and the split; with return_all=True
# its four results are used below as pooled arrays (see the shapes printed in the
# next cell).
# NOTE(review): this is a machine-specific absolute path. The cell used to assign
# the relative default 'datasets/' first, but that value was overwritten before it
# was ever read, so only the effective assignment is kept.
file_path = '/home/carla/Downloads/project_datasets/project_datasets/'
train_data, test_data, train_labels, test_labels = prepare_data(
    file_path,
    num_test_samples=50,
    verbose=False,
    return_all=True,
    num_files=9,
)

In [8]:
# Report the shapes of the pooled training arrays.
print(train_data.shape)
print(train_labels.shape)

# Test on the first 50 samples of subject 1
# (both arrays are rebound to their first 50 rows).
test_data, test_labels = test_data[:50], test_labels[:50]

print(test_data.shape)
print(test_labels.shape)


(2108, 1000, 22)
(2108, 4)
(50, 1000, 22)
(50, 4)


In [9]:
#assist numerical stability
# Multiply by 1e6 (this creates new arrays) and swap axes 1 and 2, giving the
# (trials, 22, 1000) layout printed below.
train_data = (train_data * (1e6)).swapaxes(1, 2)
test_data = (test_data * (1e6)).swapaxes(1, 2)
print(train_data.shape)
print(test_data.shape)

# Replace every trial, in place, with its band-passed version.
# NOTE(review): the arguments are presumably 4-38 Hz at a 250 Hz sampling rate;
# confirm against UtilNNDL.bandpass_cnt, including which axis it filters along
# for a (22, 1000) trial.
for dataset in (train_data, test_data):
    for idx in range(len(dataset)):
        dataset[idx] = bandpass_cnt(dataset[idx], 4, 38, 250, filt_order=3)
print(train_data.shape)
print(test_data.shape)

(2108, 22, 1000)
(50, 22, 1000)
(2108, 22, 1000)
(50, 22, 1000)


In [11]:
#Standardize the data
# Every trial is overwritten in place with the result of
# exponential_running_standardize, then axes 1 and 2 are swapped again.
# NOTE(review): this cell is not idempotent -- each run re-standardizes the data
# and flips the layout. The saved output below shows (2108, 22, 1000), the same
# shape the filtering cell printed, which is consistent with the cell having been
# run an even number of times. Confirm with Restart & Run All.
for dataset in (train_data, test_data):
    for idx in range(len(dataset)):
        dataset[idx] = exponential_running_standardize(
            dataset[idx], factor_new=0.001, init_block_size=1000, eps=1e-4)
train_data, test_data = train_data.swapaxes(1, 2), test_data.swapaxes(1, 2)
print(train_data.shape)
print(test_data.shape)

(2108, 22, 1000)
(50, 22, 1000)


In [16]:
#Augment the data into a bigger set by windowing
# create_window_data is called with windows=10 for both splits; per the saved
# output it yields ten 512-sample windows per trial, with labels to match.
train_data_sliced, train_labels_sliced = create_window_data(
    train_data, train_labels, windows=10)
test_data_sliced, test_labels_sliced = create_window_data(
    test_data, test_labels, windows=10)
for arr in (train_data_sliced, train_labels_sliced,
            test_data_sliced, test_labels_sliced):
    print(arr.shape)

(21080, 22, 512)
(21080, 4)
(500, 22, 512)
(500, 4)


In [17]:
# Keep at most the first 500 test windows and their labels. With the
# (500, 22, 512) test shape printed by the windowing cell this leaves the
# arrays unchanged.
test_data_sliced = test_data_sliced[:500]
test_labels_sliced = test_labels_sliced[:500]
print(test_data_sliced.shape)
print(test_labels_sliced.shape)

(500, 22, 512)
(500, 4)


In [18]:
# Build the 4D arrays fed to the Conv2D stack: (n_windows, dim2, dim1, 1), which
# prints as (n_windows, 512, 22, 1) below.
# NOTE(review): reshape only reinterprets the existing memory order; it does NOT
# swap axes. If the windowed arrays really are (n_windows, 22, 512) as printed by
# the windowing cell, the rows of the result are not per-time-step channel vectors.
# np.transpose(x, (0, 2, 1))[..., np.newaxis] would produce that layout. Any such
# change has to be made everywhere model inputs are built (training AND
# evaluation) at the same time, otherwise the layouts diverge.
train_shape = train_data_sliced.shape
test_shape = test_data_sliced.shape
traindata2d = train_data_sliced.reshape(
    (train_shape[0], train_shape[2], train_shape[1], 1))
testdata2d = test_data_sliced.reshape(
    (test_shape[0], test_shape[2], test_shape[1], 1))
print(traindata2d.shape)
print(testdata2d.shape)

(21080, 512, 22, 1)
(500, 512, 22, 1)


# TESTING


Trying out:

* X batchnorm before nonlinearity
* X regularization
* dropout 0.5 applied to inputs to conv layers after the first
* “new tied loss function” for further regularization

In [26]:
num_filters = 20 #15 performs similar

# Initializer settings shared by every trainable layer in this model.
glorot = dict(kernel_initializer='glorot_normal', bias_initializer='glorot_normal')

# First layer: a (num_filters, 1) convolution with no activation argument; it
# also declares the per-sample input shape taken from traindata2d.
layers = [
    Conv2D(num_filters, kernel_size=(num_filters, 1), data_format='channels_last',
           input_shape=traindata2d.shape[1:], **glorot),
]

# (output filters, kernel size) for each relu convolution stage. Every stage is
# Conv2D -> BatchNormalization -> MaxPooling2D((3, 1)). All stages but the last
# are followed by Permute((1, 3, 2)) and Dropout(0.5); the Permute moves the
# previous stage's feature maps onto the width axis, which the next stage's
# kernel then spans (see the shapes in the model summary).
# NOTE(review): relu is applied inside Conv2D, i.e. before BatchNormalization,
# whereas the note above this cell says "batchnorm before nonlinearity" -- confirm
# which ordering is intended.
stages = [
    (num_filters, (15, num_filters)),
    (num_filters * 2, (10, num_filters)),
    (num_filters * 4, (10, num_filters * 2)),
    (num_filters * 8, (10, num_filters * 4)),
]
final_stage = len(stages) - 1
for stage_idx, (n_out, kernel) in enumerate(stages):
    layers += [
        Conv2D(n_out, kernel_size=kernel, activation='relu', **glorot),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3, 1)),
    ]
    if stage_idx < final_stage:
        layers += [Permute((1, 3, 2)), Dropout(0.5)]

# Classifier head. The Reshape is kept from the original stack (an LSTM stage
# after it is left disabled: LSTM(32, return_sequences=True)); Flatten undoes it
# before the 4-way softmax.
layers += [
    Reshape((num_filters * 8, 1)),
    Flatten(),
    Dense(4, activation='softmax', **glorot),
]

# Add the layers in the order they were created.
model = Sequential()
for layer in layers:
    model.add(layer)

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Train for 30 epochs, holding out 25% of the windowed training set for validation.
hist = model.fit(traindata2d, train_labels_sliced,
                 epochs=30, validation_split=0.25, batch_size=16)
#model.test_on_batch(test_data, test_labels)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 493, 22, 20)       420       
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 479, 3, 20)        120020    
_________________________________________________________________
batch_normalization_13 (Batc (None, 479, 3, 20)        80        
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 159, 3, 20)        0         
_________________________________________________________________
permute_10 (Permute)         (None, 159, 20, 3)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 159, 20, 3)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 150, 1, 40)        24040     
__________

In [27]:
# Modify test data to run model on another subject
# Same prepare_data call as the initial load, but with return_all=False; only the
# second and fourth results (test data and test labels) are kept. They are indexed
# later by per-subject keys of the form 'A0<n>T'.
split_kwargs = dict(num_test_samples=50, verbose=False, return_all=False, num_files=9)
_, test_data_orig, _, test_labels_orig = prepare_data(file_path, **split_kwargs)


In [28]:
def _preprocess_subject(subject_data, subject_labels, max_windows=500):
    """Turn one subject's test trials into model inputs and matching labels.

    Runs the same steps, in the same order and with the same parameters, that the
    earlier cells apply to the pooled data: scale by 1e6, swap axes 1 and 2,
    band-pass each trial, standardize each trial, swap the axes back, cut windows
    with create_window_data(windows=10), keep the first `max_windows` windows and
    reshape to 4D.

    Working inside a function keeps the notebook-level `test_data`, `test_labels`,
    `test_data_sliced`, `test_labels_sliced` and `testdata2d` from being rebound
    on every loop iteration.

    Parameters
    ----------
    subject_data : array indexed as (trial, axis1, axis2); not modified, because
        the scaling step creates a new array.
    subject_labels : labels passed through to create_window_data.
    max_windows : int, number of leading windows to keep (default 500).

    Returns
    -------
    (inputs, window_labels) : the 4D input array and the labels of the kept windows.
    """
    #assist numerical stability
    data = (subject_data * (1e6)).swapaxes(1, 2)
    for idx in range(len(data)):
        data[idx] = bandpass_cnt(data[idx], 4, 38, 250, filt_order=3)

    #standardize
    for idx in range(len(data)):
        data[idx] = exponential_running_standardize(
            data[idx], factor_new=0.001, init_block_size=1000, eps=1e-4)

    data = data.swapaxes(1, 2)
    windows, window_labels = create_window_data(data, subject_labels, windows=10)

    windows = windows[:max_windows]
    window_labels = window_labels[:max_windows]

    # NOTE(review): reshape reinterprets memory order rather than swapping axes.
    # It is kept identical to how the training inputs are built so that train and
    # evaluation layouts match; if one is changed to a transpose, change both.
    inputs = windows.reshape(windows.shape[0], windows.shape[2], windows.shape[1], 1)
    return inputs, window_labels


# One entry per subject, appended in subject order 1..9.
test_data_all = []
test_labels_all = []
for n in range(1, 10):
    key = 'A0{}T'.format(n)
    inputs, window_labels = _preprocess_subject(test_data_orig[key], test_labels_orig[key])
    test_data_all.append(inputs)
    test_labels_all.append(window_labels)

print(len(test_data_all))
print(len(test_labels_all))
    

9
9


In [29]:
# Evaluate the trained model on every subject's windows, in the order the
# per-subject arrays were appended. Iterating the two lists directly avoids a
# hard-coded subject count that would have to match the preprocessing loop.
for testdata2d, test_labels_sliced in zip(test_data_all, test_labels_all):
    # The model was compiled with metrics=['accuracy'], so the printed score is
    # the [loss, accuracy] pair shown in the saved output.
    test_score = model.evaluate(testdata2d, test_labels_sliced, batch_size=16)
    print("Test Score: {}".format(test_score))

    # Optional diagnostics, left disabled:
    #plot_hist([hist.history['acc'],hist.history['val_acc']],['Training Accuracy','Val Accuracy'],title='Accuracies')
    #plot_hist([hist.history['loss'],hist.history['val_loss']],['Training Loss','Val Loss'],title='Loss')

    #test_predict = model.predict(testdata2d, batch_size=16)
    #cm = confusion_matrix(np.argmax(test_labels_sliced,axis=1),np.argmax(test_predict,axis=1))
    #unique, count = np.unique(np.argmax(test_labels_sliced,axis=1),return_counts=True)
    #print count 
    #plt.figure(0)
    #plot_confusion_matrix(cm, classes=["1","2","3","4"])

Test Score: [1.764169979572296, 0.38200000000000001]
Test Score: [1.7105364065170288, 0.29799999999999999]
Test Score: [1.6860965385437012, 0.34399999999999997]
Test Score: [1.5207793159484864, 0.374]
Test Score: [2.3204163894653322, 0.19]
Test Score: [2.042624954223633, 0.26200000000000001]
Test Score: [2.1792755298614503, 0.27000000000000002]
Test Score: [2.5227179417610168, 0.23999999999999999]
Test Score: [3.2447946605682372, 0.28000000000000003]


In [43]:
# Sanity check: shape of the 4D training array fed to the model.
print(traindata2d.shape)

(21080, 512, 22, 1)
