In [95]:
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from scipy.signal import decimate
from sklearn.model_selection import train_test_split

In [96]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPool1D, GlobalAvgPool1D, Dropout, BatchNormalization, Dense
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping
from keras.utils import np_utils
from keras.regularizers import l2

In [97]:
INPUT_LIB = ''
SAMPLE_RATE = 4000
CLASSES = ['normal', 'murmur']
CODE_BOOK = {x:i for i,x in enumerate(CLASSES)}
print(CODE_BOOK)
NB_CLASSES = len(CLASSES)
print(NB_CLASSES)

{'normal': 0, 'murmur': 1}
2


In [98]:
def clean_filename(fname, string):   
    file_name = fname.split('_',3)[2]
    fn=fname.split('_',3)[3]
    return file_name+'__'+fn
def load_wav_file(name, path):
    _, b = wavfile.read(path + name)
#     print(_)
    assert _ == SAMPLE_RATE
    return b
def repeat_to_length(arr, length):
    """Repeats the numpy 1D array to given length, and makes datatype float"""
    result = np.empty((length, ), dtype = 'float32')
    l = len(arr)
    pos = 0
    while pos + l <= length:
        result[pos:pos+l] = arr
        pos += l
    if pos < length:
        result[pos:length] = arr[:length-pos]
    return result

In [99]:
df = pd.read_csv(INPUT_LIB + 'set_b_new.csv')

df['fname'] = df['fname'].apply(clean_filename, string='Bunlabelledtest')

# df['label'] = df['label'].fillna('unclassified')

# #print(df[150:])

df['time_series'] = df['fname'].apply(load_wav_file, path=INPUT_LIB + 'set_b/')    
df['len_series'] = df['time_series'].apply(len)
MAX_LEN = max(df['len_series'])
print(MAX_LEN)
df['time_series'] = df['time_series'].apply(repeat_to_length, length=MAX_LEN) 
df['len_series'] = df['time_series'].apply(len)

print("OK")
print(df)


96640
OK
    dataset                             fname   label  sublabel  \
0         b   murmur__112_1306243000964_A.wav  murmur       NaN   
1         b   murmur__112_1306243000964_B.wav  murmur       NaN   
2         b   murmur__112_1306243000964_D.wav  murmur       NaN   
3         b   murmur__116_1306258689913_A.wav  murmur       NaN   
4         b   murmur__116_1306258689913_C.wav  murmur       NaN   
5         b   murmur__116_1306258689913_D.wav  murmur       NaN   
6         b   murmur__122_1306325762831_C.wav  murmur       NaN   
7         b   murmur__122_1306325762831_D.wav  murmur       NaN   
8         b   murmur__156_1306936373241_B.wav  murmur       NaN   
9         b   murmur__160_1307100683334_A.wav  murmur       NaN   
10        b   murmur__160_1307100683334_B.wav  murmur       NaN   
11        b   murmur__161_1307101199321_A.wav  murmur       NaN   
12        b   murmur__162_1307101835989_A.wav  murmur       NaN   
13        b   murmur__162_1307101835989_B.wav  murmur

In [100]:
x_data = np.stack(df['time_series'].values, axis=0)
print(x_data)

[[  934.   782.   697. ...,  -377.  -402.  -289.]
 [  817.   879.   770. ...,  3785.  3701.  3205.]
 [ 1733.  2192.  2593. ...,    59.    52.    67.]
 ..., 
 [ 1092.   622.   571. ...,   368.    82.  -336.]
 [ -486.  -147.  -153. ...,   309.   251.   171.]
 [  683.   823.   780. ...,   185.    69.  -118.]]


In [101]:
new_labels =[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,]
print(len(new_labels))
new_labels = np.array(new_labels, dtype='int')
y_data = np_utils.to_categorical(new_labels)
print (y_data)

132
[[ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 0.  1.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 [ 1. 

In [102]:
x_train, x_test, y_train, y_test, train_filenames, test_filenames = train_test_split(x_data, y_data, df['fname'].values, test_size=0.20)

In [103]:
x_train, x_test, y_train, y_test, train_filenames, test_filenames = train_test_split(x_data, y_data, df['fname'].values, test_size=0.20)

In [104]:
print(len(x_train))
x_train = decimate(x_train, 8, axis=1, zero_phase=True)
x_train = decimate(x_train, 8, axis=1, zero_phase=True)
x_train = decimate(x_train, 4, axis=1, zero_phase=True)
x_test = decimate(x_test, 8, axis=1, zero_phase=True)
x_test = decimate(x_test, 8, axis=1, zero_phase=True)
x_test = decimate(x_test, 4, axis=1, zero_phase=True)
print(x_train)

105
[[ -1.52261236e+02   6.97962294e+01   1.74102681e+01 ...,  -2.25184404e+01
   -4.09072940e+01   1.80502524e+02]
 [ -5.60964941e+01  -2.96922252e+01  -3.03894462e+01 ...,  -1.93122640e+01
    1.33183353e+02  -2.50412673e+02]
 [  6.73191903e+00  -2.30098934e+01   8.71595496e+01 ...,   1.49426598e+02
   -9.04920641e+01   1.05783393e+02]
 ..., 
 [  1.72083871e+03  -3.77959175e+02   1.63148895e+02 ...,   5.26741897e+01
   -6.10588105e-02   1.34744548e+02]
 [ -8.16803072e+02   3.07251451e+02  -1.37400668e+01 ...,  -9.79700123e+01
    1.10649212e+02  -1.53449640e+01]
 [  2.16258720e+02  -7.15465158e+01   1.02921984e+02 ...,  -9.94204416e+00
    1.22899368e+02  -1.62661723e+02]]


In [105]:
#Scale each observation to unit variance, it should already have mean close to zero.
x_train = x_train / np.std(x_train, axis=1).reshape(-1,1)
x_test = x_test / np.std(x_test, axis=1).reshape(-1,1)
print(x_train.shape)

(105, 378)


In [106]:
x_train = x_train[:,:,np.newaxis]
x_test = x_test[:,:,np.newaxis]
print(x_train.shape)

(105, 378, 1)


In [108]:
model = Sequential()
model.add(Conv1D(filters=4, kernel_size=9, activation='relu',
                input_shape = x_train.shape[1:],
                kernel_regularizer = l2(0.025)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
# model.add(Conv1D(filters=4, kernel_size=9, activation='relu',
#                 kernel_regularizer = l2(0.05)))
# model.add(MaxPool1D(strides=4))
# model.add(BatchNormalization())
model.add(Conv1D(filters=8, kernel_size=9, activation='relu',
                 kernel_regularizer = l2(0.1)))
model.add(MaxPool1D(strides=4))
model.add(BatchNormalization())
# model.add(Conv1D(filters=16, kernel_size=9, activation='relu'))
# model.add(MaxPool1D(strides=4))
# model.add(BatchNormalization())
# model.add(Dropout(0.25))
model.add(Conv1D(filters=64, kernel_size=4, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Conv1D(filters=32, kernel_size=1, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.75))
model.add(GlobalAvgPool1D())
model.add(Dense(3, activation='softmax'))

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [109]:
def batch_generator(x_train, y_train, batch_size):
    """
    Rotates the time series randomly in time
    """
    x_batch = np.empty((batch_size, x_train.shape[1], x_train.shape[2]), dtype='float32')
    y_batch = np.empty((batch_size, y_train.shape[1]), dtype='float32')
    full_idx = range(x_train.shape[0])
    
    while True:
        batch_idx = np.random.choice(full_idx, batch_size)
        x_batch = x_train[batch_idx]
        y_batch = y_train[batch_idx]
    
        for i in range(batch_size):
            sz = np.random.randint(x_batch.shape[1])
            x_batch[i] = np.roll(x_batch[i], sz, axis = 0)
     
        yield x_batch, y_batch

In [110]:
weight_saver = ModelCheckpoint('set_a_weights.h5', monitor='val_loss', 
                               save_best_only=True, save_weights_only=True)


print(x_train)

[[[ -4.86151067e+00]
  [  2.22850624e+00]
  [  5.55888069e-01]
  ..., 
  [ -7.18985616e-01]
  [ -1.30611870e+00]
  [  5.76321966e+00]]

 [[ -1.27941191e+00]
  [ -6.77200728e-01]
  [ -6.93102486e-01]
  ..., 
  [ -4.40461406e-01]
  [  3.03755824e+00]
  [ -5.71124742e+00]]

 [[  1.07640754e-01]
  [ -3.67919201e-01]
  [  1.39364713e+00]
  ..., 
  [  2.38927289e+00]
  [ -1.44693273e+00]
  [  1.69143511e+00]]

 ..., 
 [[  1.68535971e+01]
  [ -3.70166688e+00]
  [  1.59785210e+00]
  ..., 
  [  5.15881916e-01]
  [ -5.97999444e-04]
  [  1.31966483e+00]]

 [[ -5.67425422e+00]
  [  2.13444697e+00]
  [ -9.54509533e-02]
  ..., 
  [ -6.80588473e-01]
  [  7.68669681e-01]
  [ -1.06600023e-01]]

 [[  2.09873883e+00]
  [ -6.94341719e-01]
  [  9.98833090e-01]
  ..., 
  [ -9.64851461e-02]
  [  1.19270879e+00]
  [ -1.57859288e+00]]]


In [111]:
model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.8**x)

In [112]:
hist = model.fit_generator(batch_generator(x_train, y_train, 8),
                   epochs=30, steps_per_epoch=1000,
                   validation_data=(x_test, y_test),
                   callbacks=[weight_saver, annealer],
                   verbose=2)

Instructions for updating:
Use tf.cast instead.


ValueError: Error when checking target: expected dense_1 to have shape (3,) but got array with shape (2,)

In [113]:
model.load_weights('set_a_weights.h5')

ValueError: You are trying to load a weight file containing 13 layers into a model with 9 layers.

In [None]:
plt.plot(hist.history['loss'], color='b')
plt.plot(hist.history['val_loss'], color='r')
plt.show()
plt.plot(hist.history['acc'], color='b')
plt.plot(hist.history['val_acc'], color='r')
plt.show()

In [None]:
y_hat = model.predict(x_test)
np.set_printoptions(precision=2, suppress=True)
for i in range(3):
    plt.plot(y_hat[:,i], c='r')
    plt.plot(y_test[:,i], c='b')
    plt.show()
    print(CLASSES[i])

In [None]:
y_pred = np.argmax(y_hat, axis=1)
y_true = np.argmax(y_test, axis=1)
for i in range(len(y_true)):
    if y_pred[i] != y_true[i]:
        print("File: {}, Pred: {}, True: {}".format(
            test_filenames[i],
            CLASSES[y_pred[i]], CLASSES[y_true[i]]))
        plt.plot(x_test[i])
        plt.show()
    else:
        print("OK")