In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import math
from datetime import date

import keras
from keras import optimizers, losses, activations, models
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from keras.layers import Dense, Input, Dropout, Convolution1D, MaxPool1D, GlobalMaxPool1D, GlobalAveragePooling1D, Concatenate
from sklearn.metrics import f1_score, accuracy_score


Using TensorFlow backend.


In [3]:
df_train = pd.read_csv("C:/00ETH/ml4h/Project1/archive/mitbih_train.csv", header=None)
df_train = df_train.sample(frac=1)
df_test = pd.read_csv("C:/00ETH/ml4h/Project1/archive/mitbih_test.csv", header=None)

Y = np.array(df_train[187].values).astype(np.int8)
X = np.array(df_train[list(range(187))].values)[..., np.newaxis]

Y_test = np.array(df_test[187].values).astype(np.int8)
X_test = np.array(df_test[list(range(187))].values)[..., np.newaxis]


In [4]:
# Transformation 1 -> smoothing original time series

def smooth_data(data, window_size):
    rolling_transform = data.rolling(window=window_size)
    rolling_mean = rolling_transform.mean()
    return rolling_mean    

X_smooth_small = smooth_data(df_train, 3)
X_smooth_medium = smooth_data(df_train, 9)
X_smooth_large = smooth_data(df_train, 15)

X_test_smooth_small = smooth_data(df_test, 3)
X_test_smooth_medium = smooth_data(df_test, 9)
X_test_smooth_large = smooth_data(df_test, 15)

X_smooth_small = X_smooth_small.sample(frac=1)
X_smooth_medium = X_smooth_medium.sample(frac=1)
X_smooth_large = X_smooth_large.sample(frac=1)

X_test_smooth_small = X_test_smooth_small.sample(frac=1)
X_test_smooth_medium = X_test_smooth_medium.sample(frac=1)
X_test_smooth_large = X_test_smooth_large.sample(frac=1)

X_smooth_small = np.array(X_smooth_small[list(range(187))].values)[..., np.newaxis]
X_smooth_medium = np.array(X_smooth_medium[list(range(187))].values)[..., np.newaxis]
X_smooth_large = np.array(X_smooth_large[list(range(187))].values)[..., np.newaxis]

X_test_smooth_small = np.array(X_test_smooth_small[list(range(187))].values)[..., np.newaxis]
X_test_smooth_medium = np.array(X_test_smooth_medium[list(range(187))].values)[..., np.newaxis]
X_test_smooth_large = np.array(X_test_smooth_large[list(range(187))].values)[..., np.newaxis]

In [5]:
# Transformation 2 -> downsample original time series
def downsample_data(data, factor, sample_rate):
    data_return = np.zeros([data.shape[0], math.ceil(data.shape[1]/factor)])
    start = date(2000, 1, 1)
    end = start + dt.timedelta(days=data.shape[1]-1)
    index = pd.date_range(start, end)
    for i in range(0,data.shape[0]-1):
        s = pd.Series(data[i,:,0], index=index)
        s = s.resample(sample_rate)
        sample_mean = s.mean()
        sample_mean = sample_mean.to_numpy()
        data_return[i,:] = sample_mean
    return data_return

X_sample_small = downsample_data(X, 2, '2D')
X_sample_medium = downsample_data(X, 4, '4D')
X_sample_large = downsample_data(X, 8, '8D')

X_test_sample_small = downsample_data(X_test, 2, '2D')
X_test_sample_medium = downsample_data(X_test, 4, '4D')
X_test_sample_large = downsample_data(X_test, 8, '8D')
    

In [6]:
print(X.shape)
print(X_smooth_small.shape)
print(X_sample_small.shape)
print(X_sample_medium.shape)
print(X_sample_large.shape)

(87554, 187, 1)
(87554, 187, 1)
(87554, 94)
(87554, 47)
(87554, 24)


In [7]:
def get_base_model(input_len, filter_size):
        inp = Input(shape=(input_len, 1))
        img = Convolution1D(32, kernel_size=filter_size, activation=activations.tanh, padding="same")(inp)
        img = Convolution1D(32, kernel_size=filter_size, activation=activations.tanh, padding="same")(img)
        img = MaxPool1D(pool_size=2)(img)
        
        dense = Dense(64,activation=activations.relu, name="dense_1")(img)
        dense = Dropout(0.3)(dense)
        model = models.Model(inputs = inp, outputs = dense)
        return model

In [8]:

def get_model(inputs_len = [187,49 ,47 , 24], filter_sizes = [64, 32, 16, 8]):
    nclass = 5
    
    inp_smallseq = Input(shape=(inputs_len[1], 1))
    inp_mediumseq = Input(shape=(inputs_len[2], 1))
    inp_largeseq = Input(shape=(inputs_len[3], 1))
    
    inp_smooth1 = Input(shape=(inputs_len[0], 1))   
    inp_smooth2 = Input(shape=(inputs_len[0], 1))   
    inp_smooth3 = Input(shape=(inputs_len[0], 1))   
    inp_org = Input(shape=(inputs_len[0], 1))
    
    base_net_org = get_base_model(inputs_len[0], filter_sizes[0])
    
    #smoothing
    base_net1 = get_base_model(inputs_len[0], filter_sizes[0])
    base_net2 = get_base_model(inputs_len[0], filter_sizes[0])
    base_net3 = get_base_model(inputs_len[0], filter_sizes[0])
    
    #sampling
    base_net_small = get_base_model(inputs_len[1], filter_sizes[1])
    base_net_medium = get_base_model(inputs_len[2], filter_sizes[2])
    base_net_large = get_base_model(inputs_len[3], filter_sizes[3])
    
    embedding_org = base_net_org(inp_org)
    
    #smoothing
    embedding_smooth1 = base_net_org(inp_smooth1)
    embedding_smooth2 = base_net_org(inp_smooth2)
    embedding_smooth3 = base_net_org(inp_smooth3)
    
    #sampling
    embedding_small = base_net_org(inp_smallseq)
    embedding_medium = base_net_org(inp_mediumseq)
    embedding_large = base_net_org(inp_largeseq)
    
    # merge all the outputs
    merged = Concatenate()([embedding_org, embedding_smooth1, embedding_smooth2, embedding_smooth3 ]) 
    #merged = Concatenate()([embedding_org,embedding_smooth1, embedding_smooth2, embedding_smooth3, embedding_small, embedding_medium, embedding_large ]) 
    
    merged = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(merged)
    merged = GlobalMaxPool1D()(merged)
    merged = Dropout(rate=0.2)(merged)
    
    dense_1 = Dense(64, activation=activations.relu, name="dense_1")(merged)
    out = Dense(nclass, activation=activations.softmax, name="dense_3_mitbih")(dense_1)

    model = models.Model(inputs=[inp_org,inp_smooth1, inp_smooth2, inp_smooth3 ], outputs=out)

    #model = models.Model(inputs=[inp_org,inp_smooth1, inp_smooth2, inp_smooth3, inp_smallseq, inp_mediumseq, inp_largeseq ], outputs=out)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model

In [9]:
model = get_model(inputs_len = [187,49 ,47 , 24], filter_sizes = [64, 32, 16, 8])
file_path = "baseline_cnn_mitbih.h5"
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
early = EarlyStopping(monitor="val_acc", mode="max", patience=5, verbose=1)
redonplat = ReduceLROnPlateau(monitor="val_acc", mode="max", patience=3, verbose=2)
callbacks_list = [checkpoint, early, redonplat]  # early






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 187, 1)       0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 187, 1)       0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            (None, 187, 1)       0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 187, 1)       0                                

In [12]:
#Y = keras.utils.to_categorical(Y)
X_train = [X,X_smooth_small, X_smooth_medium, X_smooth_large ]

In [None]:
model.fit(X_train, Y, epochs=1000, verbose=2, callbacks=callbacks_list, validation_split=0.1)
model.load_weights(file_path)

pred_test = model.predict([X_test,X_test_smooth_small, X_test_smooth_medium, X_test_smooth_large ])
pred_test = np.argmax(pred_test, axis=-1)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 78798 samples, validate on 8756 samples
Epoch 1/1000
 - 277s - loss: nan - acc: 0.8287 - val_loss: nan - val_acc: 0.8210

Epoch 00001: val_acc improved from -inf to 0.82104, saving model to baseline_cnn_mitbih.h5
Epoch 2/1000
 - 280s - loss: nan - acc: 0.8285 - val_loss: nan - val_acc: 0.8210

Epoch 00002: val_acc did not improve from 0.82104
Epoch 3/1000
 - 281s - loss: nan - acc: 0.8285 - val_loss: nan - val_acc: 0.8210

Epoch 00003: val_acc did not improve from 0.82104
Epoch 4/1000
 - 283s - loss: nan - acc: 0.8285 - val_loss: nan - val_acc: 0.8210

Epoch 00004: val_acc did not improve from 0.82104

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 5/1000
 - 281s - loss: nan - acc: 0.8285 - val_loss: nan - val_acc: 0.8210

Epoch 00005: val_acc did not improve from 0.82104
Epoch 6/1000


In [None]:
f1 = f1_score(Y_test, pred_test, average="macro")

print("Test f1 score : %s "% f1)

acc = accuracy_score(Y_test, pred_test)

print("Test accuracy score : %s "% acc)

In [None]:
def get_model(input_len = [187, , , , , , ], filter_sizes = [32, 32, 32, 32, 32, 16, 8]):
    nclass = 5
    inp = Input(shape=(187, 1))
    img_1 = Convolution1D(32, kernel_size=5, activation=activations.relu, padding="same")(inp)
    img_1 = Convolution1D(32, kernel_size=5, activation=activations.relu, padding="same")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="same")(img_1)
    img_1 = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="same")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(64, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.2)(img_1)

    dense_1 = Dense(64, activation=activations.relu, name="dense_1")(img_1)
    dense_1 = Dense(64, activation=activations.relu, name="dense_2")(dense_1)
    dense_1 = Dense(nclass, activation=activations.softmax, name="dense_3_mitbih")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.sparse_categorical_crossentropy, metrics=['acc'])
    model.summary()
    return model