In [1]:
# CNN + LSTM model
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Flatten
from keras.layers import Dense, Dropout , BatchNormalization
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import TimeDistributed
from keras.utils import to_categorical
from keras import optimizers
from keras.layers import Reshape
from keras.models import load_model
from keras.layers import LSTM
from keras.layers.merge import concatenate
from keras.utils.vis_utils import plot_model
from keras.models import Model
from keras import Input
from sklearn.preprocessing import MinMaxScaler
from sliding_window import sliding_window
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE # doctest: +NORMALIZE_WHITESPACE
from collections import Counter
from sklearn.preprocessing import RobustScaler
from scipy.stats import mode

import os
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Number of sensor channels, i.e. how many leading columns of each data file are used as features.
# NOTE(review): the original note mentioned "gyro ... X,Y,Z"; 6 presumably means two
# tri-axial sensors (3 axes each) - confirm against the dataset description.
SENSOR_CHANNELS = 6
# Number of activity classes in the recognition problem (width of the softmax output).
NUM_CLASSES = 12

# sampling rate: 50Hz, 100 * 1/50 = 2 seconds
# Length (in samples) of the sliding window used to segment the data
SLIDING_WINDOW_LENGTH = 100
# Step (in samples) between consecutive windows: 50 of 100 samples -> 50% overlap
SLIDING_WINDOW_STEP = 50

# Directory the kernel was started in; the data files are resolved relative to it.
cwd = os.getcwd()

In [3]:
def debug_print_count(data, string='NA'):
    """Print a heading followed by a ``{value: occurrences}`` mapping for ``data``.

    Args:
        data: array-like handed to ``np.unique``.
        string: heading printed on its own line before the mapping.
    """
    values, occurrences = np.unique(data, return_counts=True)
    histogram = {value: count for value, count in zip(values, occurrences)}
    print(string)
    print(histogram)

In [4]:
# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a NumPy array.

    Args:
        filepath: path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The parsed table as returned by ``DataFrame.values``; one row per line
        of the file.
    """
    # sep=r'\s+' splits on any run of whitespace; it is the supported spelling of
    # the deprecated delim_whitespace=True and parses the file the same way.
    dataframe = read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

In [5]:
def load_dataset_group(dataset_type):
    """Load one data split and separate the sensor columns from the label column.

    The file ``<cwd>/<dataset_type>data.txt`` is read through ``load_file``.

    Args:
        dataset_type: file-name prefix of the split (the notebook uses
            ``'train'`` and ``'test'``).

    Returns:
        ``(features, labels)`` where ``features`` holds the first
        ``SENSOR_CHANNELS`` columns and ``labels`` the last column of the file.
    """
    file_name = '/{}data.txt'.format(dataset_type)
    table = load_file(cwd + file_name)
    features = table[:, :SENSOR_CHANNELS]
    labels = table[:, -1]
    return features, labels

In [6]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    """Load the train and test splits and report their shapes.

    Args:
        prefix: accepted for interface compatibility; it is not read here.

    Returns:
        ``(trainX, trainy, testX, testy)`` as produced by
        ``load_dataset_group('train')`` and ``load_dataset_group('test')``.
    """
    loaded = []
    # Same order as before: load + report train first, then test.
    for group, heading in (('train', "Train set:"), ('test', "Test set:")):
        features, labels = load_dataset_group(group)
        print(heading + str(features.shape) + str(labels.shape))
        loaded.extend((features, labels))

    trainX, trainy, testX, testy = loaded
    return trainX, trainy, testX, testy

In [7]:
def har_sliding_window(data_x, data_y, ws, ss):
    """Segment samples and labels into sliding windows.

    Each label window is reduced to a single activity label by taking the
    mode (most frequent value) of the labels inside it.
    (An alternative that used the last label of each window was left
    disabled in the original notebook.)

    Args:
        data_x: 2-D sample array; windows span ``ws`` rows and all columns.
        data_y: label sequence aligned with the rows of ``data_x``.
        ws: window size, in rows.
        ss: step between consecutive windows, in rows.

    Returns:
        ``(windows, labels)``: windows cast to ``float32`` and one ``uint8``
        label per window.
    """
    n_columns = data_x.shape[1]
    windows = sliding_window(data_x, (ws, n_columns), (ss, 1))

    label_windows = sliding_window(data_y, ws, ss)
    window_labels = mode(label_windows, axis=1)[0]
    # Flatten to one label per window regardless of the shape mode() returned.
    flat_labels = window_labels.reshape(len(window_labels))

    return windows.astype(np.float32), flat_labels.astype(np.uint8)

In [8]:
# summarize scores
def summarize_results(scores):
    """Print the raw scores followed by their mean and standard deviation.

    Args:
        scores: sequence of numeric scores.
    """
    print(scores)
    stats = (mean(scores), std(scores))
    print('Accuracy: %.3f%% (+/-%.3f)' % stats)

In [9]:
# Plot the data
def plot_data(original_data, scaled_data=None):
    """Plot the distribution of the original data next to the scaled data.

    Args:
        original_data: values drawn on the left axes.
        scaled_data: optional values drawn on the right axes. When it is
            ``None`` or empty the right axes is left blank.
    """
    # plot both together to compare
    fig, ax = plt.subplots(1, 2)
    sns.distplot(original_data, ax=ax[0], color='y')
    ax[0].set_title("Original Data")
    # Test for None/emptiness explicitly: `if scaled_data:` raises ValueError
    # ("truth value of an array ... is ambiguous") for a multi-element NumPy array.
    if scaled_data is not None and np.size(scaled_data) > 0:
        sns.distplot(scaled_data, ax=ax[1])
        ax[1].set_title("Scaled data")
    plt.show()

In [12]:
from keras.backend import argmax

def _conv_branch(branch_input):
    """Apply three Conv1D -> Dropout(0.5) -> MaxPooling1D(2) stages to ``branch_input``."""
    features = branch_input
    # (filters, kernel_size) per stage: wider kernels first, more filters later.
    for n_filters, kernel_width in ((16, 7), (32, 5), (64, 3)):
        features = Conv1D(filters=n_filters, kernel_size=kernel_width,
                          padding='same', activation='relu')(features)
        features = Dropout(0.5)(features)
        features = MaxPooling1D(pool_size=2)(features)
    return features


# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
    """Build, train and return the two-branch CNN + LSTM classifier.

    Two identical convolutional branches each take a window of shape
    ``(SLIDING_WINDOW_LENGTH, SENSOR_CHANNELS)``; their outputs are
    concatenated and fed to an LSTM, a dense layer and a softmax over
    ``NUM_CLASSES`` classes. Both branches are trained on the same
    ``trainX``.

    Despite its name, this function only fits the model; ``testX`` and
    ``testy`` are accepted for interface compatibility and are not used.
    Evaluation is done by the caller on the returned model.

    Args:
        trainX: training windows fed to both model inputs.
        trainy: one-hot training targets.
        testX: unused.
        testy: unused.

    Returns:
        The fitted Keras ``Model``. As a side effect the model summary is
        printed and an architecture diagram is written to
        ``HAPT_CNNLSTM_parallel.png``.
    """
    _verbose, epochs, batch_size = 1, 10, 64

    n_timesteps, n_features, n_outputs = SLIDING_WINDOW_LENGTH, SENSOR_CHANNELS, NUM_CLASSES

    # input 1 / branch 1
    inputs1 = Input(shape=(n_timesteps, n_features))
    pools1_3 = _conv_branch(inputs1)

    # input 2 / branch 2 (same architecture, separate weights)
    inputs2 = Input(shape=(n_timesteps, n_features))
    pools2_3 = _conv_branch(inputs2)

    merged = concatenate([pools1_3, pools2_3])
    lstm_1 = LSTM(100, activation='relu')(merged)
    dense1 = Dense(100, activation='relu')(lstm_1)
    output = Dense(n_outputs, activation='softmax')(dense1)

    model = Model(inputs=[inputs1, inputs2], outputs=output)
    opt = optimizers.Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.fit([trainX, trainX], trainy, epochs=epochs, batch_size=batch_size, verbose=_verbose)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()
    plot_model(model, to_file='HAPT_CNNLSTM_parallel.png', show_shapes=True, show_layer_names=True)
    return model

In [13]:

# run an experiment
def run_experiment(repeats=1):
    """Run the full pipeline: load, scale, window, train, predict and report.

    Steps, in order:
      1. load the train/test splits with ``load_dataset``;
      2. scale the features with a ``RobustScaler`` fitted on the training
         split only;
      3. segment both splits with ``har_sliding_window`` and reshape them to
         ``(-1, SLIDING_WINDOW_LENGTH, SENSOR_CHANNELS)``;
      4. one-hot encode the labels (labels start at 1, so column 0 is dropped);
      5. train via ``evaluate_model`` and predict on the test windows;
      6. print the confusion matrix, classification report and accuracy.

    Args:
        repeats: accepted for interface compatibility; the experiment is
            currently run once and this value is not read.
    """
    # load data
    trainX, trainy, testX, testy = load_dataset()

    # Fit the scaling statistics on the training split and reuse them for the
    # test split. Re-fitting on the test data would scale the two splits
    # differently and leak test-set statistics into the evaluation.
    scaler = RobustScaler()
    #scaler = MinMaxScaler()
    trainX = scaler.fit_transform(trainX)
    testX = scaler.transform(testX)

    # Sensor data is segmented using a sliding window mechanism
    trainX, trainy = har_sliding_window(trainX, trainy, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
    testX, testy = har_sliding_window(testX, testy, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)

    # Data is reshaped
    trainX = trainX.reshape((-1, SLIDING_WINDOW_LENGTH, SENSOR_CHANNELS)) # for input to Conv1D
    testX = testX.reshape((-1, SLIDING_WINDOW_LENGTH, SENSOR_CHANNELS)) # for input to Conv1D

    debug_print_count(trainy, '[b4]Train Y: ')
    debug_print_count(testy, '[b4]Test Y: ')

    # Labels are left 1-based here (no zero-offset), so this second pair of
    # counts is identical to the pair above.
    debug_print_count(trainy, '[b4-train]train Y: ')
    debug_print_count(testy, '[b4-train]Test Y: ')

    # one hot encode y
    # Pin the width to NUM_CLASSES + 1 so both splits get the same number of
    # columns even if the highest class is absent from one of them, then drop
    # column 0 because the labels start at 1.
    trainy = to_categorical(trainy, num_classes=NUM_CLASSES + 1)[:, 1:]
    testy = to_categorical(testy, num_classes=NUM_CLASSES + 1)[:, 1:]

    print(" ..after sliding and reshaping, train data: inputs {0}, targets {1}".format(trainX.shape, trainy.shape))
    print(" ..after sliding and reshaping, test data : inputs {0}, targets {1}".format(testX.shape, testy.shape))

    model_g = evaluate_model(trainX, trainy, testX, testy)
    #model_g.save('parallel_cnn_test_model.h5')

    # The model has two inputs; both receive the same test windows.
    y_pred = model_g.predict([testX,testX])
    y_predict = np.argmax(y_pred, axis=1)
    testy = np.argmax(testy, axis=1)

    # add back the value to make sure activity label start from 1 instead of 0
    testy = testy + 1
    y_predict= y_predict + 1
    debug_print_count(testy, 'Test Y: ')

    print(confusion_matrix(testy, y_predict))
    print(classification_report(testy, y_predict))
    print("accuracy: " + str(accuracy_score(testy, y_predict)*100) + ('%'))

# run the experiment: executes when this cell runs (loads the data, trains the
# model and prints the evaluation metrics)
run_experiment()

Train set:(554294, 6)(554294,)
Test set:(261320, 6)(261320,)
[b4]Train Y: 
{1: 1704, 2: 1606, 3: 1471, 4: 1672, 5: 1856, 6: 1820, 7: 148, 8: 110, 9: 172, 10: 161, 11: 214, 12: 150}
[b4]Test Y: 
{1: 740, 2: 730, 3: 687, 4: 857, 5: 903, 6: 921, 7: 61, 8: 52, 9: 76, 10: 62, 11: 69, 12: 67}
[b4-train]train Y: 
{1: 1704, 2: 1606, 3: 1471, 4: 1672, 5: 1856, 6: 1820, 7: 148, 8: 110, 9: 172, 10: 161, 11: 214, 12: 150}
[b4-train]Test Y: 
{1: 740, 2: 730, 3: 687, 4: 857, 5: 903, 6: 921, 7: 61, 8: 52, 9: 76, 10: 62, 11: 69, 12: 67}
 ..after sliding and reshaping, train data: inputs (11084, 100, 6), targets (11084, 12)
 ..after sliding and reshaping, test data : inputs (5225, 100, 6), targets (5225, 12)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Co