In [None]:
import numpy as np
from numpy import genfromtxt
import glob
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import *
from keras.utils.np_utils import to_categorical
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import sys
pd.set_option('display.max_rows', 1000)
import random
from sklearn.utils import shuffle
from keras.callbacks import EarlyStopping
from scipy import stats
import math

In [None]:
def interpolate(df, index_list, col):
    """Fill one contiguous run of missing values in ``col`` by linear interpolation.

    The run is bridged with evenly spaced values between the row just before
    it and the row just after it. ``df`` is modified in place and also
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame addressed by integer row labels (``index_list[0] - 1`` and
        ``index_list[-1] + 1`` are looked up with ``.loc``).
    index_list : list of int
        Consecutive row labels of the missing run, in ascending order.
    col : str
        Name of the column to repair.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the run filled.

    Raises
    ------
    ValueError
        If the run has neither a preceding nor a following row, so there is
        no value to interpolate from.

    Notes
    -----
    When the run touches the first (or last) row there is only one neighbour;
    the run is then filled with that neighbour's value instead of raising
    ``KeyError``. An empty ``index_list`` is a no-op.
    """
    count = len(index_list)
    if count == 0:
        return df

    before_index = index_list[0] - 1
    after_index = index_list[-1] + 1
    has_before = before_index in df.index
    has_after = after_index in df.index

    if not has_before and not has_after:
        raise ValueError("cannot interpolate column %r: no neighbouring rows" % (col,))

    # With a single neighbour, both anchors collapse to it (constant fill).
    before_val = df.loc[before_index, col] if has_before else df.loc[after_index, col]
    after_val = df.loc[after_index, col] if has_after else before_val

    inc_amount = (after_val - before_val) / (count + 1)

    # One vectorised write; each value is anchored on before_val so rounding
    # error does not accumulate along the run.
    df.loc[index_list, col] = before_val + inc_amount * np.arange(1, count + 1)

    return df

In [None]:
data_folder = "data/Protocol/"
files_path = data_folder + "subject*.dat"

# ====================
#files_path = data_folder + "subject101.dat"
# ====================

df_main = pd.DataFrame(columns=['exp_id','label_id','acc0','acc1','acc2','gyro0','gyro1','gyro2'])

files = glob.glob(files_path)
for file in files:
    df_exp = pd.read_csv(file, header=None, delimiter=r"\s+")
    df_exp_filtered = df_exp.filter([1, 4, 5, 6, 10, 11, 12])
    df_exp_filtered.columns = ['label_id','acc0','acc1','acc2','gyro0','gyro1','gyro2']
    
    exp_id = int(file[23])
    df_exp_filtered['exp_id'] = exp_id
    
    if exp_id==9:
        continue
    
    cols=['exp_id','label_id','acc0','acc1','acc2','gyro0','gyro1','gyro2']
    df_exp_filtered = df_exp_filtered[cols]
    
    total_rows = len(df_exp_filtered)
    interpol_cols = ['acc0','acc1','acc2','gyro0','gyro1','gyro2']
    for col in interpol_cols:
        index = 0
        while index < total_rows:
            if math.isnan(df_exp_filtered.loc[index,col]):
                index_list = [index]
                nan_index = index + 1
                while (True):
                    if math.isnan(df_exp_filtered.loc[nan_index,col]):
                        index_list.append(nan_index)
                        nan_index += 1
                    else:
                        break
                df_exp_filtered = interpolate(df_exp_filtered, index_list, col)
            index += 1
    
    df_main = df_main.append(df_exp_filtered, ignore_index = True)

In [None]:
# test interpolate for exp 1
# df_exp_filtered[19190:19199]

In [None]:
# Number of consecutive rows in one sliding window; segment() and test()
# stop as soon as fewer rows than this remain.
window_size = 512
# Number of rows the window start advances between consecutive windows.
window_move_size = 100 #window_size // 2

In [None]:
def trim(df_OVA, margin=1000):
    """Drop the rows at the beginning and end of every activity run.

    A run is a maximal stretch of consecutive rows sharing the same
    ``label_id``. For a run covering positions ``start..end`` the rows at
    positions ``start + margin`` up to, but not including, ``end - margin``
    are kept; runs too short to leave anything are dropped entirely.

    Parameters
    ----------
    df_OVA : pandas.DataFrame
        Frame with a ``label_id`` column. Rows are addressed by position, so
        the frame is expected to carry a fresh ``0..n-1`` index.
    margin : int, optional
        Number of rows removed at each side of a run (default 1000).

    Returns
    -------
    pandas.DataFrame
        The kept rows of all runs, concatenated in order with a new
        ``0..m-1`` index. Empty (with the standard eight columns) when
        nothing survives.

    Raises
    ------
    ValueError
        If ``margin`` is negative.

    Notes
    -----
    The upper bound is clamped: a short run near the top of the frame used to
    produce a negative slice end, which wrapped around and copied most of the
    frame into the result.
    """
    if margin < 0:
        raise ValueError("margin must be non-negative")

    output_columns = ['exp_id','label_id','acc0','acc1','acc2','gyro0','gyro1','gyro2']

    labels = df_OVA['label_id'].to_numpy()
    n_rows = len(labels)

    kept_pieces = []
    if n_rows > 0:
        # Positions where the label differs from the previous row start a new run.
        change_points = np.flatnonzero(labels[1:] != labels[:-1]) + 1
        run_starts = np.concatenate(([0], change_points))
        run_ends = np.concatenate((change_points - 1, [n_rows - 1]))

        for run_start, run_end in zip(run_starts, run_ends):
            lower = int(run_start) + margin
            upper = int(run_end) - margin
            # upper <= lower also covers the negative-upper case that used to wrap.
            if upper > lower:
                kept_pieces.append(df_OVA.iloc[lower:upper])

    if not kept_pieces:
        return pd.DataFrame(columns=output_columns)

    df_OVA_trimmed = pd.concat(kept_pieces, ignore_index=True)

    # Standard columns first, any extra input columns after them.
    extra_columns = [c for c in df_OVA_trimmed.columns if c not in output_columns]
    return df_OVA_trimmed.reindex(columns=output_columns + extra_columns)

In [None]:
def segment(df_OVA):
    """Cut every experiment into fixed-size windows grouped by activity label.

    For experiment ids 1 to 8 the matching rows are re-indexed from zero and
    scanned with a window of ``window_size`` rows that advances by
    ``window_move_size`` rows (both are notebook-level settings). A window is
    kept only when all of its rows share one ``label_id`` and that label is
    not 0; its ``acc0``..``gyro2`` columns are stored as one array.

    Returns a list with 25 entries: index 0 holds the placeholder ``'N/A'``,
    index ``k`` (1..24) holds a numpy array with all windows of activity
    ``k``. The array shape of every activity is printed before returning.
    """
    windows_by_activity = ['N/A'] + [[] for _ in range(24)]

    for experiment in range(1, 9):
        rows = df_OVA.loc[df_OVA['exp_id'] == experiment].reset_index(drop=True)

        start = 0
        while start < len(rows):
            # .loc slicing is inclusive on both ends, hence the -1.
            window = rows.loc[start:start + window_size - 1]
            if len(window) < window_size:
                break

            labels_seen = set(window['label_id'].values)
            is_mixed = len(labels_seen) > 1
            is_unlabelled = len(labels_seen) == 1 and next(iter(labels_seen)) == 0

            if not (is_mixed or is_unlabelled):
                features = np.array(window.loc[:, 'acc0':'gyro2'])
                label = next(iter(labels_seen))
                windows_by_activity[label].append(features)

            start += window_move_size

    # Turn each per-activity list of windows into a single array.
    windows_by_activity[1:25] = [np.array(group) for group in windows_by_activity[1:25]]

    for activity, group in enumerate(windows_by_activity[1:25], start=1):
        print("Act_id:", activity, " ", group.shape)

    return windows_by_activity

In [None]:
def get_binary_X_and_Y (act_id , segmeneted_activities):
    """Build the one-vs-all data set for a single activity.

    The windows of the twelve trained activities are stacked in the fixed
    order 1, 2, 3, 17, 16, 12, 13, 4, 7, 6, 5, 24. Windows belonging to
    ``act_id`` are labelled 1, all others 0.

    Parameters
    ----------
    act_id : int
        Activity treated as the positive class.
    segmeneted_activities : sequence
        Indexable by activity id; each used entry is an array whose first
        axis enumerates windows (as returned by ``segment``).

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the stacked window array and ``Y`` the
        matching integer vector of 0/1 labels.

    Raises
    ------
    ValueError
        If none of the twelve activities has any window.

    Notes
    -----
    Activities without windows are skipped. Their empty placeholder array is
    one-dimensional and would otherwise make ``np.concatenate`` fail against
    the three-dimensional window arrays.
    """
    activity_order = (1, 2, 3, 17, 16, 12, 13, 4, 7, 6, 5, 24)

    x_parts = []
    y_parts = []
    for current_id in activity_order:
        windows = np.asarray(segmeneted_activities[current_id])
        window_count = windows.shape[0] if windows.ndim > 0 else 0
        if window_count == 0:
            continue
        x_parts.append(windows)
        y_parts.append(np.full(window_count, 1 if current_id == act_id else 0))

    if not x_parts:
        raise ValueError("no windows available for any of the trained activities")

    X = np.concatenate(x_parts)
    Y = np.concatenate(y_parts)

    return (X , Y)

In [None]:
def create_model (data_input_shape):
    """Build and compile the binary classifier used for every activity.

    The network is a single LSTM layer with 100 units that returns only its
    last output, followed by a 15-unit sigmoid layer and a one-unit sigmoid
    output. It is compiled with binary cross-entropy, the Adam optimizer and
    accuracy as the reported metric.

    ``data_input_shape`` is passed unchanged as the LSTM ``input_shape``.
    """
    lstm_units = 100

    stack = [
        LSTM(lstm_units, return_sequences=False, name='LSTM1', input_shape=data_input_shape),
        Dense(15, activation='sigmoid', name='Hidden_Layer'),
        Dense(1, activation='sigmoid', name='Dense_out'),
    ]
    classifier = Sequential(stack)

    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

In [None]:
def train_binary_classifier(act_id, segmeneted_activities):
    """Train and evaluate the one-vs-all classifier for one activity.

    The windows are labelled with ``get_binary_X_and_Y``, shuffled and split
    into train / validation / test parts (25 % test, then a third of the
    remainder for validation). A model from ``create_model`` is fitted for at
    most 15 epochs with early stopping on the validation loss, and its test
    loss and accuracy are printed.

    Parameters
    ----------
    act_id : int
        Activity treated as the positive class.
    segmeneted_activities : sequence
        Per-activity window arrays as returned by ``segment``.

    Returns
    -------
    The fitted Keras model.
    """
    (X , Y) = get_binary_X_and_Y (act_id , segmeneted_activities)

    X , Y = shuffle(X , Y)

    X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.25)
    X_train,X_valid,y_train,y_valid=train_test_split(X_train,y_train,test_size=0.333)

    # (rows per window, features per row)
    data_input_shape = X.shape[1:3]
    model = create_model(data_input_shape)
    #print (model.summary()  , "\n")

    patience = 5
    callbacks=[EarlyStopping(monitor='val_loss',patience=patience,verbose=1)]
    # validation_data must be a tuple: Keras documents (x_val, y_val), and
    # newer versions interpret a list as a list of inputs without targets.
    model.fit(X_train,y_train,
              validation_data=(X_valid,y_valid),
              epochs=15,
              batch_size=100,
              verbose=2,
              callbacks=callbacks
             )

    score = model.evaluate(X_test, y_test, verbose=0)
    print('\nTest loss:', score[0])
    print('Test accuracy:', score[1])
    print ()

    return model

In [None]:
def train(df_OVA):
    """Train one binary classifier per activity on the given training rows.

    The rows are trimmed and segmented into windows once; afterwards a
    classifier is trained for each activity in a fixed order, with a progress
    line printed before each one.

    Returns a list of 25 entries indexed by activity id: the trained model
    for every trained activity, ``None`` everywhere else (including index 0).
    """
    # Training order with the activity names used in this notebook:
    #   1 Lying, 2 Sitting, 3 Standing, 17 Ironing, 16 Vacuum Cleaning,
    #   12 Ascending Stairs, 13 Descending Stairs, 4 Walking,
    #   7 Nordic Walking, 6 Cycling, 5 Running, 24 Rope Jumping
    training_order = (1, 2, 3, 17, 16, 12, 13, 4, 7, 6, 5, 24)

    segmeneted_activities = segment(trim(df_OVA))

    models = [None] * 25

    print()
    for act_id in training_order:
        print("Training activity {} ...".format(act_id))
        models[act_id] = train_binary_classifier(act_id, segmeneted_activities)
        #models[act_id].save('model512_{}.h5'.format(act_id))

    return models

In [None]:
# from keras.models import load_model
# model = load_model('model.h5')

In [None]:
def predict(X_predict, classifiers=None, threshold=0.5):
    """Classify one window with the one-vs-all models.

    Parameters
    ----------
    X_predict : numpy.ndarray
        A single two-dimensional window; it is reshaped to a batch of one.
    classifiers : sequence, optional
        Models indexed by activity id (entries 1..24 are consulted, ``None``
        entries are skipped). Defaults to the notebook-level ``models`` list.
    threshold : float, optional
        Minimum winning score required to report an activity (default 0.5).

    Returns
    -------
    tuple
        ``(label_id, scores)``. ``scores`` is a list of 24 values where
        position ``k`` holds the score of activity ``k + 1`` (0 when no model
        exists). ``label_id`` is the activity with the highest score, or 0
        when that score is below ``threshold``.
    """
    if classifiers is None:
        classifiers = models

    batch = np.reshape(X_predict , (1,X_predict.shape[0],X_predict.shape[1]))

    scores = [0] * 24

    for slot in range(min(24, len(classifiers) - 1)):
        model = classifiers[slot + 1]
        if model is not None:
            # predict() replaces predict_proba(), which newer Keras versions
            # no longer provide; for a sigmoid output both return the same
            # probabilities.
            pred_probs = model.predict(batch, verbose=0)
            scores[slot] = pred_probs[0][0]

    best_score = max(scores)
    if best_score < threshold:
        label_id = 0
    else:
        label_id = scores.index(best_score) + 1

    return label_id, scores

In [None]:
def test(df_continuous , models):
    """Measure window-level accuracy on one held-out, continuous recording.

    The frame is scanned with a window of ``window_size`` rows advancing by
    ``window_move_size`` rows (notebook-level settings). The true label of a
    window is the most frequent ``label_id`` in it (the smallest one on a
    tie); windows whose label is 0 are reported but not scored. Every other
    window is classified with ``predict`` and compared with its label.

    Parameters
    ----------
    df_continuous : pandas.DataFrame
        Rows of the held-out experiment with a fresh ``0..n-1`` index.
    models : list
        Not read here: ``predict`` is called with the window only and so
        uses the notebook-level ``models`` list.

    Returns
    -------
    float
        Fraction of scored windows predicted correctly, or NaN when no window
        was scored.
    """
    # =================
    # To restrict the evaluation to one experiment while debugging:
    #df_continuous2 = df_continuous.loc[df_continuous['exp_id'] == 7]
    df_continuous2 = df_continuous
    # =================

    index = 0
    correct_preds = 0
    total_preds = 0
    while index < len(df_continuous2):

        df_window = df_continuous2.loc[index:index+window_size-1]
        if len(df_window) < window_size:
            break

        # Most frequent label. np.unique returns sorted values, so argmax
        # picks the smallest label on a tie, and the result does not depend
        # on the return shape of scipy.stats.mode, which differs by version.
        label_values, label_counts = np.unique(df_window['label_id'].values, return_counts=True)
        window_label_id = label_values[np.argmax(label_counts)]

        if window_label_id == 0:
            print ("Window Label: 0")
            index = index + window_move_size
            continue

        na_window = np.array((df_window.loc[:,'acc0':'gyro2']))
        pred_label_id, _ = predict(na_window)

        print ("Window Label: " + str(window_label_id) + "  Predicted Label: " + str(pred_label_id))

        total_preds += 1

        if (pred_label_id == window_label_id):
            correct_preds += 1

        index = index + window_move_size

    # Without any scored window the accuracy is undefined, not a division error.
    if total_preds == 0:
        accu = float('nan')
    else:
        accu = correct_preds/total_preds
    print ("\nContinuous Accuracy: " + str(accu))
    return accu

In [None]:
# Leave-one-experiment-out evaluation: each experiment id from 1 to 8 is held
# out once for testing while the classifiers are trained on all other rows.
folds_accu = [0] * 8
for i in range(1, 9):
    split_exp_id = i
    is_held_out = df_main['exp_id'] == split_exp_id

    df_continuous = df_main.loc[is_held_out].reset_index(drop=True)
    df_OVA = df_main.loc[~is_held_out].reset_index(drop=True)

    print("fold", i, "Training:")
    print("len of df_main:", str(len(df_main)))
    print("len of df_continuous:", str(len(df_continuous)))
    print("len of df_OVA:", str(len(df_OVA)))
    print()

    # Kept as a notebook-level name: predict() reads `models` from there.
    models = train(df_OVA)

    print("\nfold", i, "Testing:")
    accu = test(df_continuous, models)
    folds_accu[i - 1] = accu

    print()

In [None]:
# Mean accuracy over all folds. The running total deliberately has its own
# name: assigning to `sum` would hide the builtin for every later cell.
total_accu = math.fsum(folds_accu)

# An empty list of folds has no average; report NaN instead of dividing by zero.
avg_accu = total_accu / len(folds_accu) if folds_accu else float('nan')
print ("\nAverage Accuracy for all folds:" , avg_accu)