In [97]:
from keras import backend as K
import numpy as np
import pandas as pd
from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
import keras_contrib
from keras.utils import to_categorical
import os
from keras.preprocessing.sequence import pad_sequences
from keras_contrib.layers import CRF

### Get Data from pickle file

In [2]:
# Load the full recording table from its pickle.
# NOTE(review): pd.read_pickle unpickles arbitrary objects -- only point this at files you trust.
data_df = pd.read_pickle("../dataset/HAPT Data Set/dataset.pkl")

In [3]:
# (rows, columns) of the full table, as a quick sanity check on the load.
data_df.shape

(815614, 9)

### Data Preparation

In [4]:
class Dataset(object):
    """Loader for the pickled train/test splits stored under ``dataset_root``.

    The root directory is expected to hold ``activity_labels.txt`` (one
    ``<int label> <activity name>`` pair per line) and one ``<split>.pkl``
    DataFrame per split, each with an ``activity`` column of activity names.
    """

    def __init__(self, dataset_root=None):
        """Remember the root directory and read the activity-name -> label map."""
        self.dataset_root = dataset_root
        # NOTE(review): the one-hot labels displayed in this notebook have 12
        # columns; confirm whether 13 is still the intended value here.
        self.n_tags = 13
        self.act_map = self.activity_map()

    def activity_map(self):
        """Parse ``activity_labels.txt`` into ``{activity_name: int_label}``.

        Blank lines are skipped and any run of whitespace is accepted as the
        separator, so trailing newlines or padding in the file do not crash
        the parse.
        """
        act_map = {}
        labels_path = os.path.join(self.dataset_root, "activity_labels.txt")
        with open(labels_path, "r") as al:
            for line in al:
                line = line.strip()
                if not line:
                    continue
                label, activity = line.split()
                act_map[activity] = int(label)
        return act_map

    def _load_data(self, file_path=None):
        """
        Read one pickled split into ``self.temp_df``.

        The ``activity`` name column is replaced by a zero-based integer
        ``activity_id`` column (file label minus one). An activity name that
        is missing from the label map raises ``KeyError``.
        """
        # NOTE(review): pd.read_pickle executes arbitrary pickled objects;
        # only load files from a trusted source.
        self.temp_df = pd.read_pickle(file_path)
        self.temp_df['activity_id'] = self.temp_df['activity'].apply(
            lambda name: self.act_map[name] - 1)
        self.temp_df = self.temp_df.drop(columns=['activity'])

    def load(self, split="train"):
        """
        Load one split and return ``(X, y)``.

        ``X`` is the first seven columns of the split as a DataFrame
        (presumably six sensor channels plus ``exp_id`` -- verify against the
        pickle schema). ``y`` is the one-hot encoding of ``activity_id``.

        The one-hot width is fixed from the label map rather than inferred
        from the data, so every split gets the same number of columns even if
        one of them happens to lack the highest-numbered activity.
        """
        self._load_data(os.path.join(self.dataset_root, split + ".pkl"))
        # Labels in the file are 1-based and ids are label - 1, so the largest
        # label equals the number of classes.
        n_classes = max(self.act_map.values()) if self.act_map else None
        y = to_categorical(self.temp_df['activity_id'], num_classes=n_classes)
        X = self.temp_df[self.temp_df.columns[:7]]
        return X, y

In [69]:
# Read both splits, then wrap the one-hot label arrays in DataFrames so they
# can be row-filtered alongside the feature frames.
ds = Dataset("../dataset/HAPT Data Set/")
train_X, train_y = ds.load(split="train")
test_X, test_y = ds.load(split="test")
train_y, test_y = pd.DataFrame(train_y), pd.DataFrame(test_y)

In [70]:
# Peek at the first rows of the one-hot test labels.
test_y.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
# Group each split's rows by experiment id, and count the distinct
# experiments present in the training split.
train_X_gp, test_X_gp = (frame.groupby(['exp_id']) for frame in (train_X, test_X))
exp_number = len(set(train_X['exp_id'].values))

In [72]:
# Trim every experiment in the training split to a whole number of 128-sample
# windows by discarding the trailing remainder rows of each experiment.
#
# The keep-mask is computed per row (position inside its experiment vs. the
# experiment's usable length), so it does not depend on the frame's index
# labels or on experiments appearing in sorted order, and re-running the cell
# is a no-op once the data is already trimmed.
exp_ids = train_X['exp_id']
exp_len = exp_ids.map(exp_ids.value_counts()).values      # rows in this row's experiment
pos_in_exp = train_X.groupby('exp_id').cumcount().values   # 0-based position inside it
keep = pos_in_exp < exp_len - exp_len % 128

# train_y has the same row order as train_X, so the positional mask applies to both.
train_X = train_X[keep].reset_index(drop=True)
train_y = train_y[keep].reset_index(drop=True)
assert len(train_X) == len(train_y) and len(train_X) % 128 == 0

In [73]:
# Drop the experiment id so only the remaining feature columns are kept for the model input.
tr_X = train_X.drop(columns = 'exp_id')

In [74]:
# Summarise row count, dtypes and non-null counts of the trimmed training labels.
train_y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 563456 entries, 0 to 563455
Data columns (total 12 columns):
0     563456 non-null float32
1     563456 non-null float32
2     563456 non-null float32
3     563456 non-null float32
4     563456 non-null float32
5     563456 non-null float32
6     563456 non-null float32
7     563456 non-null float32
8     563456 non-null float32
9     563456 non-null float32
10    563456 non-null float32
11    563456 non-null float32
dtypes: float32(12)
memory usage: 25.8 MB


In [75]:
# Cut the flat training feature table into consecutive windows of
# 128 rows x 6 columns.
X = tr_X.values.reshape(-1, 128, 6)

In [76]:
# (windows, rows per window, feature columns)
X.shape

(4402, 128, 6)

In [77]:
# Cut the one-hot training labels into the same 128-row windows
# (12 label columns per row).
y = train_y.values.reshape(-1, 128, 12)

In [78]:
# (windows, rows per window, label columns)
y.shape

(4402, 128, 12)

In [79]:
# Trim every experiment in the test split to a whole number of 128-sample
# windows by discarding the trailing remainder rows of each experiment.
#
# The keep-mask is computed per row (position inside its experiment vs. the
# experiment's usable length), so it does not depend on the frame's index
# labels or on experiments appearing in sorted order, and re-running the cell
# is a no-op once the data is already trimmed.
exp_ids = test_X['exp_id']
exp_len = exp_ids.map(exp_ids.value_counts()).values     # rows in this row's experiment
pos_in_exp = test_X.groupby('exp_id').cumcount().values   # 0-based position inside it
keep = pos_in_exp < exp_len - exp_len % 128

# test_y has the same row order as test_X, so the positional mask applies to both.
test_X = test_X[keep].reset_index(drop=True)
test_y = test_y[keep].reset_index(drop=True)
assert len(test_X) == len(test_y) and len(test_X) % 128 == 0

In [80]:
# Drop the experiment id so only the remaining feature columns are kept for the model input.
te_X = test_X.drop(columns = 'exp_id')

In [None]:
# Preview only the first rows instead of rendering the whole test feature frame.
te_X.head()

In [85]:
# Cut the flat test feature table into consecutive windows of 128 rows x 6 columns.
te_X = te_X.values.reshape(-1, 128, 6)

In [87]:
# Cut the one-hot test labels into the same 128-row windows (12 label columns per row).
te_y = test_y.values.reshape(-1, 128, 12)

In [88]:
# The window counts of test features and test labels should match.
te_X.shape, te_y.shape

((1940, 128, 6), (1940, 128, 12))

In [120]:
# Per-activity mean of every numeric column in the full table
# (this includes the exp_id / usr_id columns, whose means are not meaningful as signals).
data_df.groupby("activity").mean()

Unnamed: 0_level_0,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,exp_id,usr_id
activity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LAYING,0.363873,0.461315,0.365683,-0.017025,0.014463,-0.053613,33.41595,16.596712
LIE_TO_SIT,0.086444,0.655496,0.584529,0.0078,0.002084,0.007878,30.395157,15.121883
LIE_TO_STAND,0.079709,0.680099,0.528008,0.008728,0.001275,0.00441,31.337444,15.594644
SITTING,0.832984,0.13646,0.170461,0.030081,-0.006273,0.019097,33.457621,16.624999
SIT_TO_LIE,0.999628,0.047424,0.052002,-0.006552,-0.011597,-0.003875,32.061957,15.943836
SIT_TO_STAND,0.905752,0.24052,0.228301,0.009407,0.011619,-0.008985,31.556856,15.678291
STANDING,0.935457,-0.102094,0.088819,-0.013834,-0.023273,0.012171,33.096818,16.441222
STAND_TO_LIE,0.990244,-0.177374,-0.050188,0.062486,-0.023687,-0.014757,29.484811,14.683035
STAND_TO_SIT,1.005577,-0.153504,0.010305,-0.001116,-0.00099,0.006703,31.72596,15.760372
WALKING,0.976176,-0.160281,-0.04496,0.010646,-0.00591,0.002779,30.656691,15.24861


In [91]:
# Sequence tagger: two stacked bidirectional LSTMs -> per-timestep dense
# layer -> CRF over 12 tags. One prediction is produced for every one of the
# 128 timesteps in a window.
#
# Distinct names are used for the input tensor, the intermediate tensor and
# the final Model so that the builtin `input` is not shadowed and `model`
# only ever refers to the compiled Keras Model.
inputs = Input(shape=(128, 6))
hidden = Bidirectional(LSTM(units=50, return_sequences=True,
                            recurrent_dropout=0.1))(inputs)  # biLSTM with recurrent dropout
hidden = Bidirectional(LSTM(units=100, return_sequences=True,
                            recurrent_dropout=0.1))(hidden)  # second biLSTM with recurrent dropout
hidden = TimeDistributed(Dense(50, activation="relu"))(hidden)  # a dense layer as suggested by neuralNer
crf = CRF(12)  # CRF layer
out = crf(hidden)  # output
model = Model(inputs, out)
# The CRF layer supplies its own loss and accuracy functions.
model.compile(optimizer="nadam", loss=crf.loss_function, metrics=[crf.accuracy])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 128, 6)            0         
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 128, 100)          22800     
_________________________________________________________________
bidirectional_11 (Bidirectio (None, 128, 200)          160800    
_________________________________________________________________
time_distributed_8 (TimeDist (None, 128, 50)           10050     
_________________________________________________________________
crf_7 (CRF)                  (None, 128, 12)           780       
Total params: 194,430
Trainable params: 194,430
Non-trainable params: 0
_________________________________________________________________


In [92]:
# Train for 50 epochs, holding out 10% of the windows for validation.
history = model.fit(
    X, np.array(y),
    validation_split=0.1,
    shuffle=True,
    epochs=50,
    batch_size=32,
    verbose=1,
)

Train on 3961 samples, validate on 441 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [93]:
# Second fit call on the same `model` object: training continues from the
# current weights for another 50 epochs, and `history` is overwritten with
# the record of this run only.
history = model.fit(
    X, np.array(y),
    validation_split=0.1,
    shuffle=True,
    epochs=50,
    batch_size=32,
    verbose=1,
)

Train on 3961 samples, validate on 441 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [95]:
# Score the trained model on the held-out test windows.
# Returns [loss, metric] in the order given to model.compile (CRF loss, CRF accuracy).
model.evaluate(te_X,te_y,batch_size=32)



[-0.08320342098775599, 0.6165673323513307]

In [94]:
# Persist the full model to a single HDF5 file.
model.save('82model.h5')

In [96]:
# serialize architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.hdf5")

### Build LSTM CRF Model

In [None]:
class LSTM_model(Model):
    """Bidirectional-LSTM + CRF sequence tagger packaged as a Keras ``Model``.

    The network is a single bidirectional LSTM followed by a per-timestep
    dense layer and a CRF output layer. Window geometry and tag count match
    the arrays prepared earlier in this notebook (128 timesteps, 6 feature
    columns, 12 tags).

    Parameters
    ----------
    train_data, test_data :
        Stored on the instance as given. ``evaluate`` assumes ``test_data``
        is an ``(X, y)`` pair -- NOTE(review): confirm the intended format.
    tb_log_dir :
        Stored on the instance; not used by the code in this class.
    """

    def __init__(self, train_data=None, test_data=None, tb_log_dir=None):
        # Plain-int geometry is set first because build() reads it.
        self.n_timesteps = 128
        self.n_features = 6
        self.n_tags = 12
        # build() initialises the underlying Keras graph network and compiles it.
        self.build()
        self.train_data = train_data
        self.test_data = test_data
        self.tb_log_dir = tb_log_dir

    def evaluate(self, log_dir=None):
        """Evaluate on ``self.test_data`` and return what Keras' evaluate returns.

        ``log_dir`` is accepted for interface compatibility and is unused.

        Raises
        ------
        ValueError
            If no test data was supplied at construction time.
        """
        if self.test_data is None:
            raise ValueError("LSTM_model.evaluate() needs test_data=(X, y) at construction time")
        x_test, y_test = self.test_data
        return super(LSTM_model, self).evaluate(x_test, y_test)

    def build(self, input_shape=None):
        """Create the layer graph, initialise the base ``Model`` with it, and compile.

        ``input_shape`` is ignored; it exists only so that a framework call of
        the form ``build(input_shape)`` does not fail. Calling this again on an
        already-built instance does nothing.
        """
        if getattr(self, 'built', False):
            return
        inputs = Input(shape=(self.n_timesteps, self.n_features))
        hidden = Bidirectional(LSTM(units=50, return_sequences=True,
                                    recurrent_dropout=0.1))(inputs)  # biLSTM with recurrent dropout
        hidden = TimeDistributed(Dense(50, activation="relu"))(hidden)  # a dense layer as suggested by neuralNer
        crf = CRF(self.n_tags)
        outputs = crf(hidden)
        # Turn this instance into the functional model inputs -> outputs.
        super(LSTM_model, self).__init__(inputs, outputs)
        # A CRF output layer is trained with the loss/accuracy it provides,
        # as in the functional model compiled earlier in this notebook.
        self.compile(loss=crf.loss_function,
                     optimizer='adam', metrics=[crf.accuracy])
