# HAR
* http://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones
* MLP-Mixer, LSTM, MLP, 1DCNNあたりで比較

In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.regularizers import l2, l1

import models, utils

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(physical_devices[0], 'GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

%load_ext autoreload
%autoreload 2

## データみる

In [2]:
SELECT_COLUMNS = [
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_",
    "body_gyro_x_",
    "body_gyro_y_",
    "body_gyro_z_",
    "total_acc_x_",
    "total_acc_y_",
    "total_acc_z_"
]

window_size = 128

In [3]:
DATA_PATH = '../../../data/OpenData/HAR/UCI_HAR_Dataset/'

In [4]:
def get_df_data(train=True):
    
    kind = 'test'
    
    if train:
        kind = 'train'
    
    target = pd.read_csv(os.path.join(DATA_PATH, kind, f'y_{kind}.txt'), header=None)
    
    data = []
    for c in SELECT_COLUMNS:
        df = pd.read_csv(os.path.join(DATA_PATH, kind, 'Inertial Signals', f'{c}{kind}.txt'), sep=' .', header=None)
        df.columns = [f'{c}{i}' for i in range(df.shape[1])]
        data.append(df)
    data = pd.concat(data, axis=1)
#     subject = pd.read_csv(os.path.join(DATA_PATH, kind, f'subject_{kind}.txt'), header=None)
#     data['subject'] = subject.values
    
    return data, target

def preprocessing(X, y, window_size=128):
    X_w = X.values.reshape((X.shape[0], len(SELECT_COLUMNS), window_size))
    X_w = np.swapaxes(X_w, 1, 2)
    
    y_w = (y - 1).values.ravel()
    
    return X_w, y_w

In [5]:
X_train, y_train = get_df_data()
X_test, y_test = get_df_data(train=False)

  if sys.path[0] == '':


In [6]:
X_train_w, y_train_w = preprocessing(X_train, y_train)
X_test_w, y_test_w = preprocessing(X_test, y_test)

In [7]:
# Test
assert X_train.loc[0, 'body_acc_x_127'] == X_train_w[0, -1, 0]
assert X_train.loc[1024, 'body_acc_y_120'] == X_train_w[1024, 120, 1]

assert X_test.loc[0, 'body_acc_x_127'] == X_test_w[0, -1, 0]
assert X_test.loc[1024, 'body_acc_y_120'] == X_test_w[1024, 120, 1]

assert set(range(6)) == set(y_train_w)
assert set(range(6)) == set(y_test_w)

## 学習

### MLP-Mixer

In [15]:
i = 0

In [109]:
parameters = dict(
    num_blocks = 6,
    patch_size=12, 
    hidden_dim=64,
    tokens_mlp_dim=32, 
    channels_mlp_dim=128,
    mlp_block_params=dict(
        activation='gelu',
        dropout_rate=0.5,
        dense_params=dict(
            kernel_initializer='he_normal',
        )
    )
)

In [110]:
mixer_model = models.mlp_mixer((window_size, len(SELECT_COLUMNS)), num_classes=6, conv1d=True, **parameters)

set conv1d model


In [111]:
mixer_model.compile(Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])

In [112]:
save_folder = f'./results/TimeSeriesModel/mlp_mixer/{i}/'

if not os.path.exists(save_folder):
    os.mkdir(save_folder)

cb = tf.keras.callbacks.ModelCheckpoint(
    save_folder,
    save_weights_only=True, 
    save_best_only=True
)

In [113]:
%time history = mixer_model.fit(X_train_w, y_train_w, validation_split=0.1, epochs=100, batch_size=64, callbacks=[cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [102]:
mixer_model.load_weights(save_folder)
loss, acc = mixer_model.evaluate(X_test_w, y_test_w)

i += 1

# parameters = convert_reg_params(parameters)
utils.save_results(parameters, os.path.join(save_folder, 'params.json'), 'json')
utils.save_results(history.history, os.path.join(save_folder, 'history.json'), 'json')
utils.save_results(dict(test_loss=loss, test_acc=acc), os.path.join(save_folder, 'evaluate.json'), 'json')



### Simple-MLP

In [114]:
parameters = dict(
    layers_list=[128, 128, 128],
    activation='relu',
    num_classes=6,
    aug=False
)

In [115]:
mlp_model = models.simple_mlp(input_shape=(window_size, len(SELECT_COLUMNS)), **parameters)
mlp_model.compile(Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])

In [116]:
save_folder = './results/TimeSeriesModel/sim_mlp/0/'

cb = tf.keras.callbacks.ModelCheckpoint(
    save_folder,
    save_weights_only=True, 
    save_best_only=True
)

%time  mlp_model.fit(X_train_w, y_train_w, validation_split=0.1, epochs=100, batch_size=64, callbacks=[cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7ff1aa30dba8>

In [157]:
mlp_model.load_weights(save_folder)
mlp_model.evaluate(X_test_w, y_test_w)



[0.9225102663040161, 0.8418731093406677]

### CNN-MLP

In [117]:
parameters = dict(
    layers_list=[128, 128, 128],
    activation='relu',
    aug=False,
    conv1d=True
)
conv_params=dict(
    filters=128, 
    kernel_size=8
)

In [118]:
cnn_mlp_model = models.cnn_mlp(input_shape=(window_size, len(SELECT_COLUMNS)), conv_params=conv_params, **parameters)
cnn_mlp_model.compile(Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])

In [119]:
save_folder = './results/TimeSeriesModel/cnn_mlp/0/'

cb = tf.keras.callbacks.ModelCheckpoint(
    save_folder,
    save_weights_only=True, 
    save_best_only=True
)

%time cnn_mlp_model.fit(X_train_w, y_train_w, epochs=100, validation_split=0.1, batch_size=64, callbacks=[cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7ff19b7ea828>

In [161]:
cnn_mlp_model.load_weights(save_folder)
cnn_mlp_model.evaluate(X_test_w, y_test_w)



[0.8203331232070923, 0.8907363414764404]

### LSTM

In [123]:
parameters = dict(
    layers_list=[64, 64],
    activation='relu',
    aug=False,
    dropout_rate=0.0
)

In [124]:
lstm_model = models.lstm(input_shape=(window_size, len(SELECT_COLUMNS), ), **parameters)
lstm_model.compile(Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])

In [125]:
save_folder = './results/TimeSeriesModel/lstm/0/'

cb = tf.keras.callbacks.ModelCheckpoint(
    save_folder,
    save_weights_only=True, 
    save_best_only=True
)

%time lstm_model.fit(X_train_w, y_train_w, epochs=100, validation_split=0.1, batch_size=64, callbacks=[cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7ff19b526ba8>

In [165]:
lstm_model.load_weights(save_folder)
lstm_model.evaluate(X_test_w, y_test_w)



[0.1661462038755417, 0.9555479884147644]

### CNN

In [126]:
parameters = dict(
    layers_list=[dict(
        filters=128, 
        kernel_size=8
    ),
    dict(
        filters=128, 
        kernel_size=8
    ),
    dict(
        filters=128, 
        kernel_size=8
    )],
    activation='relu',
    aug=False,
    conv1d=True
)

In [127]:
cnn_model = models.cnn(input_shape=(window_size, len(SELECT_COLUMNS)), **parameters)
cnn_model.compile(Adam(learning_rate=0.001), loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])

In [128]:
save_folder = './results/TimeSeriesModel/cnn/'

cb = tf.keras.callbacks.ModelCheckpoint(
    save_folder,
    save_weights_only=True, 
    save_best_only=True
)

%time cnn_model.fit(X_train_w, y_train_w, epochs=100, validation_split=0.1, batch_size=64, callbacks=[cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7ff1aa7f9f98>

In [176]:
cnn_model.load_weights(save_folder)
cnn_model.evaluate(X_test_w, y_test_w)



[0.24426037073135376, 0.9216151833534241]

In [96]:
mlp_model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1152)              0         
_________________________________________________________________
dense_374 (Dense)            (None, 128)               147584    
_________________________________________________________________
activation (Activation)      (None, 128)               0         
_________________________________________________________________
dense_375 (Dense)            (None, 128)               16512     
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_376 (Dense)            (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)             

In [95]:
cnn_mlp_model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_14 (Conv1D)           (None, 121, 128)          9344      
_________________________________________________________________
flatten_1 (Flatten)          (None, 15488)             0         
_________________________________________________________________
dense_378 (Dense)            (None, 128)               1982592   
_________________________________________________________________
activation_3 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_379 (Dense)            (None, 128)               16512     
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_380 (Dense)            (None, 128)             

In [177]:
cnn_model.summary()

Model: "sequential_39"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_16 (Conv1D)           (None, 121, 128)          9344      
_________________________________________________________________
activation_55 (Activation)   (None, 121, 128)          0         
_________________________________________________________________
conv1d_17 (Conv1D)           (None, 114, 128)          131200    
_________________________________________________________________
activation_56 (Activation)   (None, 114, 128)          0         
_________________________________________________________________
conv1d_18 (Conv1D)           (None, 107, 128)          131200    
_________________________________________________________________
activation_57 (Activation)   (None, 107, 128)          0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 13696)           

In [103]:
mixer_model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_15 (Conv1D)           (None, 10, 64)            6976      
_________________________________________________________________
reshape_14 (Reshape)         (None, 10, 64)            0         
_________________________________________________________________
mixer_block_90 (MixerBlock)  (None, 10, 64)            17514     
_________________________________________________________________
mixer_block_91 (MixerBlock)  (None, 10, 64)            17514     
_________________________________________________________________
mixer_block_92 (MixerBlock)  (None, 10, 64)            17514     
_________________________________________________________________
mixer_block_93 (MixerBlock)  (None, 10, 64)            17514     
_________________________________________________________________
mixer_block_94 (MixerBlock)  (None, 10, 64)          

In [181]:
lstm_model.summary()

Model: "sequential_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_26 (LSTM)               (None, 128, 64)           18944     
_________________________________________________________________
activation_53 (Activation)   (None, 128, 64)           0         
_________________________________________________________________
lstm_27 (LSTM)               (None, 64)                33024     
_________________________________________________________________
activation_54 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_423 (Dense)            (None, 10)                650       
Total params: 52,618
Trainable params: 52,618
Non-trainable params: 0
_________________________________________________________________
