## Activity Recognition system based on Multisensor data fusion
- 센서로부터 행동 패턴 파악하기
- X시계열 데이터로서 1시간 간격으로 평균 센서값이 저장되어 있습니다
- 총 480회의 센서 신호들을 통해서 사용자가 어떤 행동 패턴을 하고 있는지를 알아냅니다

https://archive.ics.uci.edu/ml/datasets/Activity+Recognition+system+based+on+Multisensor+data+fusion+(AReM)


In [11]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import glob

In [86]:
TARGET_COLUMN = 'PE'
PATH_ROOT = '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/'

sequence_train = 24
class_k = 7
sequence_length = 480

### 서브폴더를 탐색합니다

In [13]:
folder_contents = glob.glob(PATH_ROOT+'*')
folder_contents

['/home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending2',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/cycling',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/sensorsPlacement.pdf',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/standing',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/bendingType.pdf',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending1',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/lying',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/walking',
 '/home/sogangori/Downloads/dataset/machine learning dataset/AReM/sitting']

In [21]:
sub_folders = []
for contents in folder_contents:
    if not '.' in contents:
        sub_folder = contents.split(os.sep)[-1]
        sub_folders.append(sub_folder)
sub_folders

['bending2', 'cycling', 'standing', 'bending1', 'lying', 'walking', 'sitting']

In [37]:
path_csv_list = []
for i in range(len(sub_folders)):
    sub_folder = sub_folders[i]
    path_sub_folder = PATH_ROOT + sub_folder + os.sep     
    paths_csv = glob.glob(path_sub_folder + "*.csv")
    paths_csv.sort()
    path_csv_list.append(paths_csv)
    print(len(paths_csv), paths_csv[0]) 
print('path_csv_list', len(path_csv_list))

6 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending2/dataset1.csv
15 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/cycling/dataset1.csv
15 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/standing/dataset1.csv
7 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending1/dataset1.csv
15 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/lying/dataset1.csv
15 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/walking/dataset1.csv
15 /home/sogangori/Downloads/dataset/machine learning dataset/AReM/sitting/dataset1.csv
path_csv_list 7


###  csv 파일을 하나 읽어봅니다

In [58]:
sample_csv = path_csv_list[0][0]
df = pd.read_csv(sample_csv, skiprows=4)
df.shape

(480, 7)

In [91]:
df.iloc[:,1:].head()

Unnamed: 0,avg_rss12,var_rss12,avg_rss13,var_rss13,avg_rss23,var_rss23
0,23.75,0.43,24.0,0.0,24.67,0.47
1,22.67,0.94,24.0,0.0,24.67,0.94
2,21.33,0.47,24.0,0.0,24.75,0.83
3,21.0,0.0,24.0,0.0,25.25,1.09
4,23.0,0.71,23.75,0.43,24.75,0.83


In [100]:
np.array(df.iloc[:,1:]).shape

(480, 6)

In [61]:
df.isna().sum().sum()

0

In [96]:
def convert_csv_to_array(path, cls):
    df = pd.read_csv(path, skiprows=4)    
    df = df.dropna().iloc[:,1:]
    return np.array(df)

### only for test speed
- 학습 속도를 위해서 csv 파일은 2개만 사용하겠습니다.

In [97]:
restrict_dataset_m_per_class = 2

In [101]:
arr_list = []
y_list = []
for i in range(len(path_csv_list)):
    path_cls_cls = path_csv_list[i]
    for j in range(np.minimum(restrict_dataset_m_per_class, len(path_cls_cls))):
        print('path_cls_cls',path_cls_cls[j])
        arr = convert_csv_to_array(path_cls_cls[j], i)
        arr_list.append(arr)
        y_list.append(i)
        
len(arr_list)

path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending2/dataset1.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending2/dataset2.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/cycling/dataset1.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/cycling/dataset10.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/standing/dataset1.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/standing/dataset10.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending1/dataset1.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/bending1/dataset2.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/lying/dataset1.csv
path_cls_cls /home/sogangori/Downloads/dataset/machine learning dataset/AReM/lying/dataset10.csv
path_cls_cls /h

14

### 배열로 변환

In [116]:
data_arr = np.stack(arr_list, 0)
data_arr.shape

(14, 480, 6)

### 정규화

In [115]:
from sklearn.preprocessing import MinMaxScaler

In [121]:
data_arr_3d_shape = data_arr.shape
data_arr_2d_back = np.reshape(data_arr, [-1, data_arr.shape[-1]])
scaler_x = MinMaxScaler().fit(data_arr_2d_back)
data_2d_normal = scaler_x.transform(data_arr_2d_back)
data_arr = np.reshape(data_2d_normal, data_arr_3d_shape)

### 학습셋과 테스트셋을 나누자

In [122]:
feature_k = data_arr.shape[-1]
test_m = class_k * restrict_dataset_m_per_class//2
train_m = class_k * (restrict_dataset_m_per_class-restrict_dataset_m_per_class//2)
feature_k, train_m, test_m

(6, 7, 7)

In [123]:
x_train = np.zeros((train_m, sequence_length, feature_k), np.float32)
x_test  = np.zeros((test_m, sequence_length, feature_k), np.float32)
y_train = np.zeros((train_m), np.int32)
y_test  = np.zeros((test_m), np.int32)
for i in range(len(data_arr)):
    cls = y_list[i]
    arr = data_arr[i]    
    if i%2==0:
        x_train[i//2] = arr
        y_train[i//2] = cls 
    else:
        x_test[i//2] = arr
        y_test[i//2] = cls 

In [124]:
np.min(x_train), np.mean(x_train), np.max(x_train)

(0.0, 0.33014786, 1.0)

In [126]:
x_train.shape, y_train.shape

((7, 480, 6), (7,))

###  tensorflow RNN modeling
- 모델 구현 방법은 다양합니다만 뉴럴넷에서 가장 많이 사용하는 conv, rnn, fc 만으로 단순하게 구성해보겠습니다
- convolution을 사용해 2시간 간격으로  5시간 동안의 데이터를 인코딩해줍니다. 시퀀스 길이가 절반으로 줄어들것입니다
- GRU 를 이용해 시퀀스 축에 따라서 인코딩합니다
- fc 가 GRU의 마지막 output 32개 채널을 받아서 어떤 활동을 하는지 인식합니다

In [134]:
import tensorflow as tf
import tensorflow.contrib.slim as slim

In [135]:
tf.reset_default_graph()
def predict(net):
    print('net',net)
    net = slim.conv1d(net, 32, kernel_size=5, stride=2)
    
    print('net',net)
    cell = tf.keras.layers.GRUCell(32)
    outputs, state = tf.nn.dynamic_rnn(cell, net, dtype=tf.float32)
   
    net = slim.fully_connected(net[:,-1], class_k)
        
    return net

In [154]:
PX = tf.placeholder(tf.float32, [None, x_train.shape[1], x_train.shape[2]])
PY = tf.placeholder(tf.int32, [None])
h = predict(PX)
h_cls = tf.argmax(h, -1,output_type=tf.int32)
cost = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=PY, logits=h))
acc = tf.reduce_mean(tf.cast(tf.equal(PY, h_cls), tf.float32))
opt = tf.train.GradientDescentOptimizer(0.1)
train_op = opt.minimize(cost)
sess = tf.Session()
sess.run(tf.initialize_all_variables())

net Tensor("Placeholder_8:0", shape=(?, 480, 6), dtype=float32)
net Tensor("Conv_4/Relu:0", shape=(?, 240, 32), dtype=float32)


In [177]:
EPOCH = 1000
DISPLAY_k = 10
for iter in range(EPOCH):
    _, _cost, _acc = sess.run([train_op, cost, acc], {PX:x_train, PY:y_train})
    if iter % (EPOCH//DISPLAY_k) == 0:
        _acc_test = sess.run(acc, {PX:x_test, PY:y_test})
        print('%d cost:%.5f acc_train:%.2f, acc_test:%.2f)' %(iter, _cost, _acc, _acc_test))

0 cost:0.83581 acc_train:0.71, acc_test:0.57)
100 cost:0.83570 acc_train:0.71, acc_test:0.57)
200 cost:0.83571 acc_train:0.71, acc_test:0.57)
300 cost:0.83563 acc_train:0.71, acc_test:0.57)
400 cost:0.83557 acc_train:0.71, acc_test:0.57)
500 cost:0.83552 acc_train:0.71, acc_test:0.57)
600 cost:0.83549 acc_train:0.71, acc_test:0.57)
700 cost:0.83543 acc_train:0.71, acc_test:0.57)
800 cost:0.83542 acc_train:0.71, acc_test:0.57)
900 cost:0.83533 acc_train:0.71, acc_test:0.57)


### metric, 성능측정

In [178]:
train_h_cls = sess.run(h_cls, {PX:x_train})
test_h_cls = sess.run(h_cls, {PX:x_test})

In [179]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [180]:
sub_folders

['bending2', 'cycling', 'standing', 'bending1', 'lying', 'walking', 'sitting']

In [191]:
confusion_matrix(y_true=y_train, y_pred = train_h_cls)

array([[1, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 0]])

In [192]:
confusion_matrix(y_true=y_test, y_pred = test_h_cls)

array([[0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1, 0, 0]])

In [193]:
confusion_matrix(y_true=y_test, y_pred = test_h_cls)

array([[0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1, 0, 0]])

In [194]:
print(classification_report(y_true=y_train, y_pred = train_h_cls, target_names=sub_folders))

             precision    recall  f1-score   support

   bending2       0.33      1.00      0.50         1
    cycling       1.00      1.00      1.00         1
   standing       0.00      0.00      0.00         1
   bending1       1.00      1.00      1.00         1
      lying       1.00      1.00      1.00         1
    walking       1.00      1.00      1.00         1
    sitting       0.00      0.00      0.00         1

avg / total       0.62      0.71      0.64         7



  'precision', 'predicted', average, warn_for)


In [195]:
print(classification_report(y_true=y_test, y_pred = test_h_cls, target_names=sub_folders))

             precision    recall  f1-score   support

   bending2       0.00      0.00      0.00         1
    cycling       0.50      1.00      0.67         1
   standing       0.00      0.00      0.00         1
   bending1       1.00      1.00      1.00         1
      lying       0.33      1.00      0.50         1
    walking       1.00      1.00      1.00         1
    sitting       0.00      0.00      0.00         1

avg / total       0.40      0.57      0.45         7



  'precision', 'predicted', average, warn_for)
