# Project 2. HMM 적용하여 데이터 모델링 해보기 (자유주제)

## Hidden Markov Model

* 은닉마코프모델 계산 및 구현
  * https://ratsgo.github.io/machine%20learning/2017/10/14/computeHMMs/
* Speech and Language Processing (Jurafsky & Martin), Appendix A: Hidden Markov Models
  * https://web.stanford.edu/~jurafsky/slp3/A.pdf
  
## ~~Taxi Service Trajectory (TST)~~
* Taxi Service Trajectory (TST) Prediction Challenge 2015
  * http://www.geolink.pt/ecmlpkdd2015-challenge/index.html
  * Artificial Neural Networks Applied to Taxi Destination Prediction
    * https://arxiv.org/pdf/1508.00021.pdf
  
## Human Activity Recognition (HAR)
* ~~Smartphone Dataset for Human Activity Recognition (HAR) in Ambient Assisted Living (AAL) Data Set~~
  * [Dataset](https://archive.ics.uci.edu/ml/datasets/Smartphone+Dataset+for+Human+Activity+Recognition+%28HAR%29+in+Ambient+Assisted+Living+%28AAL%29)
  * Author
    * *Kadian Alicia Davis, Evans Boateng Owusu* 
  * Structure
    * Triaxial acceleration from the accelerometer (total acceleration)
      * `final_acc_train.txt`, `final_acc_test.txt`
    * Triaxial Angular velocity from the gyroscope. 
      * `final_gyro_train.txt`, `final_gyro_test.txt` 
    * A 561-feature vector with time and frequency domain variables 
      * `final_X_train.txt`, `final_X_test.txt`
    * The corresponding activity labels
      * `final_y_train.txt`, `final_y_test.txt`
* **Human Activity Recognition Using Smartphones Data Set**
  * [Dataset](https://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones)
  * Author
    * *Jorge L. Reyes-Ortiz, Davide Anguita, Alessandro Ghio, Luca Oneto and Xavier Parra*
  * Structure
    * Raw Data
      * `acc_exp#{1~61}_user{1~30}.txt`
      * `gyro_exp#{1~61}_user{1~30}.txt`
      * `labels.txt`

In [1]:
from scipy import io
import matplotlib.pyplot as plt
import numpy as np

# Activity names; position k corresponds to the zero-based label k stored in `y`.
actions = [
    'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING',
    'STANDING', 'LAYING', 'STAND_TO_SIT', 'SIT_TO_STAND', 'SIT_TO_LIE',
    'LIE_TO_SIT', 'STAND_TO_LIE', 'LIE_TO_STAND',
]

# Integer label table, one row per labelled segment.
labels = np.loadtxt('./HAR/RawData/labels.txt', dtype=int, delimiter=' ')
N = len(labels)

# Per-segment containers: object arrays because every segment has its own length.
acc = np.empty(N, dtype=object)
gyro = np.empty(N, dtype=object)
# Zero-based activity label for each segment.
y = np.empty(N, dtype=int)

In [2]:
# Cut every labelled segment out of its raw accelerometer / gyroscope recording.
# The recording pair is only re-read when the file name differs from the one
# used for the previous label row.
prev = ''
acc_file = np.empty([])
gyro_file = np.empty([])
for i, row in enumerate(labels):
    exp_id, user_id, activity_id, first, last = row[:5]
    filename = 'exp{:02d}_user{:02d}.txt'.format(exp_id, user_id)

    if filename != prev:
        acc_file = np.loadtxt('./HAR/RawData/acc_' + filename, delimiter=' ')
        gyro_file = np.loadtxt('./HAR/RawData/gyro_' + filename, delimiter=' ')
        prev = filename
        print(filename)

    # The last sample index is inclusive, hence the +1 on the slice end.
    segment = slice(first, last + 1)
    acc[i] = acc_file[segment, :]
    gyro[i] = gyro_file[segment, :]
    # Activity ids in the label table start at 1; store them zero-based.
    y[i] = activity_id - 1

exp01_user01.txt
exp02_user01.txt
exp03_user02.txt
exp04_user02.txt
exp05_user03.txt
exp06_user03.txt
exp07_user04.txt
exp08_user04.txt
exp09_user05.txt
exp10_user05.txt
exp11_user06.txt
exp12_user06.txt
exp13_user07.txt
exp14_user07.txt
exp15_user08.txt
exp16_user08.txt
exp17_user09.txt
exp18_user09.txt
exp19_user10.txt
exp20_user10.txt
exp21_user10.txt
exp22_user11.txt
exp23_user11.txt
exp24_user12.txt
exp25_user12.txt
exp26_user13.txt
exp27_user13.txt
exp28_user14.txt
exp29_user14.txt
exp30_user15.txt
exp31_user15.txt
exp32_user16.txt
exp33_user16.txt
exp34_user17.txt
exp35_user17.txt
exp36_user18.txt
exp37_user18.txt
exp38_user19.txt
exp39_user19.txt
exp40_user20.txt
exp41_user20.txt
exp42_user21.txt
exp43_user21.txt
exp44_user22.txt
exp45_user22.txt
exp46_user23.txt
exp47_user23.txt
exp48_user24.txt
exp49_user24.txt
exp50_user25.txt
exp51_user25.txt
exp52_user26.txt
exp53_user26.txt
exp54_user27.txt
exp55_user27.txt
exp56_user28.txt
exp57_user28.txt
exp58_user29.txt
exp59_user29.t

In [3]:
# Sanity check: accelerometer shape, gyroscope shape and activity name per segment.
for acc_segment, gyro_segment, label in zip(acc, gyro, y):
    print(acc_segment.shape, gyro_segment.shape, actions[label])

(983, 3) (983, 3) STANDING
(160, 3) (160, 3) STAND_TO_SIT
(802, 3) (802, 3) SITTING
(165, 3) (165, 3) SIT_TO_STAND
(1015, 3) (1015, 3) STANDING
(288, 3) (288, 3) STAND_TO_LIE
(876, 3) (876, 3) LAYING
(197, 3) (197, 3) LIE_TO_SIT
(932, 3) (932, 3) SITTING
(192, 3) (192, 3) SIT_TO_LIE
(927, 3) (927, 3) LAYING
(191, 3) (191, 3) LIE_TO_STAND
(583, 3) (583, 3) WALKING
(895, 3) (895, 3) WALKING
(911, 3) (911, 3) WALKING
(965, 3) (965, 3) WALKING
(656, 3) (656, 3) WALKING_DOWNSTAIRS
(631, 3) (631, 3) WALKING_UPSTAIRS
(624, 3) (624, 3) WALKING_DOWNSTAIRS
(666, 3) (666, 3) WALKING_UPSTAIRS
(624, 3) (624, 3) WALKING_DOWNSTAIRS
(673, 3) (673, 3) WALKING_UPSTAIRS
(976, 3) (976, 3) STANDING
(206, 3) (206, 3) STAND_TO_SIT
(789, 3) (789, 3) SITTING
(156, 3) (156, 3) SIT_TO_STAND
(927, 3) (927, 3) STANDING
(268, 3) (268, 3) STAND_TO_LIE
(863, 3) (863, 3) LAYING
(184, 3) (184, 3) LIE_TO_SIT
(833, 3) (833, 3) SITTING
(237, 3) (237, 3) SIT_TO_LIE
(778, 3) (778, 3) LAYING
(242, 3) (242, 3) LIE_TO_STAND
(6

(307, 3) (307, 3) STAND_TO_SIT
(1044, 3) (1044, 3) SITTING
(182, 3) (182, 3) SIT_TO_STAND
(1452, 3) (1452, 3) STANDING
(297, 3) (297, 3) STAND_TO_LIE
(1168, 3) (1168, 3) LAYING
(245, 3) (245, 3) LIE_TO_SIT
(1222, 3) (1222, 3) SITTING
(251, 3) (251, 3) SIT_TO_LIE
(1311, 3) (1311, 3) LAYING
(206, 3) (206, 3) LIE_TO_STAND
(988, 3) (988, 3) WALKING
(1029, 3) (1029, 3) WALKING
(719, 3) (719, 3) WALKING_DOWNSTAIRS
(719, 3) (719, 3) WALKING_UPSTAIRS
(681, 3) (681, 3) WALKING_DOWNSTAIRS
(694, 3) (694, 3) WALKING_UPSTAIRS
(681, 3) (681, 3) WALKING_DOWNSTAIRS
(726, 3) (726, 3) WALKING_UPSTAIRS
(1306, 3) (1306, 3) STANDING
(227, 3) (227, 3) STAND_TO_SIT
(959, 3) (959, 3) SITTING
(176, 3) (176, 3) SIT_TO_STAND
(1189, 3) (1189, 3) STANDING
(244, 3) (244, 3) STAND_TO_LIE
(1195, 3) (1195, 3) LAYING
(165, 3) (165, 3) LIE_TO_SIT
(1069, 3) (1069, 3) SITTING
(199, 3) (199, 3) SIT_TO_LIE
(1232, 3) (1232, 3) LAYING
(192, 3) (192, 3) LIE_TO_STAND
(1013, 3) (1013, 3) WALKING
(947, 3) (947, 3) WALKING
(645, 3

In [18]:
# Table with three object-typed columns per segment, filled in after the
# observation sequences have been computed.
origin_data = np.empty(shape=(N, 3), dtype=object)
# One discrete observation sequence per segment (lengths vary, hence dtype=object).
obs = np.empty(N, dtype=object)

def get_observation(acc, gyro):
    """Quantize a tri-axial acceleration window into discrete symbols (0..63).

    For every time step two 3-bit codes are built from the first three
    columns of ``acc`` (column 0 is the most significant bit):

    * acceleration code: bit set when the sample is strictly positive;
    * jerk code: bit set when the sample is strictly greater than the
      previous sample (the first time step has zero jerk, so its code is 0).

    The returned symbol is ``jerk_code * 8 + acc_code``.

    Parameters
    ----------
    acc : array-like of shape (T, >=3)
        Accelerometer samples, one row per time step.
    gyro : array-like
        Accepted for interface compatibility only. The current encoding does
        not use the gyroscope signal (earlier experiments mixed its sign bits
        into the symbol as ``acc_code * 8 + gyro_code`` or
        ``jerk_code * 64 + acc_code * 8 + gyro_code``).

    Returns
    -------
    numpy.ndarray of int, shape (T,)
        One symbol per time step.
    """
    samples = np.asarray(acc, dtype=float)[:, :3]

    # First difference along time; row 0 stays zero because it has no predecessor.
    jerk = np.zeros_like(samples)
    jerk[1:] = samples[1:] - samples[:-1]

    # Bit weights for columns 0, 1, 2 (same as 1 << (2 - i)).
    bit_weights = np.array([4, 2, 1])
    jerk_code = (jerk > 0).astype(int) @ bit_weights
    acc_code = (samples > 0).astype(int) @ bit_weights

    return (jerk_code * 8 + acc_code).astype(int)

# Encode every segment and record (label, symbol sequence, segment index) so the
# original segment can still be identified after shuffling.
for i, (acc_segment, gyro_segment) in enumerate(zip(acc, gyro)):
    obs[i] = get_observation(acc_segment, gyro_segment)
    origin_data[i] = [y[i], obs[i], i]

print(origin_data.shape)

(1214, 3)


## train_test_split
* `train_size : test_size = 90 : 10`

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the segments for testing. No random_state is passed, so the
# partition is not fixed by this call.
train_data, test_data = train_test_split(origin_data, test_size=0.1)

# Reorder each partition by activity label (column 0) so that rows of the same
# class are contiguous.
train_order = np.argsort(train_data[:, 0])
train_data = train_data[train_order]
test_order = np.argsort(test_data[:, 0])
test_data = test_data[test_order]

print('train:', train_data.shape)
print('test:', test_data.shape)
print(test_data)

train: (1092, 3)
test: (122, 3)
[[0
  array([ 6, 14, 14, 14, 30,  6, 30, 62, 62, 62, 46, 38, 38, 14, 44, 36, 44,
       52, 52, 52, 22, 54, 62, 22, 22, 14, 12, 12, 60, 54, 38,  6, 14, 14,
       14, 46, 38, 38, 36, 36,  4, 60, 60, 62, 44, 44, 36, 20, 20, 22, 22,
       62, 62, 38, 38,  6, 12, 12, 12, 12, 60,  4, 54,  6, 54, 30, 15, 15,
       45, 37, 52, 52, 54, 22, 30, 22,  6, 22, 14, 44, 60, 54, 22,  6, 14,
       30, 14, 46, 46, 38, 36,  4, 44, 36, 36, 60, 60, 52, 36,  4,  4, 22,
        6, 54, 62, 46, 38, 36, 12, 12, 28, 28, 28, 63, 55, 54, 46, 47, 47,
       39, 37, 37, 52, 63, 22, 14, 22,  6,  6, 12, 44, 60, 22, 22,  6, 14,
       14, 14, 14, 44, 54, 54, 46, 62, 54, 54, 38, 36, 44, 36,  4,  4, 20,
       20, 22, 62, 62, 46, 38,  4, 12, 12, 12, 28, 60, 54, 63, 54, 22, 14,
       47, 38, 36, 36, 52, 44, 22, 14, 22, 22, 22, 14,  6, 62,  6,  6,  6,
        6,  6, 12, 44, 44, 60, 52, 62, 54, 38,  6, 36, 12, 44, 52, 20, 20,
       22, 22, 62, 62, 38, 38, 12, 12, 28, 28, 28, 60, 63, 54,

# Human Activity Recognition HMM
## HMM Learn
* `hmmlearn` Tutorial
  * https://hmmlearn.readthedocs.io/en/latest/tutorial.html
* `MultinomialHMM` API Reference
  * https://hmmlearn.readthedocs.io/en/latest/api.html#multinomialhmm

In [21]:
from hmmlearn import hmm
import time

num_actions = len(actions)

# The observations are integer symbol ids (one categorical draw per time step).
# From hmmlearn 0.3 on, MultinomialHMM models multinomial count vectors instead
# and CategoricalHMM is the class for symbol sequences; fall back to
# MultinomialHMM on older releases that do not ship CategoricalHMM.
_DiscreteHMM = getattr(hmm, 'CategoricalHMM', hmm.MultinomialHMM)

# One HMM per activity.
models = np.empty(num_actions, dtype=object)
for i in range(num_actions):
    models[i] = _DiscreteHMM(n_components=64, verbose=True, n_iter=1)

# train_data is sorted by label, so class i occupies rows idx[i]:idx[i+1].
# Building the boundaries from per-class counts keeps them correct even when a
# class has no rows (recording "last row + 1" per label would leave such a
# class at 0 and make the following class span all earlier rows).
class_counts = np.bincount(train_data[:, 0].astype(int), minlength=num_actions)
idx = np.zeros(num_actions + 1, dtype=int)
idx[1:] = np.cumsum(class_counts)

# Fail early, before any expensive fitting, if some activity cannot be trained.
missing = [actions[k] for k in range(num_actions) if class_counts[k] == 0]
if missing:
    raise ValueError('No training examples for: {}'.format(', '.join(missing)))

# Fit each activity's HMM on the concatenation of its training sequences.
for i in range(num_actions):
    start_time = time.time()
    trainRange = range(idx[i], idx[i+1])
    print('Training', actions[i], 'model w.', idx[i+1]-idx[i], 'examples', end=' ')

    sequences = [train_data[j][1] for j in trainRange]
    # hmmlearn expects a single (n_samples, 1) column plus the sequence lengths.
    trainX = np.concatenate(sequences).reshape(-1, 1)
    lengths = [len(seq) for seq in sequences]

    models[i].fit(trainX, lengths)
    print("(elapsed time: {}s).".format(time.time() - start_time))

Training WALKING model w. 119 examples 

         1     -476407.8196             +nan


(elapsed time: 126.8526999950409s).
Training WALKING_UPSTAIRS model w. 160 examples 

         1     -428229.6193             +nan


(elapsed time: 108.17595529556274s).
Training WALKING_DOWNSTAIRS model w. 176 examples 

         1     -424513.0234             +nan


(elapsed time: 113.75565123558044s).
Training SITTING model w. 105 examples 

         1     -463174.2877             +nan


(elapsed time: 121.36390113830566s).
Training STANDING model w. 104 examples 

         1     -491930.5776             +nan


(elapsed time: 127.57761144638062s).
Training LAYING model w. 107 examples 

         1     -507038.7572             +nan


(elapsed time: 130.4429247379303s).
Training STAND_TO_SIT model w. 56 examples 

         1      -39892.5569             +nan
Fitting a model with 8127 free scalar parameters with only 7290 data points will result in a degenerate solution.


(elapsed time: 9.859299659729004s).
Training SIT_TO_STAND model w. 56 examples 

         1      -30402.4040             +nan


(elapsed time: 7.501377582550049s).
Training SIT_TO_LIE model w. 54 examples 

         1      -46550.1442             +nan


(elapsed time: 11.435280084609985s).
Training LIE_TO_SIT model w. 54 examples 

         1      -41776.5469             +nan


(elapsed time: 10.35712742805481s).
Training STAND_TO_LIE model w. 48 examples 

         1      -51179.9168             +nan


(elapsed time: 12.542938709259033s).
Training LIE_TO_STAND model w. 53 examples (elapsed time: 10.159754753112793s).


         1      -41105.1826             +nan


In [22]:
# Classify every test sequence with the activity whose HMM gives it the highest
# log-likelihood, and report the misclassified cases.
testSize = test_data.shape[0]
wrongCases = 0

print('Wrong Cases')
for i in range(testSize):
    trueAction = test_data[i][0]
    # hmmlearn expects a single (n_samples, 1) column.
    testX = test_data[i][1].reshape(-1, 1)

    # Score each model exactly once; argmax keeps the first model on ties.
    scores = [model.score(testX) for model in models]
    maxAction = int(np.argmax(scores))
    maxScore = scores[maxAction]

    if trueAction != maxAction:
        # Columns: true activity, predicted activity, best log-likelihood, and
        # a "correct?" flag that is always False on this branch.
        print(actions[trueAction], actions[maxAction], maxScore, False)
        wrongCases += 1

print('(Wrong, Total)', (wrongCases, testSize))
if testSize:
    print('Accuracy: {}%'.format((1 - wrongCases/testSize) * 100))
else:
    # Avoid a ZeroDivisionError when there is nothing to evaluate.
    print('Accuracy: n/a (empty test set)')

Wrong Cases
WALKING WALKING_DOWNSTAIRS -3347.7268891372446 False
WALKING WALKING_UPSTAIRS -2299.4012896540416 False
WALKING WALKING_UPSTAIRS -2497.924297659702 False
WALKING WALKING_UPSTAIRS -1722.215789974349 False
WALKING WALKING_UPSTAIRS -2622.3012404043325 False
WALKING WALKING_UPSTAIRS -3614.739626923079 False
WALKING WALKING_UPSTAIRS -2851.135756707767 False
WALKING_UPSTAIRS STANDING -1899.2423636151755 False
WALKING_UPSTAIRS STANDING -1922.253452156441 False
WALKING_UPSTAIRS STANDING -1858.0651382575445 False
WALKING_UPSTAIRS WALKING -1585.1357072835888 False
WALKING_UPSTAIRS SIT_TO_LIE -2069.1333906617606 False
WALKING_UPSTAIRS STANDING -1860.9395118999355 False
WALKING_UPSTAIRS STANDING -1818.3006927516253 False
WALKING_UPSTAIRS STANDING -1729.538899779428 False
WALKING_UPSTAIRS STANDING -1792.1641887040887 False
WALKING_DOWNSTAIRS WALKING_UPSTAIRS -1567.9174071843863 False
WALKING_DOWNSTAIRS WALKING_UPSTAIRS -1949.3143002684844 False
WALKING_DOWNSTAIRS WALKING_UPSTAIRS -1742.

In [23]:
import pickle
import os
def save_hmm_models(models, name):
    """Pickle every model into a freshly created directory ``./<name>/``.

    Anything already present at ``name`` is removed first. Each model is
    written to ``./<name>/<name>_<index>_<activity>.pkl``, where the activity
    name comes from the module-level ``actions`` list, and the path is printed
    after the file is written.

    Parameters
    ----------
    models : sequence
        Trained models, ordered like ``actions``.
    name : str
        Directory name, also used as the file-name prefix.
    """
    # Local import keeps this block self-contained.
    import shutil

    # Remove leftovers without going through a shell, so names containing
    # spaces or shell metacharacters are handled safely and portably.
    if os.path.isdir(name) and not os.path.islink(name):
        shutil.rmtree(name)
    elif os.path.lexists(name):
        os.remove(name)
    os.mkdir(name)

    for i, model in enumerate(models):
        filename = './{}/{}_{:02d}_{}.pkl'.format(name, name, i, actions[i])
        with open(filename, 'wb') as fh:
            pickle.dump(model, fh)
        print('Save', filename)
# Persist the trained models under ./jerk8acc8iter1/.
# NOTE(review): the name presumably encodes the jerk*8 + acc symbol scheme and
# n_iter=1 used for training -- confirm before reusing it for other settings.
save_hmm_models(models, 'jerk8acc8iter1')

Save ./jerk8acc8iter1/jerk8acc8iter1_00_WALKING.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_01_WALKING_UPSTAIRS.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_02_WALKING_DOWNSTAIRS.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_03_SITTING.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_04_STANDING.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_05_LAYING.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_06_STAND_TO_SIT.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_07_SIT_TO_STAND.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_08_SIT_TO_LIE.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_09_LIE_TO_SIT.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_10_STAND_TO_LIE.pkl
Save ./jerk8acc8iter1/jerk8acc8iter1_11_LIE_TO_STAND.pkl
