In [1]:
# -*- coding:utf-8 -*-
import logging
from datetime import datetime
import numpy as np
import pandas as pd
import glob
from decimal import Decimal, ROUND_HALF_UP, ROUND_HALF_EVEN

In [None]:
import sys
# Extend the module search path with ../scripts so project modules can be imported;
# presumably this is where obtain_features.py lives — confirm against the repo layout.
sys.path.append('../scripts')

In [69]:
import matplotlib.pyplot as plt
%matplotlib inline

In [160]:
from statsmodels.distributions.empirical_distribution import ECDF
from obtain_features import ObtainFeatures

In [173]:
def create_features(acc_raw, gyro_raw):
    """Turn one raw accelerometer/gyroscope recording into ECDF feature vectors.

    The signals are denoised, cut into windows by ``ObtainFeatures.segment_signal``
    and the accelerometer windows are split into body and gravity components
    (only the body component is used). For every window, each of the six
    channels (body acc x/y/z, gyro x/y/z) contributes 10 values read from the
    support of its empirical CDF (``ECDF(...).x``) at evenly spaced positions,
    which gives 60 features per window.

    Args:
        acc_raw: Accelerometer samples; the windows derived from it are indexed
            by the columns 'x', 'y' and 'z'.
        gyro_raw: Gyroscope samples with the same layout.

    Returns:
        numpy.ndarray of shape (n_windows, 60), one row per window. When the
        segmentation yields no window at all, an empty (0, 60) array is
        returned instead of failing with an IndexError.
    """
    n_points = 10  # ECDF sample points taken per channel
    axes = ['x', 'y', 'z']
    n_features = 2 * len(axes) * n_points  # acc + gyro channels -> 60

    of = ObtainFeatures(acc_raw, gyro_raw)

    # Remove noises by median filter & Butterworth filter
    acc_raw = of.apply_median_filter(acc_raw)
    acc_raw = of.apply_butterworth_filter(acc_raw)
    gyro_raw = of.apply_median_filter(gyro_raw)
    gyro_raw = of.apply_butterworth_filter(gyro_raw)

    # Sample signals in fixed-width sliding windows
    tAccXYZ, tBodyGyroXYZ = of.segment_signal(acc_raw, gyro_raw)

    # Separate acceleration signal into body and gravity acceleration signal;
    # the gravity component is not used by these features.
    tBodyAccXYZ = []
    for acc in tAccXYZ:
        body_acc, _ = of.remove_gravity(acc)
        tBodyAccXYZ.append(body_acc)

    # Nothing to featurize (e.g. the recording produced no window).
    if not tBodyAccXYZ:
        return np.empty((0, n_features))

    # Evenly spaced positions across a window. The window length is taken from
    # the first window, so all windows are assumed to have the same length.
    # Decimal is used to round half up explicitly; the built-in round() rounds
    # half to even.
    idx = np.linspace(0, tBodyAccXYZ[0].shape[0] - 1, n_points)
    idx = [int(Decimal(str(ix)).quantize(Decimal('0'), rounding=ROUND_HALF_UP)) for ix in idx]

    rows = []
    for i, body_acc in enumerate(tBodyAccXYZ):
        body_gyro = tBodyGyroXYZ[i]
        parts = []
        # Accelerometer channels first, then gyroscope, each in x, y, z order.
        for window in (body_acc, body_gyro):
            for axis in axes:
                ecdf = ECDF(window[axis].values)  # fit
                # x[1:] skips the first entry of the ECDF support
                # (statsmodels prepends -inf there — confirm for your version).
                parts.append(ecdf.x[1:][idx])
        rows.append(np.hstack(parts))

    # Stack once at the end; the empty float seed keeps the (n, 60) float layout
    # and rejects rows of the wrong width, as the incremental vstack did.
    return np.vstack([np.empty((0, n_features))] + rows)

In [185]:
def create_features_all(root='../data/hapt_data_set/RawData/'):
    """Build ECDF feature tables for the train and test subjects.

    Reads every ``acc*.txt`` / ``gyro*.txt`` pair under ``root`` together with
    ``labels.txt``, keeps the labelled segments whose activity ID is 1-6, and
    runs ``create_features`` on each segment. Rows from subjects listed in
    ``train_subjects`` go to the training table; rows from every other subject
    go to the test table. One "User ID" entry is printed per processed file
    pair, followed by the shapes of both tables.

    Args:
        root: Directory that holds the raw signal files and ``labels.txt``.
            Defaults to the location used by this notebook.

    Returns:
        (X_train, X_test): two pandas DataFrames with 60 columns named
        ``tBody{Acc,Gyro}ECDF-{X,Y,Z}{0..9}``.

    NOTE(review): the activity IDs are not returned, so the rows cannot be
    matched to labels from this function's output alone.
    """
    import os  # local import: `os` is not imported at notebook level

    train_subjects = [1, 3, 5, 6, 7, 8, 11, 14, 15, 16, 17, 19, 21, 22, 23, 25, 26, 27, 28, 29, 30]
    # Kept for reference only: any subject not in train_subjects is treated as test.
    test_subjects = [2, 4, 9, 10, 12, 13, 18, 20, 24]

    # Files are paired purely by sorted position; zip stops at the shorter list.
    acc_files = sorted(glob.glob(os.path.join(root, 'acc*.txt')))
    gyro_files = sorted(glob.glob(os.path.join(root, 'gyro*.txt')))

    label_info = pd.read_table(os.path.join(root, 'labels.txt'), sep=' ', header=None,
                               names=['ExpID', 'UserID', 'ActID', 'ActStart', 'ActEnd'])

    # Collect per-segment feature blocks and stack once at the end instead of
    # re-copying the growing array on every segment. The empty float seed
    # keeps the (n, 60) layout even when no segment is found.
    train_parts = [np.empty((0, 60))]
    test_parts = [np.empty((0, 60))]

    for acc_file, gyro_file in zip(acc_files, gyro_files):
        # Parse the IDs from the file name only, so that 'exp' or 'user'
        # appearing in a directory name cannot break the parsing.
        file_name = os.path.basename(acc_file)
        exp_id = int(file_name.split('exp')[1][:2])
        user_id = int(file_name.split('user')[1][:2])

        print(f'User ID: {user_id}', end=', ')

        temp_label_info = label_info[(label_info.ExpID==exp_id)&(label_info.UserID==user_id)&(label_info.ActID.isin([1,2,3,4,5,6]))]

        acc_raw = pd.read_table(acc_file, sep=' ', header=None, names=['x', 'y', 'z'])
        gyro_raw = pd.read_table(gyro_file, sep=' ', header=None, names=['x', 'y', 'z'])

        target = train_parts if user_id in train_subjects else test_parts

        for act_start, act_end in temp_label_info[['ActStart', 'ActEnd']].values:
            # ActEnd is treated as inclusive, hence the +1 on the slice end.
            temp_acc_raw = acc_raw.iloc[act_start:act_end+1]
            temp_gyro_raw = gyro_raw.iloc[act_start:act_end+1]
            target.append(create_features(temp_acc_raw, temp_gyro_raw))

    columns = [f'tBody{sensor}ECDF-{axis}{i}' for sensor in ['Acc', 'Gyro'] for axis in ['X', 'Y', 'Z'] for i in range(10)]
    X_train = pd.DataFrame(np.vstack(train_parts), columns=columns)
    X_test = pd.DataFrame(np.vstack(test_parts), columns=columns)
    print(f'X_train {X_train.shape}')
    print(f'X_test {X_test.shape}')

    return X_train, X_test

In [175]:
# Build the train/test ECDF feature tables; prints one "User ID" entry per processed file pair.
X_train, X_test = create_features_all()

User ID: 1, User ID: 1, User ID: 2, User ID: 2, User ID: 3, User ID: 3, User ID: 4, User ID: 4, User ID: 5, User ID: 5, User ID: 6, User ID: 6, User ID: 7, User ID: 7, User ID: 8, User ID: 8, User ID: 9, User ID: 9, User ID: 10, User ID: 10, User ID: 10, User ID: 11, User ID: 11, User ID: 12, User ID: 12, User ID: 13, User ID: 13, User ID: 14, User ID: 14, User ID: 15, User ID: 15, User ID: 16, User ID: 16, User ID: 17, User ID: 17, User ID: 18, User ID: 18, User ID: 19, User ID: 19, User ID: 20, User ID: 20, User ID: 21, User ID: 21, User ID: 22, User ID: 22, User ID: 23, User ID: 23, User ID: 24, User ID: 24, User ID: 25, User ID: 25, User ID: 26, User ID: 26, User ID: 27, User ID: 27, User ID: 28, User ID: 28, User ID: 29, User ID: 29, User ID: 30, User ID: 30, X_train (7406, 60)
X_test (2993, 60)


In [186]:
# Re-runs the full feature extraction; the recorded output shows the same shapes as the earlier run cell.
# NOTE(review): this duplicates the earlier run cell — consider keeping only one.
X_train, X_test = create_features_all()

User ID: 1, User ID: 1, User ID: 2, User ID: 2, User ID: 3, User ID: 3, User ID: 4, User ID: 4, User ID: 5, User ID: 5, User ID: 6, User ID: 6, User ID: 7, User ID: 7, User ID: 8, User ID: 8, User ID: 9, User ID: 9, User ID: 10, User ID: 10, User ID: 10, User ID: 11, User ID: 11, User ID: 12, User ID: 12, User ID: 13, User ID: 13, User ID: 14, User ID: 14, User ID: 15, User ID: 15, User ID: 16, User ID: 16, User ID: 17, User ID: 17, User ID: 18, User ID: 18, User ID: 19, User ID: 19, User ID: 20, User ID: 20, User ID: 21, User ID: 21, User ID: 22, User ID: 22, User ID: 23, User ID: 23, User ID: 24, User ID: 24, User ID: 25, User ID: 25, User ID: 26, User ID: 26, User ID: 27, User ID: 27, User ID: 28, User ID: 28, User ID: 29, User ID: 29, User ID: 30, User ID: 30, X_train (7406, 60)
X_test (2993, 60)


In [192]:
# Preview the first rows of the training feature table.
X_train.head()

Unnamed: 0,tBodyAccECDF-X0,tBodyAccECDF-X1,tBodyAccECDF-X2,tBodyAccECDF-X3,tBodyAccECDF-X4,tBodyAccECDF-X5,tBodyAccECDF-X6,tBodyAccECDF-X7,tBodyAccECDF-X8,tBodyAccECDF-X9,...,tBodyGyroECDF-Z0,tBodyGyroECDF-Z1,tBodyGyroECDF-Z2,tBodyGyroECDF-Z3,tBodyGyroECDF-Z4,tBodyGyroECDF-Z5,tBodyGyroECDF-Z6,tBodyGyroECDF-Z7,tBodyGyroECDF-Z8,tBodyGyroECDF-Z9
0,-0.003461,-0.001327,-0.001135,-0.000941,-0.000344,0.000146,0.000293,0.000791,0.001552,0.003508,...,-0.007954,-0.002654,-0.001259,0.000153,0.001946,0.003077,0.004324,0.005765,0.008726,0.013297
1,-0.003138,-0.002501,-0.001429,-0.000844,-0.000305,-2.2e-05,0.000278,0.001047,0.001661,0.003362,...,-0.009107,-0.003402,-0.001398,3.6e-05,0.002229,0.003441,0.005872,0.009059,0.011393,0.020488
2,-0.005057,-0.002133,-0.001371,-0.000521,-0.000288,0.000416,0.001003,0.001546,0.002329,0.005705,...,-0.009107,-0.002133,-0.001185,0.000244,0.001945,0.004037,0.008866,0.012988,0.017336,0.028886
3,-0.005779,-0.002155,-0.001214,-0.001062,-0.000801,0.000107,0.00036,0.001424,0.001817,0.005043,...,-0.012525,-0.004372,-0.001447,0.000337,0.001672,0.003423,0.004373,0.006092,0.014999,0.028886
4,-0.005214,-0.001846,-0.000878,-0.000661,-0.00045,9.8e-05,0.000631,0.001369,0.002158,0.005246,...,-0.014773,-0.008015,-0.006083,-0.003116,-0.000887,0.001672,0.003917,0.005098,0.010876,0.021909
