### ISSM CA3

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
import glob
import time

### Dataset attributes
Column 1: Timestamp in seconds  
Column 2: Timestamp in microseconds  
Column 3-15: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S1 (Left Calf)  
Column 16-28: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S2 (Left Thigh)  
Column 29-41: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S3 (Right Calf)  
Column 42-54: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S4 (Right Thigh)  
Column 55-67: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S5 (Back)  
Column 68-80: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S6 (Left Lower Arm)  
Column 81-93: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S7 (Left Upper Arm)  
Column 94-106: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S8 (Right Lower Arm)  
Column 107-119: [AccX, AccY, AccZ, GyrX, GyrY, GyrZ, MagX, MagY, MagZ, Q1, Q2, Q3, Q4] of sensor S9 (Right Upper Arm)  
Column 120: Label (see activity set) [Walking = 1] [Jogging = 2] [Running = 3]

### Run following cells for the following dataset structure:
* df_walk : all 17 subjects + walking + (accel + gyro + quart)  
* df_jog : all 17 subjects + jogging + (accel + gyro + quart)  
* df_run : all 17 subjects + running + (accel + gyro + quart)  
    
Col[0]: Timestamp in seconds  
Col[1]: Timestamp in microseconds  
Col[67-79]: Sensor S6 (without Mag sensor [74 - 76])  
Col[93-105]: Sensor S8 (without Mag sensor [100 - 103])  
Col[119]: Activity Label

In [31]:
data_dir = '..\dataset'
# This method reads all dataset and filter out walking, jogging and running activities
def readcsvDataset(directory):
    files = glob.glob(directory + '\*.csv')
    df_walk = []
    df_jog = []
    df_run = []
    for file in files:
        df_all = pd.read_csv(file, header=None, delim_whitespace=True)
        df_walk.append(df_all[df_all[119] == 1])
        df_jog.append(df_all[df_all[119] == 2])
        df_run.append(df_all[df_all[119] == 3])
    return pd.concat(df_walk, ignore_index=True), pd.concat(df_jog, ignore_index=True), pd.concat(df_run, ignore_index=True)

In [57]:
# This method removes all positional sensors other than left/right lower arm, as well as Magnetic sensors
def removeOtherSensor(df_walk, df_jog, df_run):
    # [2:67] S1 to S5
    # [73:76] Mag sensor for S6
    # [80:93] S7
    # [99:102] Mag sensor for S8
    # [106:119] S9
    idx = np.r_[2:67, 73:76, 80:93, 99,102, 106:119]
    return df_walk.drop(df_walk.columns[idx], axis=1), df_jog.drop(df_jog.columns[idx], axis=1), df_run.drop(df_run.columns[idx], axis=1)

In [19]:
%%time
dfw, dfj, dfr = readcsvDataset(data_dir)

Wall time: 34.3 s


In [28]:
dfwalk, dfjog, dfrun = removeOtherSensor(dfw, dfj, dfr):

In [26]:
dfwalk.shape

(45638, 24)

### Run following cells for the following dataset structure:
* df_accel: all 17 subjects + (walk || jog || run) + accel
* df_gyro: all 17 subjects + (walk || jog || run) + gyro
* df_quart: all 17 subjects + (walk || jog || run) + quart

In [59]:

def splitSensor(df):
    # index for time
    idx_time = [0,1]
    # index for activity
    idx_activity = [119]
    # index for accel sensor for S6 and S8
    idx_accel = idx_time + [67, 68, 69, 93, 94, 95] + idx_activity
    # index for gryo sensor for S6 and S8
    idx_gyro = idx_time + [70, 71, 72, 96, 97, 98] + idx_activity
    # index for quart sensor for S6 and S8
    idx_quart = idx_time + [76, 77, 78, 79, 102, 103, 104, 105] + idx_activity
    
    return df[idx_accel], df[idx_gyro], df[idx_quart]

In [60]:
dfaccel, dfgyro, dfquart = splitSensor(dfr)

In [61]:
dfaccel.shape

(40142, 9)

In [62]:
dfgyro.shape

(40142, 9)

In [63]:
dfquart.shape

(40142, 11)