## Load Dataset and Create Dataframe

In [1]:
import pandas as pd
import os
%load_ext autoreload
%autoreload 2

In [2]:
def load_to_dataframe(path, field_names, device):
    """Read one header-less sensor CSV into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the CSV file; it is read with ``header=None``.
    field_names : list of str
        Names for the leading columns, in file order. Must contain
        ``'timestamp'``.
    device : str
        ``'phone'`` marks the rows as phone data; any other value marks
        them as watch data.

    Returns
    -------
    pandas.DataFrame
        ``'timestamp'`` (converted with ``pd.to_datetime``) as the first
        column, the remaining columns in file order with the last one cast
        to float, followed by the indicator columns ``'phone'`` and
        ``'watch'``.

    Raises
    ------
    ValueError
        If ``'timestamp'`` is not among the resulting column names.
    """
    data = pd.read_csv(path, header=None)
    # Positional column i gets the name field_names[i].
    data = data.rename(columns=dict(enumerate(field_names)))

    # Move 'timestamp' to the front, keeping the other columns in file order.
    cols = data.columns.to_list()
    cols.remove('timestamp')
    cols.insert(0, 'timestamp')
    # Take an explicit copy so the column assignments below write to an
    # independent frame rather than to a slice of the frame read from disk.
    data = data[cols].copy()

    # Strip every ';' from the last column before converting it to float.
    # Going through str first also accepts values that were already parsed
    # as numbers (no ';' present) or are missing.
    last_col = cols[-1]
    data[last_col] = (
        data[last_col]
        .astype(str)
        .str.replace(';', '', regex=False)
        .astype(float)
    )

    data['timestamp'] = pd.to_datetime(data['timestamp'])

    # Device indicator columns: exactly one of them is 1 for every row.
    is_phone = device == 'phone'
    data['phone'] = int(is_phone)
    data['watch'] = int(not is_phone)

    return data

In [3]:
# One input directory per (device, sensor) pair; the order here fixes the
# order of the per-directory file lists built from it.
dirs = [
    'datasets/phone/accel/',
    'datasets/phone/gyro/',
    'datasets/watch/accel/',
    'datasets/watch/gyro/',
]

def make_filenames_list(path):
    """Return the sorted paths of the entries directly inside ``path``.

    The macOS ``.DS_Store`` metadata file is skipped. Paths are built with
    ``os.path.join`` so ``path`` works with or without a trailing separator.

    Parameters
    ----------
    path : str
        Directory to list.

    Returns
    -------
    list of str
        ``path`` joined with each entry name, sorted ascending.
    """
    return sorted(
        os.path.join(path, file)
        for file in os.listdir(path)
        if file != '.DS_Store'
    )

# datasets[k] holds the sorted file paths found in dirs[k].
datasets = [make_filenames_list(directory) for directory in dirs]

In [4]:
# Maps the single-letter activity code stored in the 'activity' column to a
# readable activity name.
# NOTE(review): there is no entry for 'N' -- presumably the source dataset
# skips that code; confirm against the dataset description.
labels = {
    'A': 'walking',
    'B': 'jogging',
    'C': 'stairs',
    'D': 'sitting',
    'E': 'standing',
    'F': 'typing',
    'G': 'brushing_teeth',
    'H': 'eating_soup',
    'I': 'eating_chips',
    'J': 'eating_pasta',
    'K': 'drinking_from_cup',
    'L': 'eating_sandwich',
    'M': 'kicking',
    'O': 'playing_catch',
    'P': 'dribbling',
    'Q': 'writing',
    'R': 'clapping',
    'S': 'folding',
}

# Column names applied, in file order, to the header-less sensor files.
# NOTE(review): the gyroscope columns are labelled "(m/s^2)", the same unit as
# the accelerometer columns; gyroscopes normally report angular velocity, so
# the unit in the label looks copied over. The strings are left unchanged
# because later cells select columns by these exact names.
field_names_accel = ['id', 'activity', 'timestamp', 'Acceleration x (m/s^2)', 'Acceleration y (m/s^2)', 'Acceleration z (m/s^2)']
field_names_gyro = ['id', 'activity', 'timestamp', 'Gyroscope x (m/s^2)', 'Gyroscope y (m/s^2)', 'Gyroscope z (m/s^2)']

In [5]:
def make_df(path, field_names, device):
    """Load one sensor file and split it into one DataFrame per activity.

    The file is read with ``load_to_dataframe(path, field_names, device)``
    and grouped on its ``'activity'`` column.

    Returns
    -------
    list of pandas.DataFrame
        One frame per distinct activity value, in group-key order.
    """
    sensor_frame = load_to_dataframe(path, field_names, device)
    # Iterating a groupby yields (key, sub-frame) pairs; only the frames are kept.
    return [activity_frame for _, activity_frame in sensor_frame.groupby('activity')]

In [22]:
# Number of files found in the first input directory (dirs[0]).
len(datasets[0])

51

In [None]:
# Position of the participant's file within each sorted file list in
# `datasets`. The assignment previously had no right-hand side (a syntax
# error); 0 matches the index the file-selection cell below reads.
participant_no = 0

In [6]:
# Take the first file from each of the four per-directory lists, in the
# order phone/accel, phone/gyro, watch/accel, watch/gyro.
(
    phone_accel_file,
    phone_gyro_file,
    watch_accel_file,
    watch_gyro_file,
) = (file_list[0] for file_list in datasets)

In [7]:
# Split each of the four sensor files into a list of per-activity frames.
phone_accel_list = make_df(phone_accel_file, field_names_accel, 'phone')
phone_gyro_list = make_df(phone_gyro_file, field_names_gyro, 'phone')
watch_accel_list = make_df(watch_accel_file, field_names_accel, 'watch')
# The watch gyroscope file takes the gyroscope column names; it was previously
# loaded with field_names_accel, which labelled its axes as acceleration.
watch_gyro_list = make_df(watch_gyro_file, field_names_gyro, 'watch')

In [8]:
# Peek at the fourth per-activity frame of the phone accelerometer data
# (activity 'D' in the recorded output).
phone_accel_list[3]

Unnamed: 0,timestamp,id,activity,Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),phone,watch
10719,1970-01-03 19:06:38.622217012,1600,D,4.059128,5.965424,6.240402,1,0
10720,1970-01-03 19:06:38.672571016,1600,D,3.943848,5.636643,6.233505,1,0
10721,1970-01-03 19:06:38.722925020,1600,D,3.581696,5.638428,6.787384,1,0
10722,1970-01-03 19:06:38.773279024,1600,D,3.441956,5.564392,6.766052,1,0
10723,1970-01-03 19:06:38.823633028,1600,D,3.074692,5.422821,6.502975,1,0
...,...,...,...,...,...,...,...,...
14286,1970-01-03 19:09:38.234967618,1600,D,4.365356,4.304474,6.787476,1,0
14287,1970-01-03 19:09:38.285321622,1600,D,4.361465,4.303711,6.867676,1,0
14288,1970-01-03 19:09:38.335675626,1600,D,4.322098,4.285400,6.844696,1,0
14289,1970-01-03 19:09:38.386029630,1600,D,4.300400,4.312668,6.874374,1,0


## Preprocess data

In [9]:
from Chapter2.CreateDataset_old import CreateDataset
from util.VisualizeDataset import VisualizeDataset
from util import util
from pathlib import Path
import copy
import sys
import pickle
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from tqdm import tqdm
from functools import reduce

In [10]:
# Object that collects the per-sensor tables and, later, the merged table.
# NOTE(review): the first argument is an empty string (presumably a base
# directory) and granularity=250 is presumably a bin width in milliseconds --
# the tables displayed further down step in 250 ms increments. Confirm both
# against CreateDataset.
preprocessed_df = CreateDataset('', granularity=250)

In [11]:
# Register every per-activity phone accelerometer frame under the 'accel'
# label, then combine the registered frames for that label.
# NOTE(review): if add_numerical_dataset accumulates state, re-running this
# cell without recreating preprocessed_df would add the frames twice -- confirm.
for activity_df in phone_accel_list:
    preprocessed_df.add_numerical_dataset(activity_df, 'accel')
preprocessed_df.concat_datasets('accel')

In [12]:
# preprocessed_df.accel_table.loc[preprocessed_df.accel_table['activity'] == 'D', :]

In [13]:
# Same as the accelerometer cell, for the per-activity phone gyroscope frames
# under the 'gyro' label; the combined result is shown as
# preprocessed_df.gyro_table in the next cell.
# NOTE(review): if add_numerical_dataset accumulates state, re-running this
# cell without recreating preprocessed_df would add the frames twice -- confirm.
for activity_df in phone_gyro_list:
    preprocessed_df.add_numerical_dataset(activity_df, 'gyro')
preprocessed_df.concat_datasets('gyro')

In [14]:
# Inspect the combined gyroscope table (indexed by timestamp in the recorded output).
preprocessed_df.gyro_table

Unnamed: 0_level_0,id,Gyroscope x (m/s^2),Gyroscope y (m/s^2),Gyroscope z (m/s^2),phone,watch,activity
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-01-03 22:03:27.750,1600,-0.864174,0.156349,0.526207,1,0,A
1970-01-03 22:03:28.000,1600,-0.354056,0.751385,-0.012289,1,0,A
1970-01-03 22:03:28.250,1600,-0.166672,-0.303232,-0.161414,1,0,A
1970-01-03 22:03:28.500,1600,0.811801,-0.382571,-0.139578,1,0,A
1970-01-03 22:03:28.750,1600,0.207245,0.057999,0.322696,1,0,A
...,...,...,...,...,...,...,...
1970-01-03 19:55:04.750,1600,0.009766,0.063428,-0.049002,1,0,S
1970-01-03 19:55:05.000,1600,0.097223,0.203210,-0.083481,1,0,S
1970-01-03 19:55:05.250,1600,0.071344,0.233640,0.002460,1,0,S
1970-01-03 19:55:05.500,1600,0.250998,-0.294223,-0.126685,1,0,S


In [15]:
# Combine the sensor tables; the result is read back as
# preprocessed_df.data_table in the following cells.
# NOTE(review): the '_merge' values seen in the later outputs (left_only /
# right_only / both) look like a pandas merge indicator from an outer join --
# confirm in CreateDataset.merge_datasets.
preprocessed_df.merge_datasets()

In [16]:
# Column layout for the merged table: identifiers and device flags first,
# then the three accelerometer axes, the three gyroscope axes, and the
# '_merge' column last.
cols = [
    'id',
    'activity',
    'phone',
    'watch',
    'Acceleration x (m/s^2)',
    'Acceleration y (m/s^2)',
    'Acceleration z (m/s^2)',
    'Gyroscope x (m/s^2)',
    'Gyroscope y (m/s^2)',
    'Gyroscope z (m/s^2)',
    '_merge',
]

In [17]:
# Keep only the columns listed in `cols`, in that order.
preprocessed_df.data_table = preprocessed_df.data_table[cols]

In [18]:
# Inspect the merged table after the column selection.
preprocessed_df.data_table

Unnamed: 0_level_0,id,activity,phone,watch,Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),Gyroscope x (m/s^2),Gyroscope y (m/s^2),Gyroscope z (m/s^2),_merge
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1970-01-03 19:06:38.500,1600,D,1,0,3.861557,5.746831,6.420430,,,,left_only
1970-01-03 19:06:38.750,1600,D,1,0,3.079541,5.385727,6.648401,-0.144009,-0.034305,-0.078549,both
1970-01-03 19:06:39.000,1600,D,1,0,3.340332,5.141748,6.791879,-0.093579,-0.298019,-0.155127,both
1970-01-03 19:06:39.250,1600,D,1,0,3.803125,5.479275,6.434729,0.116098,-0.237854,-0.118774,both
1970-01-03 19:06:39.500,1600,D,1,0,4.105029,5.601306,6.217108,-0.035977,0.075006,0.134689,both
...,...,...,...,...,...,...,...,...,...,...,...
1970-01-03 22:39:10.000,1600,G,1,0,5.044189,3.732083,6.684763,0.000970,-0.003876,0.001581,both
1970-01-03 22:39:10.250,1600,G,1,0,5.054422,3.716641,6.706970,0.001666,-0.005283,-0.000797,both
1970-01-03 22:39:10.500,1600,G,1,0,5.015957,3.664814,6.774857,-0.003479,0.011452,0.005287,both
1970-01-03 22:39:10.750,1600,G,1,0,5.053854,3.683719,6.759817,-0.005792,0.004599,0.000037,both


In [19]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing; the output path itself is unchanged.
os.makedirs('intermediate_datafiles', exist_ok=True)
preprocessed_df.data_table.to_csv('intermediate_datafiles/preprocessed_data')

In [20]:
# preprocessed_df.data_table.loc[preprocessed_df.data_table['activity'] == 'D', :]

In [21]:
# Show the rows whose '_merge' value is not 'both', i.e. timestamps for which
# only accelerometer or only gyroscope values are present in the output below.
preprocessed_df.data_table.loc[preprocessed_df.data_table['_merge'] != 'both', :]

Unnamed: 0_level_0,id,activity,phone,watch,Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),Gyroscope x (m/s^2),Gyroscope y (m/s^2),Gyroscope z (m/s^2),_merge
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1970-01-03 19:06:38.500,1600,D,1,0,3.861557,5.746831,6.42043,,,,left_only
1970-01-03 19:43:46.500,1600,R,1,0,2.68383,7.984047,4.449082,,,,left_only
1970-01-03 20:03:30.000,1600,Q,1,0,2.140312,8.031555,4.330978,,,,left_only
1970-01-03 20:14:53.000,1600,I,1,0,2.968567,6.662826,5.761871,,,,left_only
1970-01-03 20:17:53.000,1600,I,1,0,,,,0.013474,-3.1e-05,-0.001984,right_only
1970-01-03 20:22:58.750,1600,C,1,0,-0.986542,9.240733,-0.443212,,,,left_only
1970-01-03 21:47:13.000,1600,L,1,0,1.266805,7.578074,5.370046,,,,left_only
1970-01-03 21:59:47.500,1600,B,1,0,-0.882207,15.235133,1.209178,,,,left_only
1970-01-03 22:03:27.500,1600,A,1,0,-0.622246,9.281143,1.036041,,,,left_only
1970-01-03 22:09:01.250,1600,M,1,0,-1.816704,9.754887,1.008224,,,,left_only
