In [2]:
#############
## IMPORTS ##
#############

import pandas as pd
import os.path
import numpy as np
import matplotlib
from scipy import signal


In [3]:
###################
## PREPROCESSING ##
###################
## Set the participant we're analyzing

PARTICIPANT = "user1"
DATA_1 = PARTICIPANT+'_1.csv'
DATA_2 = PARTICIPANT+'_2.csv'
DATA_3 = PARTICIPANT+'_3.csv'

#loading data in
file_paths = ['user_data\\'+PARTICIPANT+'\\'+DATA_1, 'user_data\\'+PARTICIPANT+'\\'+DATA_2, 'user_data\\'+PARTICIPANT+'\\'+DATA_1 ]
f = file_paths[0]
gen = pd.read_csv(f, names=['Sensor', 'Time', 'X', 'Y', 'Z'], on_bad_lines='skip') #general data

#accel is 10, gyro is 4
GYRO_ID = 4
ACCEL_ID = 10

#Separate data by sensor id 
gyro_df = gen[gen.Sensor == GYRO_ID]
accel_df = gen[gen.Sensor == ACCEL_ID]



#sort data by time 
gyro_df.sort_values('Time', inplace=True)
accel_df.sort_values('Time', inplace=True)

#data is now separated and ordered by time, ready for M-point filer
print(gyro_df.head())
print(accel_df.head())


    Sensor          Time        X         Y         Z
11       4  7.210914e+11 -0.11638  0.049002  0.068976
8        4  7.210947e+11 -0.11638  0.049002  0.068976
4        4  7.210969e+11 -0.11638  0.049002  0.068976
13       4  7.211012e+11 -0.11638  0.049002  0.068976
9        4  7.211076e+11 -0.11638  0.049002  0.068976
    Sensor          Time         X         Y         Z
12      10  7.210794e+11 -1.297232 -1.685075  0.404865
10      10  7.210914e+11 -1.297232 -1.685075  0.404865
2       10  7.210969e+11 -1.297232 -1.685075  0.404865
7       10  7.211076e+11 -1.297232 -1.685075  0.404865
1       10  7.211231e+11 -1.297232 -1.685075  0.404865


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [4]:
##################################
# APPLYING MOVING AVERAGE FILTER #
##################################

#Applying Moving Average Filter 


M = 10 #M-point filter

gyro_df['MA_Time'] = gyro_df['Time'].rolling(M).mean()
gyro_df['MA_X'] = gyro_df['X'].rolling(M).mean()
gyro_df['MA_Y'] = gyro_df['Y'].rolling(M).mean()
gyro_df['MA_Z'] = gyro_df['Z'].rolling(M).mean()

accel_df['MA_Time'] = accel_df['Time'].rolling(M).mean()
accel_df['MA_X'] = accel_df['X'].rolling(M).mean()
accel_df['MA_Y'] = accel_df['Y'].rolling(M).mean()
accel_df['MA_Z'] = accel_df['Z'].rolling(M).mean()


#Creating axis vectors 

N = 300 #number of samples for a profile feature 
M_IDX = N + M #index of the Nth sample (accounts for NaNs of first M rows)

#X axis 
x_a = accel_df.loc[:, "MA_X"]
x_a = list(x_a[M-1:M_IDX]) #this is done to avoid a keyerror in the loc function 

x_g = gyro_df.loc[:, "MA_X"]
x_g = list(x_g[M-1:M_IDX])  

#y axis
y_a = accel_df.loc[:, "MA_Y"]
y_a = list(y_a[M-1:M_IDX])

y_g = gyro_df.loc[:, "MA_Y"]
y_g = list(y_g[M-1:M_IDX]) 

#z axis
z_a = accel_df.loc[:, "MA_Z"]
z_a = list(z_a[M-1:M_IDX])

z_g = accel_df.loc[:, "MA_Z"]
z_g = list(z_g[M-1:M_IDX])

features = {'x_a': x_a, 'y_a': y_a, 'z_a': z_a, 'x_g': x_g, 'y_g': y_g, 'z_g': z_g}
features_df = pd.DataFrame.from_dict(features) #mostly for presentation purposes, will come in handy for feature extraction

print(features_df)

#preprocessing is now completed

          x_a       y_a       z_a       x_g       y_g       z_g
0   -0.862802 -1.437326  0.404291 -0.102052  0.046153  0.404291
1   -0.781090 -1.364904  0.396533 -0.089642  0.040480  0.396533
2   -0.687134 -1.295530  0.390052 -0.074222  0.037284  0.390052
3   -0.594113 -1.219587  0.376317 -0.054382  0.036272  0.376317
4   -0.491355 -1.144852  0.359657 -0.034728  0.035287  0.359657
..        ...       ...       ...       ...       ...       ...
296 -0.257069 -0.236286  0.839527 -0.134277  0.075607  0.839527
297 -0.261433 -0.229763  0.794429 -0.149217  0.071080  0.794429
298 -0.255331 -0.214992  0.744446 -0.153025  0.061572  0.744446
299 -0.246414 -0.208342  0.665234 -0.135795  0.051426  0.665234
300 -0.238131 -0.199820  0.586020 -0.099469  0.044448  0.586020

[301 rows x 6 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gyro_df['MA_Time'] = gyro_df['Time'].rolling(M).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gyro_df['MA_X'] = gyro_df['X'].rolling(M).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gyro_df['MA_Y'] = gyro_df['Y'].rolling(M).mean()
A value is trying to be set on a copy of a slice fro

In [19]:
########################
## FEATURE EXTRACTION ##
########################


#Mean, Median, Variance, Average Absolute
#Difference of Peaks, Range, Mode, Covariance,
#Mewan Absolute Deviation (MAD), Inter-
#quartile Range (IQR), correlation between axes
#(xy, yz, xz), Skewness, Kurtosis

feature_set = { 'mean': [], 'median': [], 'variance': [], 'AADP': [], 'range': [], 'mode':[], 
                'covariance': [], 'mad': [], 'iqr': [], 'correlation': [], 'skewness': [], 'kurtosis': [] }

for column in features_df:
    feature_set['mean'].append(features_df[column].mean())
    feature_set['median'].append(features_df[column].median())
    feature_set['variance'].append(features_df.var()[column])

    feature_set['range'].append(features_df[column].max() - features_df[column].min())
    feature_set['mode'].append(features_df[column].mode().iat[0])
   # feature_set['covariance'].append(features_df.cov())
    feature_set['mad'].append(0)
print(feature_set)

{'mean': [0.009827475813953558, -0.04463475788642186, 0.2626198391621262, -0.023838920205647877, 0.062348494731893626, 0.2626198391621262], 'median': [-0.06538293329999997, 0.015398809100000011, 0.3287486996, -0.006231797299999997, 0.0035686375999999798, 0.3287486996], 'variance': [0.13196202589556125, 0.7593903424277564, 0.4625987131061106, 0.28766434659070955, 0.16475678381493467, 0.4625987131061106], 'AADP': [], 'range': [1.824250421, 4.81085038, 4.196423614, 3.712073789, 2.1073860200000003, 4.196423614], 'mode': [-0.8628018, -2.4304165699999998, -2.62524251, -1.40638876, -0.4959072300000001, -2.62524251], 'covariance': [], 'mad': [0, 0, 0, 0, 0, 0], 'iqr': [], 'correlation': [], 'skewness': [], 'kurtosis': []}
