In [95]:
import numpy as np
from matplotlib import pyplot as plt
from time import time
import pandas as pd

In [96]:
# Paths to the raw recordings, named <user>_<device>_<sensor>:
#   j = julia, p = phone / w = watch, accel = accelerometer / gyro = gyroscope.
# Nothing is opened here; the files must exist by the time they are read.

# julia data
j_p_accel, j_p_gyro = (
    'data/julia_normal_accel_B_phone.txt',
    'data/julia_normal_gyro_B_phone.txt',
)
j_w_accel, j_w_gyro = (
    'data/julia_normal_accel_B_watch.txt',
    'data/julia_normal_gyro_B_watch.txt',
)

In [97]:
# One DataFrame per recording, named <user>_<device><sensor>
# (e.g. j_pa = julia, phone, accelerometer).
# Columns: 'ts' is the timestamp (used as the index); 'x', 'y', 'z' are the three sensor axes.
col_names = ['ts', 'x', 'y', 'z']

# The files are read in the same order as the names on the left-hand side.
j_pa, j_pg, j_wa, j_wg = (
    pd.read_csv(source_path, names=col_names, index_col=0)
    for source_path in (j_p_accel, j_p_gyro, j_w_accel, j_w_gyro)
)

In [98]:
# Preview the first rows of Julia's phone accelerometer frame to sanity-check the load.
j_pa.head()

Unnamed: 0_level_0,x,y,z
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1434047893912,4.606444,-0.024541,6.731896
1434047893952,8.734048,1.785476,2.513313
1434047893992,5.990293,0.90441,2.546832
1434047894034,2.226608,3.806182,10.256162
1434047894072,10.702082,1.723227,5.783793


In [99]:
# Per-axis summary statistics (count, mean, std, min, quartiles, max) for the same frame.
j_pa.describe()

Unnamed: 0,x,y,z
count,3062.0,3062.0,3062.0
mean,0.872869,9.586657,0.855081
std,0.879761,0.813154,0.860346
min,-4.582502,-0.852934,-18.220474
25%,0.727837,9.638457,0.81343
50%,0.770933,9.681553,0.894833
75%,0.828394,9.715072,0.961871
max,13.077129,15.825075,16.098013


In [100]:
def amplitude( array ):
    """
    Calculate the amplitude (peak-to-peak range) of the samples, i.e. |max(array) - min(array)|.

    Parameters
    ----------
    array : NumPy array or pandas Series
        Any object exposing `.max()` and `.min()`.

    Returns
    -------
    The non-negative difference between the largest and smallest value.
    """
    # `.ptp()` is intentionally avoided: the method was removed from pandas
    # Series (pandas 1.0) and from ndarray (NumPy 2.0), so calling it raises
    # AttributeError on current versions. max() - min() is the same quantity
    # and is available on both types.
    return np.abs( array.max() - array.min() )

In [101]:
def absolute_mean_difference( array ):
    """
    Mean absolute deviation of the samples around their own mean:

        D_x = (1/n) * sum_{i=1}^{n} |x_i - mean(x)|

    `array` must provide `.mean()` and support elementwise subtraction
    (e.g. a NumPy array or a pandas Series).
    """
    deviations = np.abs( array - array.mean() )
    return deviations.mean()

In [102]:
def acceleration_mean( array_x, array_y, array_z):
    """
    Average magnitude of the (x, y, z) sample vectors:

        x_acc = (1/n) * sum_{i=1}^{n} sqrt(x_i^2 + y_i^2 + z_i^2)

    The three inputs are combined elementwise, so they are expected to
    line up sample-for-sample.
    """
    squared_norm = array_x**2 + array_y**2 + array_z**2
    magnitudes = np.sqrt(squared_norm)
    return magnitudes.mean()

In [108]:
def Pearsons_coefficient_of_var(array):
    """
    Pearson's coefficient of variation: population standard deviation
    (square root of the ddof=0 variance) divided by the mean.
    """
    population_std = np.sqrt(array.var(ddof=0))
    centre = array.mean()
    return population_std / centre

In [109]:
# Feature summary for Julia's phone accelerometer frame (j_pa).
# Per-axis features map axis name -> value; 'acceleration_mean' is one scalar over all three axes.
# NOTE(review): 'standard_devation' relies on the default ddof of Series.std while 'variance'
# passes ddof=0 explicitly, so std**2 does not equal the reported variance - confirm this is intended.
features = {}

features['amplitude'] = {axis: amplitude(j_pa[axis]) for axis in 'xyz'}
features['absolute_mean_difference'] = {axis: absolute_mean_difference(j_pa[axis]) for axis in 'xyz'}
features['acceleration_mean'] = acceleration_mean(j_pa['x'], j_pa['y'], j_pa['z'])
features['mean'] = {axis: j_pa[axis].mean() for axis in 'xyz'}
features['standard_devation'] = {axis: j_pa[axis].std() for axis in 'xyz'}
features['variance'] = {axis: j_pa[axis].var(ddof=0) for axis in 'xyz'}
features['Pearsons_coefficient_of_var'] = {axis: Pearsons_coefficient_of_var(j_pa[axis]) for axis in 'xyz'}


In [110]:
# Print every feature as "<name> : <value>".
# A single pre-formatted string is passed to print(...) so the line is valid on both
# Python 2 (where print is a statement) and Python 3 (where it is a function) and
# yields the same text on each.
for k in features:
    print('%s : %s' % (k, features[k]))

acceleration_mean : 9.76727421134
Pearsons_coefficient_of_var : {'y': 0.084807552411498399, 'x': 1.0077318842455363, 'z': 1.0059938663696539}
amplitude : {'y': 16.678009360000001, 'x': 17.659630999999997, 'z': 34.318487000000005}
variance : {'y': 0.66100306733299463, 'x': 0.77372715624042376, 'z': 0.73995422691377977}
standard_devation : {'y': 0.81315374366568205, 'x': 0.87976128901417661, 'z': 0.86034642035745468}
absolute_mean_difference : {'y': 0.19659799325060201, 'x': 0.21369135534185077, 'z': 0.19093527686654341}
mean : {'y': 9.586657417071196, 'x': 0.87286869958589153, 'z': 0.85508068188438924}
