# Features
- Variance of gait stretch - done
- Variance of step time - done
- Average step time - done
- Number of steps in a given time interval
- Vertical variance
- Horizontal variance
- Average step length
- Average velocity
- Cadence
- Skewness
- Sway area
- (FFT) Average Power
- (FFT) SNR

In [1]:
from utils import *

In [2]:
import gaitdynamics as gd

# Tuning constants read as module-level globals by the functions below.

# Upper bound applied to every acceleration magnitude in calc_magnitude.
max_threshold = 20
# Exclusive bounds on the "stretch" values kept by gait_features.
min_stretch = 4
max_stretch = 20
# Exclusive bounds on the "step" values kept by gait_features.
# NOTE(review): presumably milliseconds, since timestamps are converted to ms
# before feature extraction - confirm.
min_step = 300
max_step = 700

# NOTE(review): not referenced anywhere in the visible cells - confirm it is
# still needed.
slice_interval = 15 # seconds

In [3]:
import matplotlib.pyplot as plt

In [4]:
from sklearn import datasets, svm, metrics

In [5]:
import os
# Directory holding the paired "gyro_<name>" / "accel_<name>" recordings.
data_directory = "data/data_with_goggle/"

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the feature matrix built from header_names reproducible between runs.
files = sorted(os.listdir(data_directory))

# Match the full "gyro_" / "accel_" prefixes: the prefix is stripped by length
# below and re-attached when the files are read, so a name that merely starts
# with "gyro" (without the underscore) would produce a bogus header name.
gyro_prefix = 'gyro_'
accel_prefix = 'accel_'
gyro_files = [file for file in files if file.startswith(gyro_prefix)]
accel_files = [file for file in files if file.startswith(accel_prefix)]

# Recording names without the sensor prefix, e.g. "<color>_<person>_..." .
header_names = [file[len(gyro_prefix):] for file in gyro_files]

# Sanity check: both counts are expected to be equal.
print(len(gyro_files))
print(len(accel_files))

75
75


In [6]:
def plot_graph(data):
    """Plot element 1 of every item in ``data`` against element 0 and show it.

    The x tick labels are auto-formatted as dates and the y axis is hidden.
    """
    horizontal = [item[0] for item in data]
    vertical = [item[1] for item in data]
    plt.plot(horizontal, vertical)

    figure = plt.gcf()
    figure.autofmt_xdate()

    # Hide the y axis.
    y_axis = plt.gca().get_yaxis()
    y_axis.set_visible(False)

    plt.show()

In [7]:
def calc_magnitude(t, x, y, z):
    """Return the acceleration magnitude of (x, y, z), capped at max_threshold.

    ``t`` is not used; it is accepted so that a whole (t, x, y, z) sample can
    be unpacked straight into the call.
    """
    return min(gd.signal.magnitude_acceleration(x, y, z), max_threshold)

def interpolate(time, x, y, z, interval=10):
    """Resample the x, y and z series onto an evenly spaced time grid.

    The grid starts at the first multiple of ``interval`` that is not below
    ``time[0]`` and stops before the first multiple of ``interval`` that is
    not below the last timestamp. Values are linearly interpolated.

    Returns a list ``[grid, x_on_grid, y_on_grid, z_on_grid]`` of arrays.
    """
    def round_up(value):
        # Smallest multiple of `interval` that is >= value.
        return math.ceil(value / interval) * interval

    grid = np.arange(round_up(time[0]), round_up(time[-1]), interval)
    return [grid] + [np.interp(grid, time, axis) for axis in (x, y, z)]

def reject_outliers(data, m=2):
    """Return the values of ``data`` lying within ``m`` standard deviations of the mean.

    Args:
        data: array-like of numbers (plain sequences are converted to arrays).
        m: width of the accepted band, in standard deviations.

    Returns:
        A new array with the retained values. Empty input yields an empty
        array. When all values are identical (zero standard deviation) every
        value is retained; a strict ``< 0`` comparison would otherwise throw
        away the whole sample.
    """
    data = np.asarray(data)
    if data.size == 0:
        # Avoid the mean-of-empty-slice warning; nothing to filter.
        return data.copy()

    spread = np.std(data)
    if spread == 0:
        # Every value equals the mean, so none of them is an outlier.
        keep = np.ones(data.shape, dtype=bool)
    else:
        keep = np.abs(data - np.mean(data)) < m * spread
    return data[keep]

In [8]:
def gait_features(accel_T):
    """Compute ``[var_stretch, var_step, avg_step]`` from accelerometer data.

    ``accel_T`` is indexed as a sequence whose element 0 is the time axis and
    whose columns, when zipped together, form the argument tuples passed to
    calc_magnitude. Reads the module-level bounds min_stretch / max_stretch
    and min_step / max_step.

    NOTE(review): element 0 of the 'peak' result is presumably the peak
    positions on the time axis and element 1 of the 'both' result the signal
    values at the extrema - confirm against gaitdynamics.
    """
    time_axis = accel_T[0]

    # One capped magnitude per sample.
    raw_magnitudes = np.array([calc_magnitude(*sample) for sample in zip(*accel_T)])

    # Smooth with a Savitzky-Golay filter (window length 41, polynomial order 3).
    smoothed = scipy.signal.savgol_filter(raw_magnitudes, 41, 3)

    # Locate the peaks alone, then peaks together with valleys.
    peaks_only = gd.peak.find_peaks(time_axis, smoothed, peak_type='peak', min_dist=30)
    extrema = gd.peak.find_peaks(time_axis, smoothed, peak_type='both', min_dist=30, plot=False)

    # Consecutive differences: stretch is (earlier - later) over element 1 of
    # the extrema result, step is (later - earlier) over element 0 of the
    # peak result.
    extrema_second = extrema[1]
    peaks_first = peaks_only[0]
    stretch_diffs = [earlier - later for earlier, later in zip(extrema_second, extrema_second[1:])]
    step_diffs = [later - earlier for earlier, later in zip(peaks_first, peaks_first[1:])]

    # Statistical outlier rejection first ...
    stretch_inliers = reject_outliers(np.array(stretch_diffs))
    step_inliers = reject_outliers(np.array(step_diffs))

    # ... then keep only values strictly inside the configured bounds.
    stretch_kept = [value for value in stretch_inliers if min_stretch < value < max_stretch]
    step_kept = [value for value in step_inliers if min_step < value < max_step]

    return [np.var(stretch_kept), np.var(step_kept), np.mean(step_kept)]

In [9]:
import numpy as np
import scipy
import math

# Build the feature matrix X (one row per recording) and the label vector y.
check_dir("input_data")

# Class label for each goggle condition encoded in the file name.
y_mapping = {"sober":0 , "blue":1 , "black":2}

n = len(header_names)

X = np.zeros(shape=(n, 3))
y = np.zeros(shape=(n,))


def _ms_since(start, date):
    """Return the number of milliseconds elapsed from ``start`` to ``date``.

    ``delta.days`` is included so that offsets which are negative or span
    more than one day are converted correctly; ``seconds`` and
    ``microseconds`` alone only describe the within-day remainder.
    """
    delta = date - start
    return (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds / 1000


for idx, name in enumerate(header_names):
    # File names look like "<color>_<person>_<phone>_<number>.txt".
    color, person, phone, number = name[:-4].split('_') # remove ".txt" and split by '_'

    gyro = read_filedata(data_directory + 'gyro_' + name)
    accel = read_filedata(data_directory + 'accel_' + name)

    gyro = preprocess(gyro)
    accel = preprocess(accel)

    # Transpose rows into columns; column 0 is the timestamp column.
    gyro_T = list(zip(*gyro))
    accel_T = list(zip(*accel))

    # Perform FFT on every non-time column.
    # NOTE(review): these results are not used by the feature extraction
    # below; kept in case later cells read them.
    gyro_fft = list(map(np.fft.fft, gyro_T[1:]))
    accel_fft = list(map(np.fft.fft, accel_T[1:]))

    # Convert datetime to ms from the first sample of each recording.
    accel_start = accel_T[0][0]
    gyro_start = gyro_T[0][0]
    accel_T[0] = [_ms_since(accel_start, stamp) for stamp in accel_T[0]]
    gyro_T[0] = [_ms_since(gyro_start, stamp) for stamp in gyro_T[0]]

    # Interpolate data onto an evenly spaced time grid.
    accel_T = interpolate(*accel_T)
    gyro_T = interpolate(*gyro_T)

    # Extract features (accelerometer only).
    features = gait_features(accel_T)
    print(features)

    X[idx] = features
    y[idx] = y_mapping[color]
    
    

[0.50727974192528846, 181.72249886826617, 494.14893617021278]
[0.1355173171188962, 1486.8872817590764, 573.24786324786328]
[0.73013995530358677, 1538.5902456390177, 516.22641509433959]
[0.17186273894538198, 2218.7255859375, 569.84375]
[0.34998154878815679, 411.21458680521516, 511.28712871287127]
[0.56878020911831906, 464.26820899911775, 500.29702970297029]
[0.49450299290053107, 906.48860502249966, 474.33734939759034]
[1.3456902883167086, 656.60491493383756, 512.78260869565213]
[0.38035671018356543, 383.00173451688602, 503.73737373737373]
[0.24428350434164611, 1012.4854859640736, 573.38842975206614]
[0.52436656900975465, 1721.3664940828401, 509.13461538461536]
[0.41207964613697451, 650.0, 520.0]
[0.26577750631171615, 670.69252077562328, 545.78947368421052]
[1.2003171701906468, 574.70062405127339, 464.02597402597405]
[0.640401494466453, 687.29971327373914, 474.80519480519479]
[0.42507474778283333, 1066.4228014022253, 479.50617283950618]
[0.59435364047037265, 416.74345652387012, 509.70297

In [10]:
# Report the shape and container type of the feature matrix and the labels.
for _array in (X, y):
    print(_array.shape)
    print(type(_array))

(75, 3)
<class 'numpy.ndarray'>
(75,)
<class 'numpy.ndarray'>


In [11]:
# Save data
import pickle
# Make sure the output directory is there, then pickle the (features, labels) pair.
check_dir("input_data")
input_data = (X, y)
with open("input_data/input_data.pkl", mode="wb") as pickle_file:
    pickle.dump(input_data, pickle_file)