In [54]:
# Imports 
import random
from scipy.signal import medfilt, butter, filtfilt
import pandas as pd
import numpy as np

In [55]:
# Constants
# Every location is derived from the single data root so it can be moved in one place.
DATA_PATH = './data/'
DATASET_PATH = '{}uci-data/'.format(DATA_PATH)
MODELS_PATH = '{}models/raw-models/'.format(DATA_PATH)

In [56]:
# Functions
def get_raw_data_path(type, exp_id, user_id):
    """Build the relative path of one raw sensor log file.

    Parameters
    ----------
    type : str
        Prefix placed before ``_exp`` in the file name (callers in this
        notebook pass ``'acc'`` or ``'gyro'``). The name shadows the built-in
        ``type``; it is kept so existing keyword callers keep working.
    exp_id : str or int
        Experiment identifier; left-padded with zeros to at least two characters.
    user_id : str or int
        User identifier; left-padded with zeros to at least two characters.

    Returns
    -------
    str
        ``"RawData/<type>_exp<EE>_user<UU>.txt"``.
    """
    # zfill pads only when needed, so ids that are already two or more
    # characters long are left untouched, and integer ids are accepted too.
    exp = str(exp_id).zfill(2)
    user = str(user_id).zfill(2)
    return "RawData/{}_exp{}_user{}.txt".format(type, exp, user)

def apply_med_filter(column):
    """Smooth a 1-D sequence with a 5-sample median filter.

    Parameters
    ----------
    column : sequence of numbers
        Samples to smooth; converted to a NumPy array before filtering.

    Returns
    -------
    list
        The filtered samples, one element per input sample.
    """
    samples = np.array(column)
    smoothed = medfilt(samples, kernel_size=5)
    return list(smoothed)

# Design the coefficients of a digital low-pass Butterworth filter
def butter_lowpass(cutoff, fs, order):
    """Return the ``(b, a)`` coefficients of a digital low-pass Butterworth filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, in the same units as ``fs``.
    fs : float
        Sampling frequency; half of it is used as the Nyquist frequency.
    order : int
        Filter order handed to ``scipy.signal.butter``.

    Returns
    -------
    tuple
        ``(b, a)`` numerator and denominator coefficient arrays.
    """
    nyquist = 0.5 * fs
    # scipy expects the cutoff as a fraction of the Nyquist frequency.
    numerator, denominator = butter(
        order,
        cutoff / nyquist,
        btype='low',
        analog=False,
    )
    return numerator, denominator

# Run a low-pass Butterworth filter over a signal
def apply_low_butter(data, cutoff, fs, order):
    """Low-pass filter ``data`` with a Butterworth filter applied via ``filtfilt``.

    Parameters
    ----------
    data : array-like
        Samples to filter; converted to a NumPy array and filtered along axis 0.
    cutoff : float
        Cutoff frequency forwarded to ``butter_lowpass``.
    fs : float
        Sampling frequency forwarded to ``butter_lowpass``.
    order : int
        Filter order forwarded to ``butter_lowpass``.

    Returns
    -------
    numpy.ndarray
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order)
    samples = np.array(data)
    return filtfilt(numerator, denominator, samples, axis=0)

In [57]:
def extract_features(acc_data, gyro_data, label):
    """Denoise one window of sensor samples and split acceleration into gravity and body parts.

    Parameters
    ----------
    acc_data : sequence of sequences
        Accelerometer rows; elements 0, 1 and 2 of every row are read as the
        X, Y and Z samples.
    gyro_data : sequence of sequences
        Gyroscope rows, read the same way as ``acc_data``.
    label : any
        Currently unused; kept for the feature-extraction step that is not
        implemented yet.

    Returns
    -------
    None
        The feature computation is still a stub, so nothing is returned yet.
    """

    def _denoise_axis(rows, axis):
        # Median filter followed by a 3rd-order low-pass Butterworth filter
        # (cutoff 20, sampling frequency 50) on one axis of the window.
        samples = np.array([row[axis] for row in rows])
        return apply_low_butter(apply_med_filter(samples), 20, 50, 3)

    # First get each axis and remove noise from it
    acc_x_data, acc_y_data, acc_z_data = (
        _denoise_axis(acc_data, axis) for axis in range(3)
    )
    gyro_x_data, gyro_y_data, gyro_z_data = (
        _denoise_axis(gyro_data, axis) for axis in range(3)
    )

    # Then another Butterworth filter to separate gravity and body acceleration.
    # Gravity is the slowly varying part of the signal, so it is the OUTPUT of
    # the 0.3 low-pass filter; body acceleration is what remains after
    # subtracting it. (The two were previously assigned the other way round.)
    gravity_acc_x_data = apply_low_butter(acc_x_data, 0.3, 50, 3)
    gravity_acc_y_data = apply_low_butter(acc_y_data, 0.3, 50, 3)
    gravity_acc_z_data = apply_low_butter(acc_z_data, 0.3, 50, 3)

    body_acc_x_data = acc_x_data - gravity_acc_x_data
    body_acc_y_data = acc_y_data - gravity_acc_y_data
    body_acc_z_data = acc_z_data - gravity_acc_z_data

    # Now put all this data in a dictionary for ease of use
    # (not consumed yet: the feature computation below is still to be written).
    data_dict = {
        "bodyAccX": body_acc_x_data,
        "bodyAccY": body_acc_y_data,
        "bodyAccZ": body_acc_z_data,
        "gravityAccX": gravity_acc_x_data,
        "gravityAccY": gravity_acc_y_data,
        "gravityAccZ": gravity_acc_z_data,
        "gyroX": gyro_x_data,
        "gyroY": gyro_y_data,
        "gyroZ": gyro_z_data,
    }

    # Feature extraction time!
    # Mean
    # TODO: compute the features from data_dict and return them.

    return None

In [58]:
# Data loading
# Each labelled activity segment is cut into fixed-size sliding windows.
WINDOW_SIZE = 128  # samples per window
WINDOW_STEP = 64   # hop between consecutive window starts (half a window)

# Context managers make sure every file handle is closed after reading.
with open("labels.txt", 'r') as labels_file:
    raw_data_labels = labels_file.readlines()

# Cache of the most recently loaded experiment so consecutive labels that
# belong to the same recording do not re-read the files.
raw_path = None
raw_acc_data = None
raw_gyro_data = None

complete_dataset = pd.DataFrame()


for label in raw_data_labels:
    # split() without arguments tolerates repeated spaces and trailing newlines.
    processed_label = label.split()
    if not processed_label:
        continue  # skip blank lines instead of failing on the index below

    # Field 3 is shifted by one so it can be used as a 0-based slice start;
    # field 4 is used unchanged as the exclusive slice end.
    processed_label[3] = int(processed_label[3]) - 1
    processed_label[4] = int(processed_label[4])
    print("Loading Experiment ID: {} User ID: {}, Activity: {}".format(processed_label[0], processed_label[1], processed_label[2]))

    acc_path = get_raw_data_path('acc', processed_label[0], processed_label[1])
    if acc_path != raw_path:
        gyro_path = get_raw_data_path('gyro', processed_label[0], processed_label[1])
        with open(gyro_path, 'r') as gyro_file:
            raw_gyro_data = gyro_file.readlines()
        with open(acc_path, 'r') as acc_file:
            raw_acc_data = acc_file.readlines()
        # Only remember the path once both files were read successfully.
        raw_path = acc_path

    start = processed_label[3]
    end = processed_label[4]
    length = end - start

    # "+ 1" keeps the last window when it fits the segment exactly
    # (the previous strict "<" comparison silently dropped it).
    for i in range(0, length - WINDOW_SIZE + 1, WINDOW_STEP):
        window = slice(start + i, start + i + WINDOW_SIZE)
        acc_data = [[float(y) for y in x.split()] for x in raw_acc_data[window]]
        # Gyroscope windows must come from the gyroscope log, not the accelerometer one.
        gyro_data = [[float(y) for y in x.split()] for x in raw_gyro_data[window]]

        feature_column = extract_features(
            acc_data,
            gyro_data,
            processed_label[2]
        )

Loading Experiment ID: 1 User ID: 1, Activity: 5
[1.020833394742025, -0.1250000020616516, 0.1041666724366978]
[1.020833394742025, -0.1305555574387036, 0.09861111705964587]
[1.023611178508533, -0.1250000020616516, 0.09166666979933977]
[1.016666719092262, -0.1347222330884659, 0.09444444748786576]
[1.018055610975516, -0.1277777858281599, 0.07500000366818386]
[1.01944450285877, -0.1333333412052118, 0.08333333673376181]
[1.015277827209008, -0.1208333385678538, 0.08750000022755966]
[1.025000070391787, -0.1180555548013455, 0.07777778135670985]
[1.023611178508533, -0.1486111154531136, 0.0819444448505077]
[1.01944450285877, -0.1388888965822638, 0.07777778135670985]
[1.016666719092262, -0.1500000073363677, 0.07777778135670985]
[1.022222286625279, -0.1263888939449057, 0.07500000366818386]
[1.01944450285877, -0.1291666655554495, 0.08472222253903368]
[1.020833394742025, -0.1402777884655179, 0.07777778135670985]
Loading Experiment ID: 1 User ID: 1, Activity: 7
[1.015277827209008, -0.1416666681928075