In [16]:
# Imports 
from scipy.signal import medfilt, butter, filtfilt, find_peaks
from scipy.stats import median_abs_deviation, entropy, kurtosis, skew
import pandas as pd
import numpy as np

In [17]:
# Constants
# Root folder that holds everything the notebook reads or writes.
DATA_PATH = './data/'
# Sub-folders are derived from DATA_PATH so only the root needs changing.
DATASET_PATH = f'{DATA_PATH}uci-data/'
MODELS_PATH = f'{DATA_PATH}models/raw-models/'

In [18]:
# Functions
def get_raw_data_path(type, exp_id, user_id):
    """Build the relative path of a raw sensor recording.

    Args:
        type: Sensor prefix used in the file name (the callers in this
            notebook pass 'acc' or 'gyro').
        exp_id: Experiment identifier, as a string or an int.
        user_id: User identifier, as a string or an int.

    Returns:
        A string of the form "RawData/<type>_exp<EE>_user<UU>.txt" where both
        ids are left-padded with zeros to at least two characters.
    """
    # zfill pads only when needed, so ids that are already two or more
    # characters long are left untouched (the previous manual padding turned
    # a three-digit id such as "123" into "0123").
    exp = str(exp_id).zfill(2)
    user = str(user_id).zfill(2)
    return "RawData/{}_exp{}_user{}.txt".format(type, exp, user)

def apply_med_filter(column):
    """Run a 5-sample median filter over `column` and return the result as a list."""
    samples = np.array(column)
    smoothed = medfilt(samples, kernel_size=5)
    return list(smoothed)

# Function to design a low-pass Butterworth filter
def butter_lowpass(cutoff, fs, order):
    """Return the (b, a) coefficients of a digital low-pass Butterworth filter.

    Args:
        cutoff: Cutoff frequency, in the same unit as `fs`.
        fs: Sampling frequency.
        order: Filter order.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency (fs / 2).
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='low', analog=False)

# Function to apply the Butterworth filter
def apply_low_butter(data, cutoff, fs, order):
    """Low-pass filter `data` along axis 0 with a Butterworth filter.

    The coefficients come from butter_lowpass(cutoff, fs, order) and are
    applied with filtfilt, i.e. once forward and once backward.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order)
    samples = np.array(data)
    return filtfilt(numerator, denominator, samples, axis=0)

In [19]:
# NOTE(review): this is a set, so it records WHICH feature names exist but not
# the column order of the list returned by extract_features().
feature_names = set()


def _signal_entropy(signal, bins=10):
    """Shannon entropy of the signal's amplitude histogram.

    scipy's entropy() expects a non-negative distribution; feeding it the raw
    (signed) samples yields -inf/NaN, so the samples are binned first.
    """
    counts, _ = np.histogram(signal, bins=bins)
    return entropy(counts)


def _summary_features(name, signal):
    """Return the ordered (feature_name, value) pairs computed for one signal."""
    lower_q, upper_q = np.percentile(signal, [25, 75])
    peak_indices, _ = find_peaks(signal)
    return [
        (name + "_Mean", signal.mean()),
        (name + "_Median", np.median(signal)),
        # Max is emitted before Min to keep the historical value order.
        (name + "_Max", np.max(signal)),
        (name + "_Min", np.min(signal)),
        (name + "_Std", np.std(signal)),
        (name + "_Mad", median_abs_deviation(signal)),
        (name + "_Range", np.max(signal) - np.min(signal)),
        (name + "_Iqr", upper_q - lower_q),
        (name + "_Rms", np.sqrt(np.mean(signal ** 2))),
        # find_peaks returns (indices, properties); the feature is the count.
        (name + "_Peak_count", len(peak_indices)),
        # Energy is the mean of the squared samples, not the squared sum.
        (name + "_Energy", np.sum(signal ** 2) / len(signal)),
        (name + "_Entropy", _signal_entropy(signal)),
        (name + "_Skewness", skew(signal)),
        (name + "_Kurtosis", kurtosis(signal)),
    ]


def _magnitude(x, y, z):
    """Euclidean norm of a three-axis signal, sample by sample."""
    return np.sqrt(x ** 2 + y ** 2 + z ** 2)


def extract_features(acc_data, gyro_data, label):
    """Compute the feature vector of one window of accelerometer/gyroscope data.

    Args:
        acc_data: Sequence of rows whose first three items are the x, y, z
            accelerometer samples.
        gyro_data: Sequence of rows whose first three items are the x, y, z
            gyroscope samples.
        label: Value appended unchanged as the last element of the result.

    Returns:
        A list holding 14 statistics for each of the 20 derived signals
        (body/gravity acceleration, gyro, their jerks and magnitudes), then
        5 signal-magnitude-area values, then `label` (286 items in total).
        Every feature name is also added to the module-level `feature_names`.
    """
    sampling_rate = 50          # Hz; also fixes the jerk time step (1/50 = 0.02 s)
    sample_period = 1.0 / sampling_rate
    filter_order = 3
    noise_cutoff = 20           # Hz, noise-removal low-pass
    gravity_cutoff = 0.3        # Hz, isolates the slowly varying gravity part

    def denoise(column):
        # Median filter followed by the noise-removal Butterworth low-pass.
        return apply_low_butter(
            apply_med_filter(column), noise_cutoff, sampling_rate, filter_order
        )

    acc = {}
    gyro = {}
    for index, axis in enumerate("XYZ"):
        acc[axis] = denoise(np.array([row[index] for row in acc_data]))
        gyro[axis] = denoise(np.array([row[index] for row in gyro_data]))

    # The low-frequency component of the acceleration is gravity; whatever is
    # left after removing it is the body acceleration.
    gravity = {
        axis: apply_low_butter(acc[axis], gravity_cutoff, sampling_rate, filter_order)
        for axis in "XYZ"
    }
    body = {axis: acc[axis] - gravity[axis] for axis in "XYZ"}

    # Insertion order below defines the order of the returned features.
    data_dict = {}
    for prefix, source in (("bodyAcc", body), ("gravityAcc", gravity), ("gyro", gyro)):
        for axis in "XYZ":
            data_dict[prefix + axis] = source[axis]

    # Jerk (time derivative) signals; gravity has no jerk counterpart.
    for key in list(data_dict.keys()):
        if "gravityAcc" not in key:
            jerk_key = key[:-1] + "Jerk" + key[-1]
            data_dict[jerk_key] = np.array(np.diff(data_dict[key]) / sample_period)

    # Magnitude signals: Euclidean norm over all three axes.
    for prefix in ("bodyAcc", "bodyAccJerk", "gravityAcc", "gyro", "gyroJerk"):
        data_dict[prefix + "Mag"] = _magnitude(
            data_dict[prefix + "X"], data_dict[prefix + "Y"], data_dict[prefix + "Z"]
        )

    # Feature extraction time!
    features = []
    for key in list(data_dict.keys()):
        for feature_name, value in _summary_features(key, data_dict[key]):
            features.append(value)
            feature_names.add(feature_name)

    # Finally calculate signal magnitude area
    for prefix in ("bodyAcc", "gravityAcc", "gyro", "bodyAccJerk", "gyroJerk"):
        features.append(np.mean(
            np.absolute(data_dict[prefix + "X"]) +
            np.absolute(data_dict[prefix + "Y"]) +
            np.absolute(data_dict[prefix + "Z"])
        ))
        feature_names.add(prefix + "_Sma")

    # Finally add label
    features.append(label)
    return features

In [20]:
# Data loading
# Sliding-window parameters: windows of `window_size` samples, advanced by
# `window_step` samples (i.e. 50% overlap).
window_size = 128
window_step = 64

# Each non-blank line of labels.txt is split on whitespace into
# [experiment id, user id, activity, start, end].
# NOTE(review): start/end are assumed to be 1-based inclusive sample numbers,
# which is what the "- 1" below implies -- confirm against the dataset docs.
with open("labels.txt", 'r') as labels_file:
    raw_data_labels = labels_file.readlines()

raw_path = None
raw_acc_data = None
raw_gyro_data = None

# One feature list per window; the DataFrame is built once after the loop
# instead of being grown row by row.
feature_rows = []

for label in raw_data_labels:
    processed_label = label.split()
    if not processed_label:
        continue  # tolerate blank lines (e.g. a trailing newline at EOF)
    processed_label[3] = int(processed_label[3]) - 1
    processed_label[4] = int(processed_label[4])
    print("Loading Experiment ID: {} User ID: {}, Activity: {}".format(processed_label[0], processed_label[1], processed_label[2]))

    # Re-read the recordings only when the experiment/user pair changes.
    acc_path = get_raw_data_path('acc', processed_label[0], processed_label[1])
    if acc_path != raw_path:
        gyro_path = get_raw_data_path('gyro', processed_label[0], processed_label[1])
        with open(gyro_path, 'r') as gyro_file:
            raw_gyro_data = gyro_file.readlines()
        with open(acc_path, 'r') as acc_file:
            raw_acc_data = acc_file.readlines()
        raw_path = acc_path

    start = processed_label[3]
    end = processed_label[4]
    length = end - start
    i = 0
    # "<=" so a window that fits the labelled segment exactly is still used.
    while i + window_size <= length:
        window = slice(start + i, start + i + window_size)
        acc_data = [[float(y) for y in x.split()] for x in raw_acc_data[window]]
        # Gyroscope samples come from the gyro recording, not the accelerometer one.
        gyro_data = [[float(y) for y in x.split()] for x in raw_gyro_data[window]]

        feature_column = extract_features(
            acc_data,
            gyro_data,
            processed_label[2]
        )
        feature_rows.append(feature_column)
        i += window_step

# Columns are positional (0..n-1); the last column is the activity label.
complete_dataset = pd.DataFrame(feature_rows)

Loading Experiment ID: 1 User ID: 1, Activity: 5
265
265
265
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 7
265
Loading Experiment ID: 1 User ID: 1, Activity: 4
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 8
265
Loading Experiment ID: 1 User ID: 1, Activity: 5
265
265
265
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 11
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 6
265
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 10
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 4
265
265
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 9
265
Loading Experiment ID: 1 User ID: 1, Activity: 6
265
265
265
265
265
265
265
265
265
265
265
265
265
Loading Experiment ID: 1 User ID: 1, Activity: 12
265
Loading Experiment ID: 1 User ID: 1, Activity: 1
265
265
265
265


KeyboardInterrupt: 