In [56]:
# Imports 
import random
from scipy.signal import medfilt, butter, filtfilt
import pandas as pd
import numpy as np

In [57]:
# Constants: every location used by the notebook is derived from DATA_PATH.
DATA_PATH = './data/'
# Root folder of the dataset; the raw recordings and labels.txt are read from its RawData/ folder.
DATASET_PATH = f'{DATA_PATH}smartphone+based+recognition+of+human+activities+and+postural+transitions/'
# Folder for models; not referenced by the cells visible in this part of the notebook.
MODELS_PATH = f'{DATA_PATH}models/raw-models/'

In [58]:
# Functions
def get_raw_data_path(type, exp_id, user_id):
    """Build the path of one raw sensor recording inside the dataset folder.

    Args:
        type: Sensor prefix used in the file name (the notebook passes
            'acc' or 'gyro'). The name shadows the builtin but is kept so
            keyword callers continue to work.
        exp_id: Experiment identifier, as a string or an int.
        user_id: User identifier, as a string or an int.

    Returns:
        DATASET_PATH + "RawData/<type>_exp<EE>_user<UU>.txt", where both ids
        are left-padded with zeros to at least two characters.
    """
    # zfill pads only when needed, so ids of any length (and ints) work.
    # The previous `len(...) == 2` check prepended "0" to every id that was
    # not exactly two characters long, including longer ones.
    exp = str(exp_id).zfill(2)
    user = str(user_id).zfill(2)
    return DATASET_PATH + "RawData/{}_exp{}_user{}.txt".format(type, exp, user)

def apply_med_filter(column):
    """Smooth one signal with a 5-sample median filter.

    Args:
        column: Sequence of numeric samples (the notebook passes a single
            DataFrame column converted to a list).

    Returns:
        A list holding one filtered value per input sample.
    """
    samples = np.array(column)
    smoothed = medfilt(samples, kernel_size=5)
    return list(smoothed)

# Low-pass Butterworth design helper
def butter_lowpass(cutoff, fs, order):
    """Design a digital low-pass Butterworth filter.

    Args:
        cutoff: Cut-off frequency, in the same unit as fs.
        fs: Sampling frequency of the signal.
        order: Filter order.

    Returns:
        The (b, a) numerator/denominator coefficients returned by scipy's butter.
    """
    # butter receives the cut-off as a fraction of the Nyquist frequency (fs / 2).
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='low', analog=False)

# Forward-backward low-pass filtering
def apply_low_butter(data, cutoff, fs, order):
    """Low-pass filter data with a Butterworth filter applied via filtfilt.

    Args:
        data: Samples to filter; converted to a NumPy array and filtered
            along axis 0.
        cutoff: Cut-off frequency, in the same unit as fs.
        fs: Sampling frequency of the signal.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples as returned by scipy's filtfilt.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order)
    # filtfilt runs the filter forward and then backward over the samples.
    return filtfilt(numerator, denominator, np.array(data), axis=0)

'''This is for getting body acc component from total acc'''
# High-pass Butterworth design helper
def butter_highpass(cutoff, fs, order):
    """Design a digital high-pass Butterworth filter.

    Args:
        cutoff: Cut-off frequency, in the same unit as fs.
        fs: Sampling frequency of the signal.
        order: Filter order.

    Returns:
        The (b, a) numerator/denominator coefficients returned by scipy's butter.
    """
    # The Nyquist frequency is half the sampling rate; butter takes the
    # cut-off expressed as a fraction of it.
    half_rate = 0.5 * fs
    return butter(order, cutoff / half_rate, btype='high', analog=False)

# Forward-backward high-pass filtering
def apply_high_butter(data, cutoff, fs, order):
    """High-pass filter data with a Butterworth filter applied via filtfilt.

    Args:
        data: Samples to filter; converted to a NumPy array and filtered
            along axis 0.
        cutoff: Cut-off frequency, in the same unit as fs.
        fs: Sampling frequency of the signal.
        order: Order of the Butterworth design.

    Returns:
        The filtered samples as returned by scipy's filtfilt.
    """
    numerator, denominator = butter_highpass(cutoff, fs, order)
    # filtfilt runs the filter forward and then backward over the samples.
    return filtfilt(numerator, denominator, np.array(data), axis=0)

In [59]:
# Data loading
# Reads labels.txt, then copies the labelled sample range of every entry from
# the matching acc/gyro recordings into raw_compiled_data as text rows:
#   accX accY accZ gyroX gyroY gyroZ user_id activity
with open(DATASET_PATH + "RawData/labels.txt", 'r') as labels_file:
    raw_data_labels = labels_file.readlines()

# Cache of the recording currently in memory; raw_path holds the acc path of
# that recording so consecutive labels of the same experiment reuse it.
raw_path = None
raw_acc_file = None
raw_gyro_file = None

raw_compiled_data = []
# These output handles stay open on purpose: later cells write to them and
# close them.
# NOTE(review): opening the train/test files in "w" mode truncates them even
# though the only code that writes to them is currently disabled.
raw_data_file = open(DATA_PATH + "self-calculated/raw-data/raw-data.txt", "w")
raw_target_train_file = open(DATA_PATH + "self-calculated/raw-data/train-data.txt", "w")
raw_target_test_file = open(DATA_PATH + "self-calculated/raw-data/test-data.txt", "w")
PARTITION = 0.7

total_size = 0

for label in raw_data_labels:
    # Fields: experiment id, user id, activity id, first sample, last sample.
    processed_label = label.split()
    if not processed_label:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        continue
    # The first sample index is shifted down by one and the last one is used
    # as an exclusive bound — presumably labels.txt is 1-based and inclusive;
    # confirm against the dataset description.
    processed_label[3] = int(processed_label[3]) - 1
    processed_label[4] = int(processed_label[4])
    print("Loading Experiment ID: {} User ID: {}, Activity: {}".format(processed_label[0], processed_label[1], processed_label[2]))

    acc_path = get_raw_data_path('acc', processed_label[0], processed_label[1])
    if acc_path != raw_path:
        # New experiment/user pair: load both recordings and close the
        # handles right away instead of leaving them to the garbage collector.
        with open(get_raw_data_path('gyro', processed_label[0], processed_label[1]), 'r') as gyro_file:
            raw_gyro_file = gyro_file.readlines()
        with open(acc_path, 'r') as acc_file:
            raw_acc_file = acc_file.readlines()
        # Update the cache key only once both files are in memory.
        raw_path = acc_path

    for x in range(processed_label[3], processed_label[4]):
        acc_data = raw_acc_file[x].split()
        gyro_data = raw_gyro_file[x].split()

        raw_compiled_data.append("{} {} {} {} {} {} {} {}\n".format(
            acc_data[0],
            acc_data[1],
            acc_data[2],
            gyro_data[0],
            gyro_data[1],
            gyro_data[2],
            processed_label[1],
            processed_label[2]
        ))

Loading Experiment ID: 1 User ID: 1, Activity: 5
Loading Experiment ID: 1 User ID: 1, Activity: 7
Loading Experiment ID: 1 User ID: 1, Activity: 4
Loading Experiment ID: 1 User ID: 1, Activity: 8
Loading Experiment ID: 1 User ID: 1, Activity: 5
Loading Experiment ID: 1 User ID: 1, Activity: 11
Loading Experiment ID: 1 User ID: 1, Activity: 6
Loading Experiment ID: 1 User ID: 1, Activity: 10
Loading Experiment ID: 1 User ID: 1, Activity: 4
Loading Experiment ID: 1 User ID: 1, Activity: 9
Loading Experiment ID: 1 User ID: 1, Activity: 6
Loading Experiment ID: 1 User ID: 1, Activity: 12
Loading Experiment ID: 1 User ID: 1, Activity: 1
Loading Experiment ID: 1 User ID: 1, Activity: 1
Loading Experiment ID: 1 User ID: 1, Activity: 1
Loading Experiment ID: 1 User ID: 1, Activity: 1
Loading Experiment ID: 1 User ID: 1, Activity: 3
Loading Experiment ID: 1 User ID: 1, Activity: 2
Loading Experiment ID: 1 User ID: 1, Activity: 3
Loading Experiment ID: 1 User ID: 1, Activity: 2
Loading Experimen

In [60]:
# The random train/test split below is disabled. It is kept as real comments
# rather than a bare string literal, which Python evaluates as an expression
# statement and tooling does not treat as a comment.
#
# random.shuffle(raw_compiled_data)
# partition_idx = int(PARTITION * len(raw_compiled_data))
# train_data = raw_compiled_data[:partition_idx]
# test_data = raw_compiled_data[partition_idx:]
#
# for item in train_data:
#     raw_target_train_file.write(item)
#
# for item in test_data:
#     raw_target_test_file.write(item)

# Every row already ends with a newline, so the rows can be written as-is.
raw_data_file.writelines(raw_compiled_data)

In [61]:
# Close the three output handles opened by the data-loading cell so that the
# written rows are flushed to disk before the file is read back.
for handle in (raw_data_file, raw_target_train_file, raw_target_test_file):
    handle.close()

In [62]:
# Reload the compiled rows from disk; the file is whitespace-separated and has
# no header row.
# The separator is a raw string: '\s' inside a normal literal is an invalid
# escape sequence, which recent Python versions report as a SyntaxWarning.
raw_data_set = pd.read_csv(DATA_PATH + "self-calculated/raw-data/raw-data.txt", sep=r'\s+', header=None)
raw_data_set.columns = ['accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ', "subject", "activity"]

# Median-filter the six sensor channels; the trailing subject/activity columns
# are left untouched. Note that raw_data_set is overwritten in place.
for column in raw_data_set.columns[:-2]:
    raw_data_set[column] = apply_med_filter(raw_data_set[column].tolist())

raw_data_set.to_csv(DATA_PATH + "self-calculated/raw-data/median-filtered.txt", sep=' ', header=None, index=False)

  raw_data_set = pd.read_csv(DATA_PATH + "self-calculated/raw-data/raw-data.txt", sep='\s+', header=None)


In [63]:
# Low-pass every sensor channel (all columns except the trailing
# subject/activity pair) with cutoff=20, fs=50, order=3.
# raw_data_set is overwritten in place, so re-running this cell filters the
# already-filtered values again.
sensor_columns = raw_data_set.columns[:-2]
for column in sensor_columns:
    samples = raw_data_set[column].tolist()
    raw_data_set[column] = apply_low_butter(samples, 20, 50, 3)

butter_filtered_path = DATA_PATH + "self-calculated/raw-data/butter-filtered.txt"
raw_data_set.to_csv(butter_filtered_path, sep=' ', header=None, index=False)

In [64]:
# Split each accelerometer axis into two signals using the same 0.3 cut-off
# (fs=50, order=3): the high-passed part is stored as bodyAcc<axis>, the
# low-passed part as gravityAcc<axis>.
filtered_data_set = pd.DataFrame()

acc_columns = raw_data_set.columns[:3]
passthrough_columns = raw_data_set.columns[3:]

for column in acc_columns:
    axis = column[-1]  # last character of the column name: X, Y or Z
    samples = raw_data_set[column].tolist()
    filtered_data_set['bodyAcc' + axis] = apply_high_butter(samples, 0.3, 50, 3)
    filtered_data_set['gravityAcc' + axis] = apply_low_butter(samples, 0.3, 50, 3)

# The remaining columns (gyro channels, subject, activity) are copied unchanged.
for column in passthrough_columns:
    filtered_data_set[column] = raw_data_set[column]

# The file is written without a header, so the column order above defines the layout.
complete_filtered_path = DATA_PATH + "self-calculated/raw-data/complete-filtered.txt"
filtered_data_set.to_csv(complete_filtered_path, sep=' ', header=None, index=False)