In [65]:
# Imports
import time
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
from scipy.signal import butter, filtfilt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, welch
import glob

In [66]:
def plot(df):
    """Plot ``df`` with its default ``.plot()`` and display the figure.

    Parameters
    ----------
    df
        Object exposing a pandas-style ``.plot()`` method; each column is
        drawn against the row number.
    """
    df.plot()

    # The pyplot calls below decorate the current axes before rendering.
    plt.title('Pandas DataFrame Plot')
    plt.ylabel('Values')
    plt.xlabel('Row Number')
    plt.show()

In [67]:
# Global variables

# Feature matrix: starts with zero rows, 137 feature columns per row
features = np.empty((0, 137))
# Label vector: 1D array, one number per feature row
answers = np.array([])
# Every .txt file in RawData. glob returns paths in an arbitrary,
# filesystem-dependent order, but the files are later looked up by position,
# so sort them to make that lookup deterministic.
file_names = sorted(glob.glob("RawData" + '/*.txt'))

In [68]:
def extractWindow(start: int, end: int, experimentNum, activityNum):
    """Placeholder for per-window feature extraction; currently does nothing.

    Parameters
    ----------
    start, end : int
        Row bounds of the window, as supplied by the caller.
    experimentNum, activityNum
        Identifiers forwarded by the caller; not used yet.

    Returns
    -------
    None
    """
    # TODO: compute this window's features. Handy while debugging:
    # print(f"{start} to {end}, size is {end-start}")
    return None

# # For testing
# extractWindow(250, 375, 1, 1)

In [69]:
'''These are for noise reduction'''
# Rolling-median smoothing (noise reduction)
def apply_median_filter(data, window_size):
    """Return ``data`` smoothed with a centred rolling median.

    Parameters
    ----------
    data
        pandas object supporting ``.rolling`` (e.g. one Series/column).
    window_size : int
        Number of samples in each rolling window.

    Returns
    -------
    Same kind of pandas object as ``data``, holding the rolling medians.
    """
    # min_periods=1: positions near the ends, where the centred window is
    # cut short, are still computed from whatever samples are available.
    centred_windows = data.rolling(window=window_size, center=True, min_periods=1)
    return centred_windows.median()

# Low-pass Butterworth design
def butter_lowpass(cutoff, fs, order):
    """Design a digital low-pass Butterworth filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, in the same unit as ``fs``.
    fs : float
        Sampling frequency.
    order : int
        Filter order.

    Returns
    -------
    (b, a)
        Numerator and denominator coefficients as returned by
        ``scipy.signal.butter``.
    """
    # butter() takes the cutoff as a fraction of the Nyquist frequency (fs / 2).
    cutoff_fraction = cutoff / (0.5 * fs)
    return butter(order, cutoff_fraction, btype='low', analog=False)

# Low-pass Butterworth filtering
def apply_low_butter(data, cutoff, fs, order):
    """Low-pass filter ``data`` along axis 0 with a Butterworth filter.

    Parameters
    ----------
    data : array-like
        Samples to filter; filtering runs along the first axis.
    cutoff, fs, order
        Forwarded unchanged to ``butter_lowpass`` to design the filter.

    Returns
    -------
    The filtered samples as returned by ``scipy.signal.filtfilt``.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order)
    # filtfilt applies the filter forward and then backward over the data.
    return filtfilt(numerator, denominator, data, axis=0)

'''This is for getting body acc component from total acc'''
# High-pass Butterworth design
def butter_highpass(cutoff, fs, order):
    """Design a digital high-pass Butterworth filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, in the same unit as ``fs``.
    fs : float
        Sampling frequency.
    order : int
        Filter order.

    Returns
    -------
    (b, a)
        Numerator and denominator coefficients as returned by
        ``scipy.signal.butter``.
    """
    # The Nyquist frequency is half the sampling rate; butter() takes the
    # cutoff as a fraction of it.
    cutoff_fraction = cutoff / (0.5 * fs)
    return butter(order, cutoff_fraction, btype='high', analog=False)

# High-pass Butterworth filtering
def apply_high_butter(data, cutoff, fs, order):
    """High-pass filter ``data`` along axis 0 with a Butterworth filter.

    Parameters
    ----------
    data : array-like
        Samples to filter; filtering runs along the first axis.
    cutoff, fs, order
        Forwarded unchanged to ``butter_highpass`` to design the filter.

    Returns
    -------
    The filtered samples as returned by ``scipy.signal.filtfilt``.
    """
    numerator, denominator = butter_highpass(cutoff, fs, order)
    # filtfilt applies the filter forward and then backward over the data.
    return filtfilt(numerator, denominator, data, axis=0)

In [None]:
# For handling raw data
def extractAllFeatures(label: str):
    """Load, denoise and window the sensor data for one labelled segment.

    Parameters
    ----------
    label : str
        One whitespace-separated line of the labels file. Fields 0, 2, 3 and 4
        are parsed as experiment number, activity number, start row and end
        row; field 1 is not used here.

    Returns
    -------
    None
        The bounds of every window are passed to ``extractWindow``.

    Notes
    -----
    Uses the module-level ``file_names`` list and expects the accelerometer
    file of experiment ``k`` at index ``k - 1``, with the matching gyroscope
    file 61 entries later.
    """
    FS = 50                     # Sampling frequency in Hz
    SAMPLES_PER_WINDOW = 125    # 2.5 s windows * 50 samples per second
    WINDOW_STEP = 62            # 50% overlap: advance by half a window
    GYRO_FILE_OFFSET = 61       # gyro files sit 61 entries after the acc files
    WINDOW_SIZE = 3             # Median filter width, in samples
    CUTOFF_LOW_PASS = 20        # Low-pass cutoff frequency, Hz
    CUTOFF_HIGH_PASS = 0.5      # High-pass cutoff frequency, Hz
    ORDER = 3                   # Order of both Butterworth filters

    fields = label.split()
    experimentNum, activityNum = int(fields[0]), int(fields[2])
    start, end = int(fields[3]), int(fields[4])

    # Window start rows. Stopping WINDOW_STEP short of `end` skips the last
    # half window that would otherwise be added.
    window_starts = range(start, end - WINDOW_STEP, WINDOW_STEP)
    if not window_starts:
        # The segment is too short to produce any window, so there is nothing
        # to read or filter. Returning here also avoids the ValueError that
        # scipy's filtfilt raises when a column is not longer than its
        # padding length.
        return

    acc_file_name = file_names[experimentNum - 1]
    gyro_file_name = file_names[experimentNum - 1 + GYRO_FILE_OFFSET]

    # Read only this label's rows from the acc and gyro files.
    # NOTE(review): nrows=end-start leaves out row `end` itself; if the label's
    # end index is meant to be inclusive the last sample is dropped - confirm.
    row_count = end - start
    acc_data = pd.read_csv(acc_file_name, sep=' ', header=None, skiprows=start, nrows=row_count)
    gyro_data = pd.read_csv(gyro_file_name, sep=' ', header=None, skiprows=start, nrows=row_count)

    # Noise reduction, step 1: rolling median on every axis (column).
    acc_data_filtered = acc_data.apply(apply_median_filter, window_size=WINDOW_SIZE)
    gyro_data_filtered = gyro_data.apply(apply_median_filter, window_size=WINDOW_SIZE)

    # Noise reduction, step 2: low-pass Butterworth filter on every axis.
    acc_data_filtered = acc_data_filtered.apply(lambda col: apply_low_butter(col, CUTOFF_LOW_PASS, FS, ORDER))
    gyro_data_filtered = gyro_data_filtered.apply(lambda col: apply_low_butter(col, CUTOFF_LOW_PASS, FS, ORDER))

    # High-pass Butterworth filter on acc to get the body component.
    acc_data_filtered = acc_data_filtered.apply(lambda col: apply_high_butter(col, CUTOFF_HIGH_PASS, FS, ORDER))

    # NOTE(review): extractWindow only receives row bounds, so the filtered
    # frames computed above are not used by anything yet.
    for window_start in window_starts:
        window_end = min(window_start + SAMPLES_PER_WINDOW, end)
        extractWindow(window_start, window_end, experimentNum, activityNum)



# '''For testing'''
# extractAllFeatures('1 1 5 250 1232')
# extractAllFeatures('1 1 7 1233 1392')
# extractAllFeatures('1 1 4 1393 2194')
# extractAllFeatures('1 1 1 7496 8078')
# extractAllFeatures('1 1 2 14069 14699')
# extractAllFeatures('1 1 3 14869 15492')

In [None]:
# Main function
def main():
    """Run ``extractAllFeatures`` on every labelled section in ``labels.txt``.

    ``labels.txt`` is read from the current working directory; each non-blank
    line describes one data section and is passed on unchanged.

    Planned pipeline, for each section:
        i.   Load that section's respective data
        ii.  For each window in that section:
               a. Apply noise filters on the window
               b. Compute features from the window
               c. Store those feature values in an array
        iii. Store all the features and their respective data in a file
    """
    # Grab the labels.txt file telling you what each data section means.
    # The `with` block closes the file as soon as the lines are read.
    with open('labels.txt') as labels_file:
        labels_list = labels_file.readlines()

    for label in labels_list:
        # Blank lines (e.g. a trailing newline at the end of the file) hold no
        # fields to parse, so skip them instead of letting the parse fail.
        if not label.strip():
            continue
        extractAllFeatures(label)

if __name__ == "__main__":
    main()