In [1]:
import os
import numpy as np
import pandas as pd

### Organization of this code

This notebook creates the datasets used for training and testing.

1. [Create dataset]()
2. [Create train dataset]()
3. [Create test dataset]()
4. [Get file]()
5. [Remove Missing values]()
6. [Sliding window feature extraction]()


In [2]:
# Labels for every column of a sensors_data file, in column order. The files are read
# with header=None (see get_file), so the position in this list is the only thing that
# ties a label to a column.
# NOTE: the object-sensor groups (CUP through GLASS) list "accX" three times each.
# replace_duplicates() renames the 2nd and 3rd occurrence to accY / accZ, and it is that
# de-duplicated copy (unique_column_names) that is handed to pandas.
column_names = [
    "MILLISEC",
    "Accelerometer RKN^ accX",
    "Accelerometer RKN^ accY",
    "Accelerometer RKN^ accZ",
    "Accelerometer HIP accX",
    "Accelerometer HIP accY",
    "Accelerometer HIP accZ",
    "Accelerometer LUA^ accX",
    "Accelerometer LUA^ accY",
    "Accelerometer LUA^ accZ",
    "Accelerometer RUA_ accX",
    "Accelerometer RUA_ accY",
    "Accelerometer RUA_ accZ",
    "Accelerometer LH accX",
    "Accelerometer LH accY",
    "Accelerometer LH accZ",
    "Accelerometer BACK accX",
    "Accelerometer BACK accY",
    "Accelerometer BACK accZ",
    "Accelerometer RKN_ accX",
    "Accelerometer RKN_ accY",
    "Accelerometer RKN_ accZ",
    "Accelerometer RWR accX",
    "Accelerometer RWR accY",
    "Accelerometer RWR accZ",
    "Accelerometer RUA^ accX",
    "Accelerometer RUA^ accY",
    "Accelerometer RUA^ accZ",
    "Accelerometer LUA_ accX",
    "Accelerometer LUA_ accY",
    "Accelerometer LUA_ accZ",
    "Accelerometer LWR accX",
    "Accelerometer LWR accY",
    "Accelerometer LWR accZ",
    "Accelerometer RH accX",
    "Accelerometer RH accY",
    "Accelerometer RH accZ",
    "InertialMeasurementUnit BACK accX",
    "InertialMeasurementUnit BACK accY",
    "InertialMeasurementUnit BACK accZ",
    "InertialMeasurementUnit BACK gyroX",
    "InertialMeasurementUnit BACK gyroY",
    "InertialMeasurementUnit BACK gyroZ",
    "InertialMeasurementUnit BACK magneticX",
    "InertialMeasurementUnit BACK magneticY",
    "InertialMeasurementUnit BACK magneticZ",
    "InertialMeasurementUnit BACK Quaternion1",
    "InertialMeasurementUnit BACK Quaternion2",
    "InertialMeasurementUnit BACK Quaternion3",
    "InertialMeasurementUnit BACK Quaternion4",
    "InertialMeasurementUnit RUA accX",
    "InertialMeasurementUnit RUA accY",
    "InertialMeasurementUnit RUA accZ",
    "InertialMeasurementUnit RUA gyroX",
    "InertialMeasurementUnit RUA gyroY",
    "InertialMeasurementUnit RUA gyroZ",
    "InertialMeasurementUnit RUA magneticX",
    "InertialMeasurementUnit RUA magneticY",
    "InertialMeasurementUnit RUA magneticZ",
    "InertialMeasurementUnit RUA Quaternion1",
    "InertialMeasurementUnit RUA Quaternion2",
    "InertialMeasurementUnit RUA Quaternion3",
    "InertialMeasurementUnit RUA Quaternion4",
    "InertialMeasurementUnit RLA accX",
    "InertialMeasurementUnit RLA accY",
    "InertialMeasurementUnit RLA accZ",
    "InertialMeasurementUnit RLA gyroX",
    "InertialMeasurementUnit RLA gyroY",
    "InertialMeasurementUnit RLA gyroZ",
    "InertialMeasurementUnit RLA magneticX",
    "InertialMeasurementUnit RLA magneticY",
    "InertialMeasurementUnit RLA magneticZ",
    "InertialMeasurementUnit RLA Quaternion1",
    "InertialMeasurementUnit RLA Quaternion2",
    "InertialMeasurementUnit RLA Quaternion3",
    "InertialMeasurementUnit RLA Quaternion4",
    "InertialMeasurementUnit LUA accX",
    "InertialMeasurementUnit LUA accY",
    "InertialMeasurementUnit LUA accZ",
    "InertialMeasurementUnit LUA gyroX",
    "InertialMeasurementUnit LUA gyroY",
    "InertialMeasurementUnit LUA gyroZ",
    "InertialMeasurementUnit LUA magneticX",
    "InertialMeasurementUnit LUA magneticY",
    "InertialMeasurementUnit LUA magneticZ",
    "InertialMeasurementUnit LUA Quaternion1",
    "InertialMeasurementUnit LUA Quaternion2",
    "InertialMeasurementUnit LUA Quaternion3",
    "InertialMeasurementUnit LUA Quaternion4",
    "InertialMeasurementUnit LLA accX",
    "InertialMeasurementUnit LLA accY",
    "InertialMeasurementUnit LLA accZ",
    "InertialMeasurementUnit LLA gyroX",
    "InertialMeasurementUnit LLA gyroY",
    "InertialMeasurementUnit LLA gyroZ",
    "InertialMeasurementUnit LLA magneticX",
    "InertialMeasurementUnit LLA magneticY",
    "InertialMeasurementUnit LLA magneticZ",
    "InertialMeasurementUnit LLA Quaternion1",
    "InertialMeasurementUnit LLA Quaternion2",
    "InertialMeasurementUnit LLA Quaternion3",
    "InertialMeasurementUnit LLA Quaternion4",
    "InertialMeasurementUnit L-SHOE EuX",
    "InertialMeasurementUnit L-SHOE EuY",
    "InertialMeasurementUnit L-SHOE EuZ",
    "InertialMeasurementUnit L-SHOE Nav_Ax",
    "InertialMeasurementUnit L-SHOE Nav_Ay",
    "InertialMeasurementUnit L-SHOE Nav_Az",
    "InertialMeasurementUnit L-SHOE Body_Ax",
    "InertialMeasurementUnit L-SHOE Body_Ay",
    "InertialMeasurementUnit L-SHOE Body_Az",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameX",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameY",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameZ",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameX",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameY",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameZ",
    "InertialMeasurementUnit L-SHOE Compass",
    "InertialMeasurementUnit R-SHOE EuX",
    "InertialMeasurementUnit R-SHOE EuY",
    "InertialMeasurementUnit R-SHOE EuZ",
    "InertialMeasurementUnit R-SHOE Nav_Ax",
    "InertialMeasurementUnit R-SHOE Nav_Ay",
    "InertialMeasurementUnit R-SHOE Nav_Az",
    "InertialMeasurementUnit R-SHOE Body_Ax",
    "InertialMeasurementUnit R-SHOE Body_Ay",
    "InertialMeasurementUnit R-SHOE Body_Az",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameX",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameY",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameZ",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameX",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameY",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameZ",
    "InertialMeasurementUnit R-SHOE Compass",
    # Object sensors: each group below repeats "accX" three times on purpose-or-by-typo;
    # the repeats are what replace_duplicates() rewrites.
    "Accelerometer CUP accX",
    "Accelerometer CUP accX",
    "Accelerometer CUP accX",
    "Accelerometer CUP gyroX",
    "Accelerometer CUP gyroY",
    "Accelerometer SALAMI accX",
    "Accelerometer SALAMI accX",
    "Accelerometer SALAMI accX",
    "Accelerometer SALAMI gyroX",
    "Accelerometer SALAMI gyroY",
    "Accelerometer WATER accX",
    "Accelerometer WATER accX",
    "Accelerometer WATER accX",
    "Accelerometer WATER gyroX",
    "Accelerometer WATER gyroY",
    "Accelerometer CHEESE accX",
    "Accelerometer CHEESE accX",
    "Accelerometer CHEESE accX",
    "Accelerometer CHEESE gyroX",
    "Accelerometer CHEESE gyroY",
    "Accelerometer BREAD accX",
    "Accelerometer BREAD accX",
    "Accelerometer BREAD accX",
    "Accelerometer BREAD gyroX",
    "Accelerometer BREAD gyroY",
    "Accelerometer KNIFE1 accX",
    "Accelerometer KNIFE1 accX",
    "Accelerometer KNIFE1 accX",
    "Accelerometer KNIFE1 gyroX",
    "Accelerometer KNIFE1 gyroY",
    "Accelerometer MILK accX",
    "Accelerometer MILK accX",
    "Accelerometer MILK accX",
    "Accelerometer MILK gyroX",
    "Accelerometer MILK gyroY",
    "Accelerometer SPOON accX",
    "Accelerometer SPOON accX",
    "Accelerometer SPOON accX",
    "Accelerometer SPOON gyroX",
    "Accelerometer SPOON gyroY",
    "Accelerometer SUGAR accX",
    "Accelerometer SUGAR accX",
    "Accelerometer SUGAR accX",
    "Accelerometer SUGAR gyroX",
    "Accelerometer SUGAR gyroY",
    "Accelerometer KNIFE2 accX",
    "Accelerometer KNIFE2 accX",
    "Accelerometer KNIFE2 accX",
    "Accelerometer KNIFE2 gyroX",
    "Accelerometer KNIFE2 gyroY",
    "Accelerometer PLATE accX",
    "Accelerometer PLATE accX",
    "Accelerometer PLATE accX",
    "Accelerometer PLATE gyroX",
    "Accelerometer PLATE gyroY",
    "Accelerometer GLASS accX",
    "Accelerometer GLASS accX",
    "Accelerometer GLASS accX",
    "Accelerometer GLASS gyroX",
    "Accelerometer GLASS gyroY",
    "REED SWITCH DISHWASHER S1",
    "REED SWITCH FRIDGE S3",
    "REED SWITCH FRIDGE S2",
    "REED SWITCH FRIDGE S1",
    "REED SWITCH MIDDLEDRAWER S1",
    "REED SWITCH MIDDLEDRAWER S2",
    "REED SWITCH MIDDLEDRAWER S3",
    "REED SWITCH LOWERDRAWER S3",
    "REED SWITCH LOWERDRAWER S2",
    "REED SWITCH UPPERDRAWER",
    "REED SWITCH DISHWASHER S3",
    "REED SWITCH LOWERDRAWER S1",
    "REED SWITCH DISHWASHER S2",
    "Accelerometer DOOR1 accX",
    "Accelerometer DOOR1 accY",
    "Accelerometer DOOR1 accZ",
    "Accelerometer LAZYCHAIR accX",
    "Accelerometer LAZYCHAIR accY",
    "Accelerometer LAZYCHAIR accZ",
    "Accelerometer DOOR2 accX",
    "Accelerometer DOOR2 accY",
    "Accelerometer DOOR2 accZ",
    "Accelerometer DISHWASHER accX",
    "Accelerometer DISHWASHER accY",
    "Accelerometer DISHWASHER accZ",
    "Accelerometer UPPERDRAWER accX",
    "Accelerometer UPPERDRAWER accY",
    "Accelerometer UPPERDRAWER accZ",
    "Accelerometer LOWERDRAWER accX",
    "Accelerometer LOWERDRAWER accY",
    "Accelerometer LOWERDRAWER accZ",
    "Accelerometer MIDDLEDRAWER accX",
    "Accelerometer MIDDLEDRAWER accY",
    "Accelerometer MIDDLEDRAWER accZ",
    "Accelerometer FRIDGE accX",
    "Accelerometer FRIDGE accY",
    "Accelerometer FRIDGE accZ",
    "LOCATION TAG1 X",
    "LOCATION TAG1 Y",
    "LOCATION TAG1 Z",
    "LOCATION TAG2 X",
    "LOCATION TAG2 Y",
    "LOCATION TAG2 Z",
    "LOCATION TAG3 X",
    "LOCATION TAG3 Y",
    "LOCATION TAG3 Z",
    "LOCATION TAG4 X",
    "LOCATION TAG4 Y",
    "LOCATION TAG4 Z",
    # presumably the annotation (label) columns rather than sensor readings — TODO confirm
    "Locomotion",
    "HL_Activity",
    "LL_Left_Arm",
    "LL_Left_Arm_Object",
    "LL_Right_Arm",
    "LL_Right_Arm_Object",
    "ML_Both_Arms",
]

In [3]:
# Subset of column labels that get_file() selects from each loaded file:
#   - MILLISEC
#   - the acc / gyro / magnetic channels of the BACK, RUA, RLA, LUA and LLA
#     InertialMeasurementUnit sensors (their Quaternion channels are left out)
#   - every L-SHOE and R-SHOE channel
#   - Locomotion (presumably the target label — TODO confirm)
# Every entry must match a label in the de-duplicated column list exactly.
feature_columns = [
    "MILLISEC",
    "InertialMeasurementUnit BACK accX",
    "InertialMeasurementUnit BACK accY",
    "InertialMeasurementUnit BACK accZ",
    "InertialMeasurementUnit BACK gyroX",
    "InertialMeasurementUnit BACK gyroY",
    "InertialMeasurementUnit BACK gyroZ",
    "InertialMeasurementUnit BACK magneticX",
    "InertialMeasurementUnit BACK magneticY",
    "InertialMeasurementUnit BACK magneticZ",
    "InertialMeasurementUnit RUA accX",
    "InertialMeasurementUnit RUA accY",
    "InertialMeasurementUnit RUA accZ",
    "InertialMeasurementUnit RUA gyroX",
    "InertialMeasurementUnit RUA gyroY",
    "InertialMeasurementUnit RUA gyroZ",
    "InertialMeasurementUnit RUA magneticX",
    "InertialMeasurementUnit RUA magneticY",
    "InertialMeasurementUnit RUA magneticZ",
    "InertialMeasurementUnit RLA accX",
    "InertialMeasurementUnit RLA accY",
    "InertialMeasurementUnit RLA accZ",
    "InertialMeasurementUnit RLA gyroX",
    "InertialMeasurementUnit RLA gyroY",
    "InertialMeasurementUnit RLA gyroZ",
    "InertialMeasurementUnit RLA magneticX",
    "InertialMeasurementUnit RLA magneticY",
    "InertialMeasurementUnit RLA magneticZ",
    "InertialMeasurementUnit LUA accX",
    "InertialMeasurementUnit LUA accY",
    "InertialMeasurementUnit LUA accZ",
    "InertialMeasurementUnit LUA gyroX",
    "InertialMeasurementUnit LUA gyroY",
    "InertialMeasurementUnit LUA gyroZ",
    "InertialMeasurementUnit LUA magneticX",
    "InertialMeasurementUnit LUA magneticY",
    "InertialMeasurementUnit LUA magneticZ",
    "InertialMeasurementUnit LLA accX",
    "InertialMeasurementUnit LLA accY",
    "InertialMeasurementUnit LLA accZ",
    "InertialMeasurementUnit LLA gyroX",
    "InertialMeasurementUnit LLA gyroY",
    "InertialMeasurementUnit LLA gyroZ",
    "InertialMeasurementUnit LLA magneticX",
    "InertialMeasurementUnit LLA magneticY",
    "InertialMeasurementUnit LLA magneticZ",
    "InertialMeasurementUnit L-SHOE EuX",
    "InertialMeasurementUnit L-SHOE EuY",
    "InertialMeasurementUnit L-SHOE EuZ",
    "InertialMeasurementUnit L-SHOE Nav_Ax",
    "InertialMeasurementUnit L-SHOE Nav_Ay",
    "InertialMeasurementUnit L-SHOE Nav_Az",
    "InertialMeasurementUnit L-SHOE Body_Ax",
    "InertialMeasurementUnit L-SHOE Body_Ay",
    "InertialMeasurementUnit L-SHOE Body_Az",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameX",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameY",
    "InertialMeasurementUnit L-SHOE AngVelBodyFrameZ",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameX",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameY",
    "InertialMeasurementUnit L-SHOE AngVelNavFrameZ",
    "InertialMeasurementUnit L-SHOE Compass",
    "InertialMeasurementUnit R-SHOE EuX",
    "InertialMeasurementUnit R-SHOE EuY",
    "InertialMeasurementUnit R-SHOE EuZ",
    "InertialMeasurementUnit R-SHOE Nav_Ax",
    "InertialMeasurementUnit R-SHOE Nav_Ay",
    "InertialMeasurementUnit R-SHOE Nav_Az",
    "InertialMeasurementUnit R-SHOE Body_Ax",
    "InertialMeasurementUnit R-SHOE Body_Ay",
    "InertialMeasurementUnit R-SHOE Body_Az",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameX",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameY",
    "InertialMeasurementUnit R-SHOE AngVelBodyFrameZ",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameX",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameY",
    "InertialMeasurementUnit R-SHOE AngVelNavFrameZ",
    "InertialMeasurementUnit R-SHOE Compass",
    "Locomotion",
]

In [4]:
def find_duplicates(input_list):
    """
    Return the items that occur more than once in ``input_list``.

    Parameters:
    - input_list: iterable of hashable items.

    Returns:
    - List with each repeated item exactly once, ordered by the position at which
      the item was first seen a second time. The order is therefore the same on
      every run (a plain ``list(set(...))`` is not, because string hashing is
      randomised per process).
    """
    seen = set()
    # A dict is used as an insertion-ordered set so the result order is stable.
    duplicates = {}
    for item in input_list:
        if item in seen:
            duplicates.setdefault(item, None)
        else:
            seen.add(item)
    return list(duplicates)

In [5]:
def replace_duplicates(input_list):
    """
    Rename repeated entries so that a list of column labels can be told apart.

    The first occurrence of a label is kept as-is. For labels ending in "X"
    (the repeated "... accX" sensor labels), the second occurrence has its final
    "X" replaced by "Y" and the third by "Z". Any other repeat — a label that does
    not end in "X", or a fourth or later occurrence — gets "_<n>" appended, where
    <n> is its occurrence number, instead of being mangled or left duplicated.

    Parameters:
    - input_list: list of strings.

    Returns:
    - New list of the same length and order; ``input_list`` is not modified.
    """
    axis_for_occurrence = {2: "Y", 3: "Z"}
    counts = {}
    new_list = []
    for item in input_list:
        occurrence = counts.get(item, 0) + 1
        counts[item] = occurrence
        if occurrence == 1:
            new_list.append(item)
        elif occurrence in axis_for_occurrence and item.endswith("X"):
            # Only swap the axis letter when there really is an "X" to swap.
            new_list.append(item[:-1] + axis_for_occurrence[occurrence])
        else:
            new_list.append(f"{item}_{occurrence}")
    return new_list

In [6]:
# Rename the repeated "accX" labels so that every column label is distinct.
unique_column_names = replace_duplicates(column_names)
# pandas refuses duplicate labels in read_csv(names=...), so fail here, next to the
# cause, rather than later when the first file is loaded.
assert len(set(unique_column_names)) == len(unique_column_names), "column names are not unique"
print(unique_column_names)

['MILLISEC', 'Accelerometer RKN^ accX', 'Accelerometer RKN^ accY', 'Accelerometer RKN^ accZ', 'Accelerometer HIP accX', 'Accelerometer HIP accY', 'Accelerometer HIP accZ', 'Accelerometer LUA^ accX', 'Accelerometer LUA^ accY', 'Accelerometer LUA^ accZ', 'Accelerometer RUA_ accX', 'Accelerometer RUA_ accY', 'Accelerometer RUA_ accZ', 'Accelerometer LH accX', 'Accelerometer LH accY', 'Accelerometer LH accZ', 'Accelerometer BACK accX', 'Accelerometer BACK accY', 'Accelerometer BACK accZ', 'Accelerometer RKN_ accX', 'Accelerometer RKN_ accY', 'Accelerometer RKN_ accZ', 'Accelerometer RWR accX', 'Accelerometer RWR accY', 'Accelerometer RWR accZ', 'Accelerometer RUA^ accX', 'Accelerometer RUA^ accY', 'Accelerometer RUA^ accZ', 'Accelerometer LUA_ accX', 'Accelerometer LUA_ accY', 'Accelerometer LUA_ accZ', 'Accelerometer LWR accX', 'Accelerometer LWR accY', 'Accelerometer LWR accZ', 'Accelerometer RH accX', 'Accelerometer RH accY', 'Accelerometer RH accZ', 'InertialMeasurementUnit BACK accX',

In [7]:
# NOTE(review): not read or modified anywhere else in the visible cells — TODO confirm
# whether it is still needed.
bt_included_cols = []


def get_file(filepath):
    """
    Load one space-delimited, header-less sensor file and keep the feature columns.

    Parameters:
    - filepath: path of the file to read.

    Returns:
    - DataFrame containing only the columns listed in ``feature_columns``, in that
      order. Column labels come from ``unique_column_names``.
    """
    df = pd.read_csv(filepath, header=None, names=unique_column_names, delimiter=" ")
    # feature_columns is already a list of labels; wrapping it in a second list makes
    # pandas look for the list itself as a key and the lookup fails.
    return df[feature_columns]

In [8]:
def remove_missing_values(df, method="linear", order=None):
    """
    Fill missing values by interpolating along each column.

    Parameters:
    - df: pandas DataFrame.
    - method: interpolation method name passed to ``DataFrame.interpolate``.
    - order: order of the spline / polynomial. Only forwarded for the "spline" and
      "polynomial" methods, which pandas cannot run without it; ignored otherwise.

    Returns:
    - A new, interpolated DataFrame, or ``df`` itself when it is empty.
    """
    # todo: add method for removing missing rows entirely
    # todo allow interpolation only if certain number of columns have missing values

    if df.empty:
        return df

    if method in ("spline", "polynomial"):
        # Both methods need an explicit order; pandas raises ValueError if it is None.
        return df.interpolate(method=method, order=order)

    return df.interpolate(method=method)

#### What to do in sliding window function

Create an index value for each window so that the order can be recovered later (the first window gets 1, the second gets 2, and so on).

For each feature to be computed on, calculate the stat and add it to a temp storage (dict). Keep result for each window in a list (list of dicts) then convert it back into a dataframe at the end of computation.

Labels: not decided yet. Taking the most frequent label in each window is definitely not an option, because the null class is far more frequent than the others and would wipe out the real observations.
Other methods could work, but they need more exploratory analysis first.


In [None]:
def sliding_window_feature_extraction(df, columns, window_size, overlap, stats):
    """
    Slides a fixed-size window over the rows of a DataFrame and computes the given
    statistics for every requested column in every window, without padding. The
    result is a feature matrix with one row per window.

    Parameters:
    - df: pandas DataFrame.
    - columns: The column names as a list of strings for which statistics will be computed.
    - window_size: The number of rows in each window.
    - overlap: The number of rows shared by two consecutive windows. Must be smaller
      than window_size.
    - stats: List of statistical functions to apply. Each function should accept a
      pd.Series and return a scalar. Its ``__name__`` is used in the output column name.

    Returns:
    - DataFrame where each row corresponds to one window. The "timestamp" column holds
      the row position at which the window starts (not a time value); every other
      column is named "<column>_<function name>". Trailing rows that do not fill a
      whole window are dropped.

    Raises:
    - ValueError: if overlap is not smaller than window_size, since the window would
      never advance.
    """
    step_size = window_size - overlap
    if step_size <= 0:
        raise ValueError("overlap must be smaller than window_size")
    start_points = range(0, len(df) - window_size + 1, step_size)

    results = []
    for start in start_points:
        window_stats = {"timestamp": start}
        for column in columns:
            window = df[column].iloc[start : start + window_size]
            for f in stats:
                key_name = f"{column}_{f.__name__}"
                window_stats[key_name] = f(window)
        # Append once per window, after all of its statistics have been collected;
        # appending inside the inner loops would repeat the row for every statistic.
        results.append(window_stats)

    results_df = pd.DataFrame(results)

    return results_df

In [None]:
def create_training_dataset(filenames):
    """
    Load and clean every training file.

    Parameters:
    - filenames: iterable of paths to sensor files.

    Returns:
    - List of DataFrames, one per file and in the same order as ``filenames``, each
      loaded with get_file() and interpolated with remove_missing_values(). The
      frames are kept separate so that later per-file processing (e.g. windowing)
      does not run across the boundary between two files.
    """
    cleaned_frames = []
    for filename in filenames:
        df = get_file(filename)
        # Keep each cleaned frame; previously it was overwritten on the next
        # iteration and nothing was returned.
        cleaned_frames.append(remove_missing_values(df))
    return cleaned_frames

The data directory is laid out like this:

Opportunity++
\data_dir
\S1-ADL1
S1-ADL1_sensors_data.csv


In [None]:
def get_filenames(data_dir, test_files):
    """
    Build the sensor-file paths for every recording folder under ``data_dir`` and
    split them into test and train lists.

    Each sub-folder ``<name>`` is expected to contain ``<name>_sensors_data.csv``;
    the path is built from the folder name and is not checked for existence.

    Parameters:
    - data_dir: directory that contains one sub-folder per recording.
    - test_files: collection of folder names whose files belong to the test set.

    Returns:
    - Tuple ``(test_filenames, train_filenames)`` of absolute paths — note that the
      test list comes first. Folders are visited in sorted order.

    The working directory of the process is left untouched.
    """
    root = os.path.abspath(data_dir)
    train_filenames = []
    test_filenames = []

    for folder in sorted(os.listdir(root)):
        # Stray files next to the recording folders are not recordings.
        if not os.path.isdir(os.path.join(root, folder)):
            continue
        sensor_path = os.path.join(root, folder, f"{folder}_sensors_data.csv")
        if folder in test_files:
            test_filenames.append(sensor_path)
        else:
            train_filenames.append(sensor_path)
    return test_filenames, train_filenames

In [None]:
# NOTE(review): the name `dir` shadows Python's builtin dir() for the rest of the
# kernel session.
dir = r"C:\Users\Pranshu\Downloads\Opportunity++\data"
# Machine-specific absolute path: replace with your own path