In [None]:
import os
from google.colab import drive
import numpy as np
import pandas as pd
import keras
from keras import Sequential
from keras.layers import (
    Conv1D,
    MaxPooling1D,
    LSTM,
    Dense,
    Dropout,
    GlobalAveragePooling1D,
    BatchNormalization,
)
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from keras.utils import to_categorical
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive (Colab only) so the data stored on the drive is reachable.
drive.mount("/content/drive")
# Directory that holds the "train" and "test" sub-directories read below.
# There is no trailing slash, so sub-paths must be built with os.path.join.
path = "/content/drive/My Drive/opportunity_raw/opp_temp"
# List the directory as a quick check that the mount and the path are valid.
os.listdir(path)

In [None]:
# Show the entries of the "train" sub-directory.
os.listdir(os.path.join(path, "train"))

In [None]:
def _build_column_names():
    """Assemble the 250 column names used positionally as the raw-file header.

    The object-mounted sensors list "accX" three times per object; those
    repeated names are reproduced here unchanged. The list is passed through
    replace_duplicates afterwards, which renames the repeats.
    """
    xyz = ("X", "Y", "Z")
    imu = "InertialMeasurementUnit"

    body_accelerometers = (
        "RKN^", "HIP", "LUA^", "RUA_", "LH", "BACK",
        "RKN_", "RWR", "RUA^", "LUA_", "LWR", "RH",
    )
    imu_sites = ("BACK", "RUA", "RLA", "LUA", "LLA")
    imu_channels = [
        f"{quantity}{axis}" for quantity in ("acc", "gyro", "magnetic") for axis in xyz
    ] + [f"Quaternion{index}" for index in range(1, 5)]

    shoes = ("L-SHOE", "R-SHOE")
    shoe_channels = (
        [f"Eu{axis}" for axis in xyz]
        + [f"Nav_A{axis.lower()}" for axis in xyz]
        + [f"Body_A{axis.lower()}" for axis in xyz]
        + [f"AngVelBodyFrame{axis}" for axis in xyz]
        + [f"AngVelNavFrame{axis}" for axis in xyz]
        + ["Compass"]
    )

    objects = (
        "CUP", "SALAMI", "WATER", "CHEESE", "BREAD", "KNIFE1",
        "MILK", "SPOON", "SUGAR", "KNIFE2", "PLATE", "GLASS",
    )
    # "accX" really is repeated three times per object in this list.
    object_channels = ("accX", "accX", "accX", "gyroX", "gyroY")

    reed_switches = (
        "DISHWASHER S1",
        "FRIDGE S3",
        "FRIDGE S2",
        "FRIDGE S1",
        "MIDDLEDRAWER S1",
        "MIDDLEDRAWER S2",
        "MIDDLEDRAWER S3",
        "LOWERDRAWER S3",
        "LOWERDRAWER S2",
        "UPPERDRAWER",
        "DISHWASHER S3",
        "LOWERDRAWER S1",
        "DISHWASHER S2",
    )
    fixtures = (
        "DOOR1", "LAZYCHAIR", "DOOR2", "DISHWASHER",
        "UPPERDRAWER", "LOWERDRAWER", "MIDDLEDRAWER", "FRIDGE",
    )
    label_tracks = [
        "Locomotion",
        "HL_Activity",
        "LL_Left_Arm",
        "LL_Left_Arm_Object",
        "LL_Right_Arm",
        "LL_Right_Arm_Object",
        "ML_Both_Arms",
    ]

    names = ["MILLISEC"]
    names += [
        f"Accelerometer {site} acc{axis}"
        for site in body_accelerometers
        for axis in xyz
    ]
    names += [f"{imu} {site} {channel}" for site in imu_sites for channel in imu_channels]
    names += [f"{imu} {shoe} {channel}" for shoe in shoes for channel in shoe_channels]
    names += [
        f"Accelerometer {item} {channel}"
        for item in objects
        for channel in object_channels
    ]
    names += [f"REED SWITCH {switch}" for switch in reed_switches]
    names += [
        f"Accelerometer {fixture} acc{axis}" for fixture in fixtures for axis in xyz
    ]
    names += [f"LOCATION TAG{tag} {axis}" for tag in range(1, 5) for axis in xyz]
    names += label_tracks
    return names


column_names = _build_column_names()

In [None]:
# Despite the name, this is not features only: the last entry is the
# "Locomotion" target column.
def _build_feature_columns():
    """Return the 77 body-worn IMU channel names followed by "Locomotion"."""
    prefix = "InertialMeasurementUnit"
    axes = "XYZ"

    # acc/gyro/magnetic on X, Y, Z for the five limb/back units.
    limb_channels = [
        f"{quantity}{axis}" for quantity in ("acc", "gyro", "magnetic") for axis in axes
    ]
    # The shoe units expose a different, 16-channel layout.
    shoe_channels = (
        [f"Eu{axis}" for axis in axes]
        + [f"Nav_A{axis.lower()}" for axis in axes]
        + [f"Body_A{axis.lower()}" for axis in axes]
        + [f"AngVelBodyFrame{axis}" for axis in axes]
        + [f"AngVelNavFrame{axis}" for axis in axes]
        + ["Compass"]
    )

    columns = [
        f"{prefix} {site} {channel}"
        for site in ("BACK", "RUA", "RLA", "LUA", "LLA")
        for channel in limb_channels
    ]
    columns.extend(
        f"{prefix} {shoe} {channel}"
        for shoe in ("L-SHOE", "R-SHOE")
        for channel in shoe_channels
    )
    columns.append("Locomotion")
    return columns


feature_columns = _build_feature_columns()

In [None]:
# Model inputs only: the 77 body-worn IMU channels, no target column.
def _build_body_features():
    """Return the 77 IMU channel names (five limb/back units, two shoe units)."""
    unit = "InertialMeasurementUnit"
    nine_axis = [
        sensor + axis for sensor in ("acc", "gyro", "magnetic") for axis in ("X", "Y", "Z")
    ]
    shoe_layout = [
        "EuX", "EuY", "EuZ",
        "Nav_Ax", "Nav_Ay", "Nav_Az",
        "Body_Ax", "Body_Ay", "Body_Az",
        "AngVelBodyFrameX", "AngVelBodyFrameY", "AngVelBodyFrameZ",
        "AngVelNavFrameX", "AngVelNavFrameY", "AngVelNavFrameZ",
        "Compass",
    ]
    placements = [(site, nine_axis) for site in ("BACK", "RUA", "RLA", "LUA", "LLA")]
    placements += [(shoe, shoe_layout) for shoe in ("L-SHOE", "R-SHOE")]

    features = []
    for placement, channels in placements:
        for channel in channels:
            features.append(f"{unit} {placement} {channel}")
    return features


body_features_raw = _build_body_features()

In [None]:
def replace_duplicates(input_list):
    """
    Rename repeated column names so that the first three occurrences differ.

    The last character of a repeated name is swapped out: the second
    occurrence ends in "Y", the third in "Z". The first occurrence, and any
    occurrence after the third, is kept unchanged.

    Returns a new list with the corrected column names, in the input order."""
    suffix_by_occurrence = {2: "Y", 3: "Z"}
    seen = {}
    renamed = []
    for name in input_list:
        occurrence = seen.get(name, 0) + 1
        seen[name] = occurrence
        suffix = suffix_by_occurrence.get(occurrence)
        renamed.append(name if suffix is None else name[:-1] + suffix)
    return renamed

In [None]:
# column_names with its repeated entries renamed by replace_duplicates;
# get_file hands this list to read_csv as the header of the raw files.
unique_column_names = replace_duplicates(column_names)

In [None]:
def get_file(filepath: str):
    """
    Read one space-delimited, header-less sensor file and keep the modelling columns.

    The file is labelled with unique_column_names; only the columns named in
    feature_columns are returned, in that order."""
    all_columns = pd.read_csv(
        filepath,
        sep=" ",
        names=unique_column_names,
        header=None,
    )
    selected = all_columns[feature_columns]
    return selected

In [None]:
def remove_missing_values(df: pd.DataFrame, method="linear", order=None):
    """
    Fill missing values by interpolating each column.

    Params:
    - df (pd.DataFrame): Frame to fill. An empty frame is returned as-is.
    - method (str): Interpolation method passed to DataFrame.interpolate.
    - order: Forwarded to DataFrame.interpolate only when method is "spline".

    For every method other than "spline" the interpolation runs with
    limit_direction="both"; the "spline" branch does not set limit_direction.
    """
    # todo: add method for removing missing rows entirely
    # todo allow interpolation only if certain number of columns have missing values
    if df.empty:
        return df

    if method == "spline":
        extra_options = {"order": order}
    else:
        extra_options = {"limit_direction": "both"}
    return df.interpolate(method=method, **extra_options)

In [None]:
def get_filenames(data_dir, test_files):
    """
    Build the sensor-data file path for every entry of `data_dir`.

    Each entry <name> of `data_dir` is treated as a folder containing a file
    called "<name>_sensors_data.txt"; the existence of that file is not checked.

    Params:
    - data_dir (str): Directory whose entries are the per-recording folders.
    - test_files: Collection of folder names that belong to the test split;
      every other folder goes to the train split.

    Returns:
    - (test_filenames, train_filenames): two lists of paths. Note the order:
      the test list comes first.
    """
    train_filenames = []
    test_filenames = []

    # List data_dir directly rather than os.chdir()-ing into it. Changing the
    # working directory affected everything run afterwards, and it made the
    # returned paths wrong whenever data_dir was a relative path.
    for folder in os.listdir(data_dir):
        sensor_path = os.path.join(data_dir, folder, f"{folder}_sensors_data.txt")
        if folder in test_files:
            test_filenames.append(sensor_path)
        else:
            train_filenames.append(sensor_path)
    return test_filenames, train_filenames

In [None]:
# Lift the row limit on displayed/printed pandas objects (None = show all rows).
pd.set_option("display.max_rows", None)

In [None]:
def apply_sliding_window_combine(
    filepath: str, target_var: str, window_size=16, overlap=0.5
):
    """
    Apply sliding window transforms to features and target of the given file.

    The file is loaded with get_file, gaps are filled with
    remove_missing_values, and the rows are cut into windows of
    `window_size` consecutive samples. Consecutive windows start
    int(window_size * (1 - overlap)) rows apart; trailing rows that do not
    fill a whole window are dropped.

    Params:
    - filepath (str): Path of the csv file that data is contained in.
    - target_var (str): Target variable chosen for the given csv file.
    - window_size (int): Size of one sliding window for the transform.
    - overlap (float): Fractional overlap between two consecutive sliding
      windows (0.5 means half a window).

    Returns:
    - combined_data (np.ndarray): shape
      (n_windows, window_size, len(body_features_raw)).
    - target (np.ndarray): shape (n_windows, window_size), the per-sample
      values of `target_var` for each window.
      A file shorter than one window gives n_windows == 0 with these same
      trailing dimensions, so the result can still be concatenated with others.

    Raises:
    - ValueError: if window_size/overlap give a stride below one row (the
      windowing loop would otherwise never advance).
    """
    # todo: error handling for no or multiple target variables.
    # todo: allow alternative saving as a pandas dataframe.
    shift_by = int(window_size * (1 - overlap))
    if window_size < 1 or shift_by < 1:
        raise ValueError(
            f"window_size={window_size} and overlap={overlap} give a stride of "
            f"{shift_by} rows; the stride must be at least 1"
        )

    df = get_file(filepath)
    df = remove_missing_values(df)
    # Diagnostic: per-column flag for values that are still missing after filling.
    print(df.isnull().any())

    def sliding_window_processing(data, window_size, shift_by):
        """
        Cut `data` into windows along its first axis, return as np array.
        """
        starts = range(0, len(data) - window_size + 1, shift_by)
        if len(starts) == 0:
            # Keep the trailing dimensions so empty results still concatenate.
            return np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        return np.stack([data[start : start + window_size] for start in starts])

    # Window all feature columns at once: (rows, features) -> (windows, window_size, features).
    combined_data = sliding_window_processing(
        df[body_features_raw].to_numpy(), window_size, shift_by
    )
    target = sliding_window_processing(
        df[target_var].to_numpy(), window_size, shift_by
    )

    return combined_data, target

In [None]:
# os.listdir returns entries in arbitrary order. Sort them so the files are
# windowed and concatenated in the same order on every run; positional slices
# taken later (e.g. X_test[:10000]) then always cover the same recordings.
train_filenames = sorted(os.listdir(os.path.join(path, "train")))
test_filenames = sorted(os.listdir(os.path.join(path, "test")))

train_filenames

In [None]:
# Display the entries found in the "test" sub-directory.
test_filenames

In [None]:
# testing for missing values
# Build the path with os.path.join: `path` has no trailing slash, so the old
# f"{path}test/..." resolved to ".../opp_temptest/..." instead of the test folder.
df = get_file(os.path.join(path, "test", "S4-Drill_sensors_data.txt"))
df = remove_missing_values(df)

assert df.isnull().sum().sum() == 0, "Missing value removal failed"

In [None]:
# Window every test recording, then merge the per-file results into single arrays.
windowed_test_files = [
    apply_sliding_window_combine(
        filepath=os.path.join(path, f"test/{filepath}"), target_var="Locomotion"
    )
    for filepath in test_filenames
]
all_combined = [features for features, _ in windowed_test_files]
all_targets = [labels for _, labels in windowed_test_files]

final_combined_test = np.concatenate(all_combined)
final_targets_test = np.concatenate(all_targets)

In [None]:
# Same procedure for the training recordings: window each file, then stack the results.
windowed_train_files = [
    apply_sliding_window_combine(
        filepath=os.path.join(path, f"train/{filepath}"), target_var="Locomotion"
    )
    for filepath in train_filenames
]
all_combined = [features for features, _ in windowed_train_files]
all_targets = [labels for _, labels in windowed_train_files]

final_combined_train = np.concatenate(all_combined)
final_targets_train = np.concatenate(all_targets)

In [None]:
# np.concatenate already produced ndarrays; np.asarray reuses them instead of
# making a second full copy of the windowed data in memory (np.array copies).
test_data = np.asarray(final_combined_test)
test_targets = np.asarray(final_targets_test)
print(test_data.shape)

In [None]:
# np.concatenate already produced ndarrays; np.asarray reuses them instead of
# making a second full copy of the windowed data in memory (np.array copies).
train_data = np.asarray(final_combined_train)
train_targets = np.asarray(final_targets_train)
print(train_data.shape)

In [None]:
# Conventional X/y names for the windowed arrays used from here on.
X_train, y_train = train_data, train_targets
X_test, y_test = test_data, test_targets

# Shape checks on the training arrays: axis 1 is the window length,
# axis 2 of X_train the number of feature channels.
assert X_train.shape[1] == 16, "Number of timesteps is wrong"
assert X_train.shape[2] == 77, "Number of features is wrong"
assert y_train.shape[1] == 16, "Number of labels is wrong"

In [None]:
def majority_voting_labels(y_train):
    """
    Reduce each row of per-sample labels to its single most frequent label.

    Labels greater than 2 are first shifted down by one (e.g. 4 -> 3, 5 -> 4).
    The vote then runs along axis 1; on a tie the smallest label wins, because
    argmax returns the first maximum of the bincount.

    Params:
    - y_train (np.ndarray): Non-negative integer labels, one row per window.

    Returns an array with one label per row of `y_train`.
    """

    def most_frequent(window_labels):
        return np.bincount(window_labels).argmax()

    compacted_labels = np.where(y_train > 2, y_train - 1, y_train)
    return np.apply_along_axis(most_frequent, 1, compacted_labels)

In [None]:
# Collapse the per-sample labels of every window to one label per window.
y_train_majority = majority_voting_labels(y_train)
y_test_majority = majority_voting_labels(y_test)
# Spot check: number of training windows and a few of the voted labels.
print(y_train_majority.shape)
print(y_train_majority[:5], y_train_majority[110:115])

In [None]:
# Pass num_classes explicitly: without it the one-hot width is inferred from
# the largest label present, so train and test could end up with different
# widths if one split lacks the highest class. 5 matches the model's output size.
y_train_majority_encoded = to_categorical(y_train_majority, num_classes=5)
y_test_majority_encoded = to_categorical(y_test_majority, num_classes=5)
# Check the encoded array: y_train_majority itself is 1-D, so indexing its
# shape[1] raised IndexError instead of validating anything.
assert y_train_majority_encoded.shape[1] == 5, "Label Encoding is incorrect"
assert y_test_majority_encoded.shape[1] == 5, "Label Encoding is incorrect"

In [None]:
# Pre-training sanity checks: no NaN may remain in the encoded training labels
# or in the training windows.
assert not np.any(np.isnan(y_train_majority_encoded)), "y_train contains NaN values"
assert not np.any(np.isnan(X_train)), "X_train still has missing values"

In [None]:
# nan_indices = np.where(np.isnan(X_train))
# print(f"NaN values found at indices: {nan_indices}")
# count = 0
# for index in zip(*nan_indices):
#     count += 1
#     if count>=5000:
#         break
#     print(f"NaN found at index: {index}, value: {X_train[index]}")

In [None]:
y_train_classes, y_train_counts = np.unique(y_train_majority, return_counts=True)
y_test_classes, y_test_counts = np.unique(y_test_majority, return_counts=True)

# One bar chart per split, side by side, to compare the class balance.
fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=(12, 6))
distribution_panels = [
    (train_ax, y_train_classes, y_train_counts, "blue", "Class Distribution in Training Data"),
    (test_ax, y_test_classes, y_test_counts, "green", "Class Distribution in Test Data"),
]
for ax, classes, counts, bar_color, panel_title in distribution_panels:
    ax.bar(classes, counts, color=bar_color, alpha=0.7)
    ax.set_xlabel("Class")
    ax.set_ylabel("Count")
    ax.set_title(panel_title)

fig.tight_layout()
plt.show()

In [None]:
def build_lstm_cnn_model(
    input_shape,
    num_classes=5,
    conv_filters=64,
    kernel_size=5,
    dropout_rate=0.2,
    conv_strides=5,
):
    """
    Build and compile the LSTM -> Conv1D classifier.

    Architecture: two stacked LSTM layers (32 units each, full sequences
    returned), dropout, one Conv1D layer, global average pooling over time,
    batch normalisation and a softmax output layer.

    Params:
    - input_shape (tuple): Shape of one sample, (timesteps, features).
    - num_classes (int): Size of the softmax output layer.
    - conv_filters (int): Number of Conv1D filters.
    - kernel_size (int): Conv1D kernel length. The default is 5, the value the
      layer was previously hard-coded to, so default calls build the same model.
    - dropout_rate (float): Dropout rate applied after the LSTM stack.
    - conv_strides (int): Conv1D stride (previously hard-coded to 5).

    conv_filters and kernel_size used to be accepted but ignored; they now
    configure the Conv1D layer.

    Returns the model compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(LSTM(units=32, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(units=32, return_sequences=True))
    model.add(Dropout(dropout_rate))

    model.add(
        Conv1D(
            filters=conv_filters,
            kernel_size=kernel_size,
            strides=conv_strides,
            activation="relu",
        )
    )
    # model.add(MaxPooling1D(pool_size=2, strides=2))
    # model.add(Conv1D(filters=128, kernel_size=3, strides=1, activation='relu'))
    model.add(GlobalAveragePooling1D())

    model.add(BatchNormalization())
    model.add(Dense(num_classes, activation="softmax"))

    model.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return model

In [None]:
def train_model(model, X_train, y_train, X_val, y_val, batch_size=64, epochs=10):
    """
    Fit `model` with early stopping and best-model checkpointing.

    Params:
    - model: Compiled model exposing a Keras-style fit method.
    - X_train, y_train: Training inputs and targets.
    - X_val, y_val: Validation inputs and targets; val_loss on this data
      drives both callbacks.
    - batch_size (int): Mini-batch size passed to fit.
    - epochs (int): Maximum number of epochs.

    Side effects: writes the lowest-val_loss model to "best_model.keras" in
    the current working directory.

    Returns the history object returned by model.fit.
    """
    early_stopping = EarlyStopping(
        monitor="val_loss", patience=6, restore_best_weights=True
    )
    model_checkpoint = ModelCheckpoint(
        "best_model.keras", monitor="val_loss", save_best_only=True
    )
    # todo - change validation data
    # Is this too bulky? Will it slow down my training sufficiently? What is the tradeoff between generalization and compute cost?
    # metrics_callback = MetricsCallback(validation_data=(X_val, y_val))

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        # early_stopping was built above but never registered, so it had no
        # effect; pass it so training stops after 6 epochs without improvement.
        callbacks=[early_stopping, model_checkpoint],
    )

    return history

In [None]:
# input_shape is (timesteps per window, features per timestep); 16 and 77 are
# the values asserted on X_train.shape[1] and X_train.shape[2] earlier.
model = build_lstm_cnn_model(input_shape=(16, 77))
model.summary()

In [None]:
# Train for up to 30 epochs with batch size 32.
# NOTE(review): the validation data is the first 10000 windows of the TEST set,
# and train_model selects its checkpoint on val_loss. The later
# model.evaluate(X_test, ...) cells therefore score on data that already
# influenced model selection. Consider a hold-out taken from the training set
# instead (see the "change validation data" todo in train_model).
history = train_model(
    model,
    X_train,
    y_train_majority_encoded,
    X_test[:10000],
    y_test_majority_encoded[:10000],
    batch_size=32,
    epochs=30,
)

In [None]:
def plot_training_history(history):
    """
    Plot training vs. validation accuracy and loss, one panel each, side by side.

    Params:
    - history: Object whose `.history` mapping holds the per-epoch series
      "accuracy", "val_accuracy", "loss" and "val_loss".
    """
    curves = history.history
    # (metric key, panel title, y-axis label) for the left and right panel.
    panels = (
        ("accuracy", "Model Accuracy", "Accuracy"),
        ("loss", "Model Loss", "Loss"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, (metric, panel_title, y_label) in zip(axes, panels):
        ax.plot(curves[metric])
        ax.plot(curves[f"val_{metric}"])
        ax.set_title(panel_title)
        ax.set_ylabel(y_label)
        ax.set_xlabel("Epoch")
        ax.legend(["Train", "Validation"], loc="upper left")

    plt.show()

In [None]:
# Accuracy and loss curves (train vs. validation) for the run above.
plot_training_history(history)

In [None]:
# Persist the in-memory model; this is a separate file from the
# "best_model.keras" checkpoint written during training.
model.save("model_2.keras")

In [None]:
# Evaluate the in-memory model, as left by training, on the full test set.
test_loss, test_accuracy = model.evaluate(X_test, y_test_majority_encoded)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

In [None]:
# Reload the checkpoint written by ModelCheckpoint in train_model (the model
# with the lowest val_loss) and evaluate it on the same test set for comparison.
# Note: test_loss / test_accuracy from the previous cell are overwritten here.
best_model = keras.models.load_model("best_model.keras")
test_loss, test_accuracy = best_model.evaluate(X_test, y_test_majority_encoded)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")