#### Libraries

In [3]:
# Data manipulation libs
import pandas as pd
import numpy as np
import random
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Modeling
import tensorflow as tf
from tensorflow import keras

#### Subject Data

In [2]:
# Get subjects info
# (the load below is disabled; only the column description is kept in this cell)
# subjects_information_df = pd.read_csv('s3://cdl-usecase-data/motionsense/data_subjects_info.csv')


# Column reference for the subject-information table. The string is a bare
# expression, so the notebook echoes it as this cell's output.
'''
Column  Attribute         [Unit]
Code:   subject ID        [1 to 24]
Weight: Weight of subject [Kg.]
Height: Height of subject [Cm.]
Age:    Age of subject    [Years]
Gender: Gender of subject [0: F, 1: M]
'''
# subjects_information_df.head(3)

'\nColumn  Attribute         [Unit]\nCode:   subject ID        [1 to 24]\nWeight: Weight of subject [Kg.]\nHeight: Weight of subject [Cm.]\nAge:    Age of subject    [Years]\nGender: Gender of subject [0: F, 1: M]\n'

#### Test Data Description

The test data was collected over multiple trials of these 6 activity types (dataset code in brackets):
- Downstairs $\;$[dws]
- Upstairs   $\;$[ups]
- Walking    $\;$[wlk]
- Jogging    $\;$[jog]
- Sitting    $\;$[sit]
- Standing   $\;$[std]

# Data Loading

#### Define parameters for data importing

In [4]:
# Activity code -> trial (file) numbers recorded for that activity, so that
# every recording can be located when loading the data.
trial_id_dict = dict(
    dws=[1, 2, 11],
    ups=[3, 4, 12],
    wlk=[7, 8, 15],
    jog=[9, 16],
    sit=[5, 13],
    std=[6, 14],
)

# Sensor group -> names of its three component columns.
measurement_dict = dict(
    attitude=['attitude.roll', 'attitude.pitch', 'attitude.yaw'],
    gravity=['gravity.x', 'gravity.y', 'gravity.z'],
    rotationRate=['rotationRate.x', 'rotationRate.y', 'rotationRate.z'],
    userAcceleration=['userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z'],
)

# Number of subjects and the list of their IDs.
# Assumes subject IDs run from 1 to n in steps of 1.
subject_number = 24
subject_id_lst = [*range(1, subject_number + 1)]

# Location of the cleaned, combined data file (a single CSV).
test_data = 's3://cdl-usecase-data/motionsense/df_all_data.csv'

#### Import Data

In [5]:
# Read the cleaned, combined CSV into a DataFrame, then preview its first rows
df_all_data = pd.read_csv(filepath_or_buffer=test_data)
df_all_data.head(n=5)

Unnamed: 0,tick_num,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z,test_type,subject_id,test_trial_number,time_since_start,time_series_data,attitude,gravity,rotationRate,userAcceleration,weight,height,age,gender
0,0,1.528132,-0.733896,0.696372,0.741895,0.669768,-0.031672,0.316738,0.77818,1.082764,0.294894,-0.184493,0.377542,dws,1,1,0.0,2022-01-01 00:00:00.000,1.832682,1.0,1.370498,0.51336,102,188,46,1
1,1,1.527992,-0.716987,0.677762,0.753099,0.657116,-0.032255,0.842032,0.424446,0.643574,0.219405,0.035846,0.114866,dws,1,1,0.02,2022-01-01 00:00:00.020,1.818843,1.0,1.141648,0.250235,102,188,46,1
2,2,1.527765,-0.706999,0.670951,0.759611,0.649555,-0.032707,-0.138143,-0.040741,0.343563,0.010714,0.134701,-0.167808,dws,1,1,0.04,2022-01-01 00:00:00.040,1.812205,1.0,0.37253,0.21545,102,188,46,1
3,3,1.516768,-0.704678,0.675735,0.760709,0.647788,-0.04114,-0.025005,-1.048717,0.03586,-0.008389,0.136788,0.094958,dws,1,1,0.06,2022-01-01 00:00:00.060,1.803822,1.0,1.049628,0.166728,102,188,46,1
4,4,1.493941,-0.703918,0.672994,0.760062,0.64721,-0.05853,0.114253,-0.91289,0.047341,0.199441,0.353996,-0.044299,dws,1,1,0.08,2022-01-01 00:00:00.080,1.783334,1.0,0.921229,0.40872,102,188,46,1


# Modeling data prep

#### Train / test / validation split

In [7]:
# Trial numbers run from 1 to num_tests
num_tests = 16

# Trials used for training
train_trial_numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Trials held out for test / validation
test_and_validation_numbers = [11, 12, 13, 14, 15, 16]

# Fixed seed so the subject assignment (and every number derived from it)
# is the same after Restart & Run All. A dedicated generator is used so the
# global `random` state is left untouched.
split_seed = 42
split_rng = random.Random(split_seed)

# Half of the subjects go to the test set, the remaining half to validation
test_subset = split_rng.sample(subject_id_lst, subject_number // 2)
val_subset = [n for n in subject_id_lst if n not in test_subset]


def _select_trials(data, trial_numbers, max_trial_number, subject_ids=None):
    """Return the rows of `data` belonging to the requested trials.

    Rows are stacked trial by trial in ascending trial number (the order of
    rows inside a trial is kept as in `data`), because the later windowing
    step depends on row order.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with `test_trial_number` and `subject_id` columns.
    trial_numbers : collection of int
        Trials to keep.
    max_trial_number : int
        Trials 1..max_trial_number are scanned.
    subject_ids : collection of int, optional
        When given, only rows of these subjects are kept.

    Returns
    -------
    pd.DataFrame
        A new frame (never a view of `data`); empty if nothing matched.
    """
    selected = []
    for trial_number in range(1, max_trial_number + 1):
        if trial_number not in trial_numbers:
            continue
        trial_rows = data[data.test_trial_number == trial_number]
        if subject_ids is not None:
            trial_rows = trial_rows[trial_rows.subject_id.isin(subject_ids)]
        selected.append(trial_rows)
    # One concat per split instead of growing a frame inside the loop
    return pd.concat(selected) if selected else pd.DataFrame()


# A trial listed as a training trial is never reused for test / validation
held_out_trial_numbers = [
    n for n in test_and_validation_numbers if n not in train_trial_numbers
]

# Create train / test / validation dataframes
df_train = _select_trials(df_all_data, train_trial_numbers, num_tests)
df_test = _select_trials(df_all_data, held_out_trial_numbers, num_tests, test_subset)
df_validation = _select_trials(df_all_data, held_out_trial_numbers, num_tests, val_subset)

#### Normalize

In [8]:
# Columns rescaled before modeling
normalize_columns = [
    # per-axis sensor readings
    'attitude.roll', 'attitude.pitch', 'attitude.yaw',
    'gravity.x', 'gravity.y', 'gravity.z',
    'rotationRate.x', 'rotationRate.y', 'rotationRate.z',
    'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z',
    # per-sensor totals
    'attitude', 'gravity', 'rotationRate', 'userAcceleration',
    # subject attributes
    'weight', 'height', 'age',
]

# Scaler mapping every column onto the range (-1, 1)
scaler = MinMaxScaler(feature_range=(-1, 1))

# Learn the column ranges from the training data only and rescale it ...
df_train[normalize_columns] = scaler.fit_transform(df_train[normalize_columns])

# ... then apply those same training ranges to the held-out sets
for held_out_df in (df_test, df_validation):
    held_out_df[normalize_columns] = scaler.transform(held_out_df[normalize_columns])

## LSTM

#### LSTM settings

In [9]:
# Number of categories the model predicts
n_categories = 6

# Look-back length: how many consecutive observations make up one model input
n_timesteps = 50

#### Select LSTM columns / data

In [10]:
# Input columns for the LSTM: the three axis readings of each sensor.
# Not used as inputs: the per-sensor totals ('attitude', 'gravity',
# 'rotationRate', 'userAcceleration') and the subject attributes
# ('weight', 'height', 'age', 'gender').
lstm_columns = [
    # attitude of the device (roll / pitch / yaw)
    'attitude.roll', 'attitude.pitch', 'attitude.yaw',
    # gravity measurement (x / y / z axis)
    'gravity.x', 'gravity.y', 'gravity.z',
    # rotation rate measurement (x / y / z axis)
    'rotationRate.x', 'rotationRate.y', 'rotationRate.z',
    # user acceleration measurement (x / y / z axis)
    'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z',
]

# Restrict each split to the LSTM input columns
df_train_lstm, df_val_lstm, df_test_lstm = (
    split_df[lstm_columns] for split_df in (df_train, df_validation, df_test)
)

#### Wrangle data to be run in LSTM

In [12]:
def _make_lstm_windows(values, window_length):
    """Stack sliding windows of a 2-D array into a 3-D LSTM input array.

    Parameters
    ----------
    values : np.ndarray, shape (n_rows, n_columns)
        Observations in the order they should be windowed.
    window_length : int
        Number of consecutive rows per window.

    Returns
    -------
    np.ndarray, shape (n_rows, window_length, n_columns)
        Entry j holds rows j .. j + window_length - 1 of `values` for every
        j < n_rows - window_length. The remaining trailing entries stay
        all-zero, which keeps the first axis the same length as the per-row
        label arrays built below.

    Notes
    -----
    NOTE(review): the label paired with entry j is the label of row j (the
    first row of the window), and the zero-filled tail entries are scored
    like real samples. Both are kept unchanged here because the saved model
    is loaded as-is and its training alignment is not visible in this
    notebook -- confirm before changing either.
    """
    n_rows, n_columns = values.shape
    windows = np.zeros((n_rows, window_length, n_columns))
    n_filled = n_rows - window_length
    if n_filled > 0:
        # sliding_window_view puts the window axis last:
        # (n_windows, n_columns, window_length) -> swap the last two axes
        view = np.lib.stride_tricks.sliding_window_view(values, window_length, axis=0)
        windows[:n_filled] = view[:n_filled].transpose(0, 2, 1)
    return windows


# Convert DataFrames to arrays
array_train_lstm = df_train_lstm.values
array_val_lstm = df_val_lstm.values
array_test_lstm = df_test_lstm.values

# Per-activity feature arrays for the test and validation sets
# (the historical "trails" spelling of these names is kept unchanged)
array_test_lstm_trails = {
    key: df_test[df_test.test_type == key][lstm_columns].values
    for key in trial_id_dict
}
array_val_lstm_trails = {
    key: df_validation[df_validation.test_type == key][lstm_columns].values
    for key in trial_id_dict
}

# Number of input features per time step
n_features = len(lstm_columns)

# LSTM inputs for the full train / validation / test sets
X_train_lstm = _make_lstm_windows(array_train_lstm, n_timesteps)
X_val_lstm = _make_lstm_windows(array_val_lstm, n_timesteps)
X_test_lstm = _make_lstm_windows(array_test_lstm, n_timesteps)

# LSTM inputs per activity for the test and validation sets
X_test_lstm_trials = {
    key: _make_lstm_windows(values, n_timesteps)
    for key, values in array_test_lstm_trails.items()
}
X_val_lstm_trials = {
    key: _make_lstm_windows(values, n_timesteps)
    for key, values in array_val_lstm_trails.items()
}

# Initialize the label encoder and fit it on the training labels only
encoder = LabelEncoder()
encoder.fit(df_train['test_type'])


def _one_hot_labels(labels):
    """Encode activity labels with the fitted `encoder` and one-hot them.

    Returns an array with one row per label and `n_categories` columns.
    """
    return to_categorical(encoder.transform(labels), num_classes=n_categories)


# One-hot targets for the full train / validation / test sets
y_train_lstm = _one_hot_labels(df_train['test_type'])
y_val_lstm = _one_hot_labels(df_validation['test_type'])
y_test_lstm = _one_hot_labels(df_test['test_type'])

# One-hot targets per activity for the test set
y_test_lstm_trials = {
    key: _one_hot_labels(df_test[df_test.test_type == key]['test_type'])
    for key in trial_id_dict
}

# One-hot targets per activity for the validation set
y_val_lstm_trials = {
    key: _one_hot_labels(df_validation[df_validation.test_type == key]['test_type'])
    for key in trial_id_dict
}

#### Load model


In [14]:
# Location of the saved LSTM model
model_path = "https://cdl-usecase-models.s3.us-east-2.amazonaws.com/motionsense_LSTM/LSTM_model_weights"

# Restore the saved model from that location
model = tf.keras.models.load_model(filepath=model_path)

# Show its layers and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 64)               11520     
 l)                                                              
                                                                 
 dense (Dense)               (None, 6)                 390       
                                                                 
Total params: 11,910
Trainable params: 11,910
Non-trainable params: 0
_________________________________________________________________


#### Get predictions

In [24]:
def _per_trial_accuracy(fitted_model, inputs_by_trial, one_hot_by_trial):
    """Compute the model's accuracy separately for every activity code.

    Parameters
    ----------
    fitted_model : object with a `predict` method
        Model whose predictions are scored.
    inputs_by_trial : dict
        Activity code -> model input array for that activity.
    one_hot_by_trial : dict
        Activity code -> one-hot ground-truth array for that activity.

    Returns
    -------
    dict
        Activity code -> fraction of samples whose predicted class (argmax of
        the model output) equals the ground-truth class.
    """
    accuracies = {}
    for key in trial_id_dict:
        ground_truth = np.argmax(one_hot_by_trial[key], axis=1)
        predicted = np.argmax(fitted_model.predict(inputs_by_trial[key]), axis=1)
        accuracies[key] = np.mean(ground_truth == predicted)
    return accuracies


# Predict on test data using the trained model
y_pred_test = model.predict(X_test_lstm)

# Test accuracy per activity
trial_acc_test = _per_trial_accuracy(model, X_test_lstm_trials, y_test_lstm_trials)

# Predict on validation data using the trained model
y_pred_val = model.predict(X_val_lstm)

# Validation accuracy per activity
trial_acc_val = _per_trial_accuracy(model, X_val_lstm_trials, y_val_lstm_trials)



#### Print accuracies

In [None]:
# Report the overall test / validation accuracies, followed by the
# per-activity accuracies of both sets.

# Convert one-hot encoded targets back to class indices
y_ground_test = np.argmax(y_test_lstm, axis=1)
y_ground_val = np.argmax(y_val_lstm, axis=1)

# Predicted class = index of the largest model output
y_pred_test_classes = np.argmax(y_pred_test, axis=1)
y_pred_val_classes = np.argmax(y_pred_val, axis=1)

# Accuracy = share of samples whose predicted class matches the ground truth
accuracy_test = np.mean(y_ground_test == y_pred_test_classes)
accuracy_val = np.mean(y_ground_val == y_pred_val_classes)

# Print LSTM results
print("[INFO] -- Total Accuracy")
print(f"Test: {round(accuracy_test * 100, 3)}%")
print(f"Validation (Test #2): {round(accuracy_val * 100, 3)}%")
print("")
print("[INFO] -- Test Trials Accuracy")
for key in trial_acc_test:
    print(f"{key}: {round(trial_acc_test[key] * 100, 3)}%")
print("")
print("[INFO] -- Validation Trials Accuracy:")
# Iterate the validation results themselves rather than the test keys, so the
# report cannot raise KeyError or skip entries if the two dicts ever differ
for key in trial_acc_val:
    print(f"{key}: {round(trial_acc_val[key] * 100, 3)}%")

In [None]:
# Ha!