In [1]:
#import sys
import random
import numpy as np
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler
#!{sys.executable} -m pip install keras

# Preprocessing

In [2]:
"""
    Based on the training data given, we are able to extract 7 attributes:
    1. x accelerometer measurement
    2. y accelerometer measurement
    3. z accelerometer measurement
    4. x gyroscope measurement
    5. y gyroscope measurement
    6. z gyroscope measurement
    7. time stamp for accelerometer and gyroscope measures
    
    We start by creating a dataframe using the csv files provided for readability.
    
    @param x_file: contains the xyz accelerometers and xyz gyroscope measures from the lower limb
    @param x_time_file: contain the time stamps for the accelerometer and gyroscope measures
    @return dataframe of 7 attributes mentioned
"""
def create_dataframe_X(x_file, x_time_file):
    df1 = pd.read_csv(x_file, sep = ',', names = ['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr'])
    df2 = pd.read_csv(x_time_file, names = ['Time stamp'])
    frames = [df1, df2]
    result = pd.concat(frames, axis = 1)
    return result
    
"""
    We have both the labels and the time stamps for the labels. We create a dataframe from these for
    readability.
    
    @param y_file: contain the labels: 
        (0) indicates standing or walking in solid ground, 
        (1) indicates going down the stairs, 
        (2) indicates going up the stairs, and 
        (3) indicates walking on grass
    @param y_time_file: contain the time stamps for the labels
    @return dataframe of labels and time stamps
""" 
def create_dataframe_Y(y_file, y_time_file):
    df1 = pd.read_csv(y_file, names = ['Label'])
    df2 = pd.read_csv(y_time_file, names = ['Time stamp'])
    frames = [df1, df2]
    result = pd.concat(frames, axis = 1)
    return result
    
"""
    We take the outputs of create_dataframe_X and create_dataframe_Y. In order to combine both of these
    dataframes, we need look at the time intervals present for when the labels were assigned. We down-sample
    the X to the shape of the y.
    
    @param x_frame: dataframe from create_dataframe_X
    @param y_frame: dataframe from create_dataframe_Y
    @return dataframe with 9 columns (8 attributes and 1 label)
"""
def combine_frames(x_frame, y_frame):
    # Change each dataframe column to a list for iterations
    time_stamp_y = y_frame['Time stamp'].tolist()
    time_stamp_x = x_frame['Time stamp'].tolist()
    
    x_range = [] # Empty list to append data points to
    x_random_row = 0 # Initializing variable to hold randomly selected row instance
    refs = []
    count = 0
    for i in range(0, len(time_stamp_y)):
        while (time_stamp_x[count] <= time_stamp_y[i]) and (count <= len(time_stamp_x)):
            x_range.append(time_stamp_x.index(time_stamp_x[count]))
            count += 1
        x_random_row = random.choice(x_range) # Pick a random value
        refs.append(x_random_row) # Keep record of selected rows
        x_range.clear() # Clear the cache
        continue
    
    # Create a new dataframe based on the refs collected - should be roughly the same length as the y_frame
    entries = []
    for item in refs:
        entry = x_frame.iloc[item]
        entries.append(entry)
    
    found_df = pd.concat(entries, axis = 1)
    found_df = found_df.transpose()
    
    # Combine found_df with y_frame for downsampling
    found_df = found_df.reset_index()
    found_df = found_df.drop(['index'], axis = 1)
    found_df = found_df.drop(['Time stamp'], axis = 1)
    combined_frame = pd.concat([found_df, y_frame], axis = 1)
    
    return combined_frame

In [3]:
"""
    Generating data frames from training data.
"""
# Subject_001_01
df_x_1_1 = create_dataframe_X('TrainingData/subject_001_01__x.csv', 'TrainingData/subject_001_01__x_time.csv')
df_y_1_1 = create_dataframe_Y('TrainingData/subject_001_01__y.csv', 'TrainingData/subject_001_01__y_time.csv')
frame_1_1 = combine_frames(df_x_1_1, df_y_1_1)

# Subject_001_02
df_x_1_2 = create_dataframe_X('TrainingData/subject_001_02__x.csv', 'TrainingData/subject_001_02__x_time.csv')
df_y_1_2 = create_dataframe_Y('TrainingData/subject_001_02__y.csv', 'TrainingData/subject_001_02__y_time.csv')
frame_1_2 = combine_frames(df_x_1_2, df_y_1_2)

# Subject_001_03
df_x_1_3 = create_dataframe_X('TrainingData/subject_001_03__x.csv', 'TrainingData/subject_001_03__x_time.csv')
df_y_1_3 = create_dataframe_Y('TrainingData/subject_001_03__y.csv', 'TrainingData/subject_001_03__y_time.csv')
frame_1_3 = combine_frames(df_x_1_3, df_y_1_3)

# Subject_001_04
df_x_1_4 = create_dataframe_X('TrainingData/subject_001_04__x.csv', 'TrainingData/subject_001_04__x_time.csv')
df_y_1_4 = create_dataframe_Y('TrainingData/subject_001_04__y.csv', 'TrainingData/subject_001_04__y_time.csv')
frame_1_4 = combine_frames(df_x_1_4, df_y_1_4)

# Subject_001_05
df_x_1_5 = create_dataframe_X('TrainingData/subject_001_05__x.csv', 'TrainingData/subject_001_05__x_time.csv')
df_y_1_5 = create_dataframe_Y('TrainingData/subject_001_05__y.csv', 'TrainingData/subject_001_05__y_time.csv')
frame_1_5 = combine_frames(df_x_1_5, df_y_1_5)

# Subject_001_06
df_x_1_6 = create_dataframe_X('TrainingData/subject_001_06__x.csv', 'TrainingData/subject_001_06__x_time.csv')
df_y_1_6 = create_dataframe_Y('TrainingData/subject_001_06__y.csv', 'TrainingData/subject_001_06__y_time.csv')
frame_1_6 = combine_frames(df_x_1_6, df_y_1_6)

# Subject_001_07
df_x_1_7 = create_dataframe_X('TrainingData/subject_001_07__x.csv', 'TrainingData/subject_001_07__x_time.csv')
df_y_1_7 = create_dataframe_Y('TrainingData/subject_001_07__y.csv', 'TrainingData/subject_001_07__y_time.csv')
frame_1_7 = combine_frames(df_x_1_7, df_y_1_7)

# Subject_001_08
df_x_1_8 = create_dataframe_X('TrainingData/subject_001_08__x.csv', 'TrainingData/subject_001_08__x_time.csv')
df_y_1_8 = create_dataframe_Y('TrainingData/subject_001_08__y.csv', 'TrainingData/subject_001_08__y_time.csv')
frame_1_8 = combine_frames(df_x_1_8, df_y_1_8)

# Subject_002_01
df_x_2_1 = create_dataframe_X('TrainingData/subject_002_01__x.csv', 'TrainingData/subject_002_01__x_time.csv')
df_y_2_1 = create_dataframe_Y('TrainingData/subject_002_01__y.csv', 'TrainingData/subject_002_01__y_time.csv')
frame_2_1 = combine_frames(df_x_2_1, df_y_2_1)

# Subject_002_02
df_x_2_2 = create_dataframe_X('TrainingData/subject_002_02__x.csv', 'TrainingData/subject_002_02__x_time.csv')
df_y_2_2 = create_dataframe_Y('TrainingData/subject_002_02__y.csv', 'TrainingData/subject_002_02__y_time.csv')
frame_2_2 = combine_frames(df_x_2_2, df_y_2_2)

# Subject_002_03
df_x_2_3 = create_dataframe_X('TrainingData/subject_002_03__x.csv', 'TrainingData/subject_002_03__x_time.csv')
df_y_2_3 = create_dataframe_Y('TrainingData/subject_002_03__y.csv', 'TrainingData/subject_002_03__y_time.csv')
frame_2_3 = combine_frames(df_x_2_3, df_y_2_3)

# Subject_002_04
df_x_2_4 = create_dataframe_X('TrainingData/subject_001_04__x.csv', 'TrainingData/subject_001_04__x_time.csv')
df_y_2_4 = create_dataframe_Y('TrainingData/subject_001_04__y.csv', 'TrainingData/subject_001_04__y_time.csv')
frame_2_4 = combine_frames(df_x_2_4, df_y_2_4)

# Subject_002_05
df_x_2_5 = create_dataframe_X('TrainingData/subject_002_05__x.csv', 'TrainingData/subject_002_05__x_time.csv')
df_y_2_5 = create_dataframe_Y('TrainingData/subject_002_05__y.csv', 'TrainingData/subject_002_05__y_time.csv')
frame_2_5 = combine_frames(df_x_2_5, df_y_2_5)

# Subject_003_01
df_x_3_1 = create_dataframe_X('TrainingData/subject_003_01__x.csv', 'TrainingData/subject_003_01__x_time.csv')
df_y_3_1 = create_dataframe_Y('TrainingData/subject_003_01__y.csv', 'TrainingData/subject_003_01__y_time.csv')
frame_3_1 = combine_frames(df_x_3_1, df_y_3_1)

# Subject_003_02
df_x_3_2 = create_dataframe_X('TrainingData/subject_003_02__x.csv', 'TrainingData/subject_003_02__x_time.csv')
df_y_3_2 = create_dataframe_Y('TrainingData/subject_003_02__y.csv', 'TrainingData/subject_003_02__y_time.csv')
frame_3_2 = combine_frames(df_x_3_2, df_y_3_2)

# Subject_003_03
df_x_3_3 = create_dataframe_X('TrainingData/subject_003_03__x.csv', 'TrainingData/subject_003_03__x_time.csv')
df_y_3_3 = create_dataframe_Y('TrainingData/subject_003_03__y.csv', 'TrainingData/subject_003_03__y_time.csv')
frame_3_3 = combine_frames(df_x_3_3, df_y_3_3)

# Subject_004_01
df_x_4_1 = create_dataframe_X('TrainingData/subject_004_01__x.csv', 'TrainingData/subject_004_01__x_time.csv')
df_y_4_1 = create_dataframe_Y('TrainingData/subject_004_01__y.csv', 'TrainingData/subject_004_01__y_time.csv')
frame_4_1 = combine_frames(df_x_4_1, df_y_4_1)

# Subject_004_02
df_x_4_2 = create_dataframe_X('TrainingData/subject_004_02__x.csv', 'TrainingData/subject_004_02__x_time.csv')
df_y_4_2 = create_dataframe_Y('TrainingData/subject_004_02__y.csv', 'TrainingData/subject_004_02__y_time.csv')
frame_4_2 = combine_frames(df_x_4_2, df_y_4_2)

# Subject_005_01
df_x_5_1 = create_dataframe_X('TrainingData/subject_005_01__x.csv', 'TrainingData/subject_005_01__x_time.csv')
df_y_5_1 = create_dataframe_Y('TrainingData/subject_005_01__y.csv', 'TrainingData/subject_005_01__y_time.csv')
frame_5_1 = combine_frames(df_x_5_1, df_y_5_1)

# Subject_005_02
df_x_5_2 = create_dataframe_X('TrainingData/subject_005_02__x.csv', 'TrainingData/subject_005_02__x_time.csv')
df_y_5_2 = create_dataframe_Y('TrainingData/subject_005_02__y.csv', 'TrainingData/subject_005_02__y_time.csv')
frame_5_2 = combine_frames(df_x_5_2, df_y_5_2)

# Subject_005_03
df_x_5_3 = create_dataframe_X('TrainingData/subject_005_03__x.csv', 'TrainingData/subject_005_03__x_time.csv')
df_y_5_3 = create_dataframe_Y('TrainingData/subject_005_03__y.csv', 'TrainingData/subject_005_03__y_time.csv')
frame_5_3 = combine_frames(df_x_5_3, df_y_5_3)

# Subject_006_01
df_x_6_1 = create_dataframe_X('TrainingData/subject_006_01__x.csv', 'TrainingData/subject_006_01__x_time.csv')
df_y_6_1 = create_dataframe_Y('TrainingData/subject_006_01__y.csv', 'TrainingData/subject_006_01__y_time.csv')
frame_6_1 = combine_frames(df_x_6_1, df_y_6_1)

# Subject_006_02
df_x_6_2 = create_dataframe_X('TrainingData/subject_006_02__x.csv', 'TrainingData/subject_006_02__x_time.csv')
df_y_6_2 = create_dataframe_Y('TrainingData/subject_006_02__y.csv', 'TrainingData/subject_006_02__y_time.csv')
frame_6_2 = combine_frames(df_x_6_2, df_y_6_2)

# Subject_006_03
df_x_6_3 = create_dataframe_X('TrainingData/subject_006_03__x.csv', 'TrainingData/subject_006_03__x_time.csv')
df_y_6_3 = create_dataframe_Y('TrainingData/subject_006_03__y.csv', 'TrainingData/subject_006_03__y_time.csv')
frame_6_3 = combine_frames(df_x_6_3, df_y_6_3)

# Subject_007_01
df_x_7_1 = create_dataframe_X('TrainingData/subject_007_01__x.csv', 'TrainingData/subject_007_01__x_time.csv')
df_y_7_1 = create_dataframe_Y('TrainingData/subject_007_01__y.csv', 'TrainingData/subject_007_01__y_time.csv')
frame_7_1 = combine_frames(df_x_7_1, df_y_7_1)

# Subject_007_02
df_x_7_2 = create_dataframe_X('TrainingData/subject_007_02__x.csv', 'TrainingData/subject_007_02__x_time.csv')
df_y_7_2 = create_dataframe_Y('TrainingData/subject_007_02__y.csv', 'TrainingData/subject_007_02__y_time.csv')
frame_7_2 = combine_frames(df_x_7_2, df_y_7_2)

# Subject_007_03
df_x_7_3 = create_dataframe_X('TrainingData/subject_007_03__x.csv', 'TrainingData/subject_007_03__x_time.csv')
df_y_7_3 = create_dataframe_Y('TrainingData/subject_007_03__y.csv', 'TrainingData/subject_007_03__y_time.csv')
frame_7_3 = combine_frames(df_x_7_3, df_y_7_3)

# Subject_007_04
df_x_7_4 = create_dataframe_X('TrainingData/subject_007_04__x.csv', 'TrainingData/subject_007_04__x_time.csv')
df_y_7_4 = create_dataframe_Y('TrainingData/subject_007_04__y.csv', 'TrainingData/subject_007_04__y_time.csv')
frame_7_4 = combine_frames(df_x_7_4, df_y_7_4)

# Subject_008_01
df_x_8_1 = create_dataframe_X('TrainingData/subject_008_01__x.csv', 'TrainingData/subject_008_01__x_time.csv')
df_y_8_1 = create_dataframe_Y('TrainingData/subject_008_01__y.csv', 'TrainingData/subject_008_01__y_time.csv')
frame_8_1 = combine_frames(df_x_8_1, df_y_8_1)

In [4]:
"""
    Combining all data frames.
"""
frame_list = [frame_1_1, frame_1_2, frame_1_3, frame_1_4, frame_1_5, frame_1_6, frame_1_7, frame_1_8,
             frame_2_1, frame_2_2, frame_2_3, frame_2_4, frame_2_5,
             frame_3_1, frame_3_2, frame_3_3,
             frame_4_1, frame_4_2,
             frame_5_1, frame_5_2, frame_5_3,
             frame_6_1, frame_6_2, frame_6_3,
             frame_7_1, frame_7_2, frame_7_3, frame_7_4,
             frame_8_1]
data = pd.concat(frame_list)

# Create X and y
X = data[['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr', 'Time stamp']]
y = data['Label']

# Performing random undersampling on the data
rus = RandomUnderSampler(random_state=0)
X_resampled, y_resampled = rus.fit_resample(X, y)
data_resampled = pd.concat([X_resampled, y_resampled], axis = 1)

In [25]:
import numpy as np
from keras.utils import to_categorical

"""
    Creating training and validation sets from the dataframe.
    
    @param frame: dataframe passed in
    @return training and validation sets created from frame passed in
"""
def training_validation_split(frame):
    frame_copy = frame.copy()
    training_set = frame_copy.sample(frac = 0.70, random_state = 0)
    validation_set = frame_copy.drop(training_set.index)
    return training_set, validation_set

training, val = training_validation_split(data_resampled)
training_X = training[['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr', 'Time stamp']]
training_X = np.expand_dims(training_X, axis = 1)
training_y = training['Label']
training_y_encoded = to_categorical(training_y) # One-hot encoding
val_X = val[['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr', 'Time stamp']]
val_X = np.expand_dims(val_X, axis = 1)
val_y = val['Label']
val_y_encoded = to_categorical(val_y) # One-hot encoding

# 40916 timesteps, 7 features, 4 outputs
n_timesteps, n_features, n_outputs = training_X.shape[1], training_X.shape[2], training_y_encoded.shape[1]

In [26]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, LSTM
from tensorflow.python.keras import regularizers
from keras.optimizers import Adam
from sklearn.metrics import classification_report

def define_LSTM_model():
    model = Sequential()
    model.add(LSTM(100, input_shape = (n_timesteps, n_features)))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation = 'relu'))
    model.add(Dense(n_outputs, activation = 'softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model
    
def evaluate_model(training_X, training_y_encoded, val_X, val_y_encoded):
    verbose, epochs, batch_size = 1, 15, 64
    model = define_LSTM_model()
    # Fit network
    model.fit(training_X, training_y_encoded, epochs = epochs, batch_size = batch_size, verbose = verbose)
    # Evaluate model
    _, accuracy = model.evaluate(val_X, val_y_encoded, batch_size = batch_size, verbose = verbose)
    return accuracy

accuracy = evaluate_model(training_X, training_y_encoded, val_X, val_y_encoded)
print(accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
0.4911610401459854
