In [7]:
#import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import tree
#!{sys.executable} -m pip install tensorflow

# Preprocessing

In [107]:
"""
    Based on the training data given, we are able to extract 7 attributes:
    1. x accelerometer measurement
    2. y accelerometer measurement
    3. z accelerometer measurement
    4. x gyroscope measurement
    5. y gyroscope measurement
    6. z gyroscope measurement
    7. time stamp for accelerometer and gyroscope measures
    
    We start by creating a dataframe using the csv files provided for readability.
    
    @param x_file: contains the xyz accelerometers and xyz gyroscope measures from the lower limb
    @param x_time_file: contains the time stamps for the accelerometer and gyroscope measures
    @return dataframe of 7 attributes mentioned
"""
def create_dataframe_X(x_file, x_time_file):
    """
    Read the sensor measurements and their time stamps into a single dataframe.

    @param x_file: headerless CSV (path or file-like object) with six comma-separated columns,
        named here X_acc, Y_acc, Z_acc, X_gyr, Y_gyr, Z_gyr
    @param x_time_file: headerless single-column CSV (path or file-like object), named here 'Time stamp'
    @return dataframe holding the six measurement columns followed by the 'Time stamp' column
    """
    sensor_columns = ['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr']
    measurements = pd.read_csv(x_file, sep=',', names=sensor_columns)
    time_stamps = pd.read_csv(x_time_file, names=['Time stamp'])
    # Column-wise concatenation: rows are paired by their position-based default index.
    return pd.concat([measurements, time_stamps], axis=1)
    
"""
    We have both the labels and the time stamps for the labels. We create a dataframe from these for
    readability.
    
    @param y_file: contains the labels: 
        (0) indicates standing or walking in solid ground, 
        (1) indicates going down the stairs, 
        (2) indicates going up the stairs, and 
        (3) indicates walking on grass
    @param y_time_file: contains the time stamps for the labels
    @return dataframe of labels and time stamps
""" 
def create_dataframe_Y(y_file, y_time_file):
    """
    Read the labels and their time stamps into a single dataframe.

    @param y_file: headerless single-column CSV (path or file-like object), named here 'Label'
    @param y_time_file: headerless single-column CSV (path or file-like object), named here 'Time stamp'
    @return dataframe with the columns 'Label' and 'Time stamp', in that order
    """
    label_column = pd.read_csv(y_file, names=['Label'])
    time_column = pd.read_csv(y_time_file, names=['Time stamp'])
    # Column-wise concatenation: rows are paired by their position-based default index.
    return pd.concat([label_column, time_column], axis=1)
    
"""
    We take the outputs of create_dataframe_X and create_dataframe_Y. In order to combine these two
    dataframes, we need to look at the time stamps at which the labels were assigned. The goal is
    to return a dataframe that has an eighth column (the label) in addition to the seven columns of the
    dataframe from create_dataframe_X. Because x_frame contains more rows than y_frame, each label
    has to be mapped onto several rows of x_frame.
    
    @param x_frame: dataframe from create_dataframe_X
    @param y_frame: dataframe from create_dataframe_Y
    @return dataframe with 8 columns (7 attributes and label)
"""
def combine_frames(x_frame, y_frame):
    """
    Attach a 'Label' column to the sensor dataframe by matching time stamps.

    The rows of x_frame are walked once, in order. Each label of y_frame (also taken in order)
    is given to every not-yet-labelled row of x_frame whose time stamp is less than or equal to
    that label's time stamp. Rows of x_frame that lie after the last label time stamp receive
    no label and show up as NaN in the 'Label' column.

    @param x_frame: dataframe from create_dataframe_X (needs a 'Time stamp' column)
    @param y_frame: dataframe from create_dataframe_Y (needs 'Label' and 'Time stamp' columns)
    @return dataframe with the columns of x_frame followed by a 'Label' column
    """
    # Plain lists are cheaper to index element by element than dataframe columns
    labels = y_frame['Label'].tolist()
    time_stamp_y = y_frame['Time stamp'].tolist()
    time_stamp_x = x_frame['Time stamp'].tolist()
    n_x = len(time_stamp_x)

    labels_for_x = []  # One entry per labelled row of x_frame, in row order
    count = 0          # Position of the next row of x_frame that still needs a label
    for label, label_time in zip(labels, time_stamp_y):
        # The bounds check must come first (and be strict): indexing time_stamp_x[count]
        # with count == n_x would raise IndexError once every row has been labelled.
        while count < n_x and time_stamp_x[count] <= label_time:
            labels_for_x.append(label)
            count += 1
        if count == n_x:
            break  # Every row of x_frame is labelled; the remaining labels are not needed

    # Build the label column on the matching leading slice of x_frame's index so that the
    # column-wise concat pairs each label with the row it was computed for.
    label_df = pd.DataFrame(labels_for_x, columns=['Label'],
                            index=x_frame.index[:len(labels_for_x)])
    combined_frame = pd.concat([x_frame, label_df], axis=1)
    return combined_frame

In [108]:
# Build the sensor (X) and label (Y) dataframes from the subject_001_02 CSV files in TrainingData/.
df_x = create_dataframe_X('TrainingData/subject_001_02__x.csv', 'TrainingData/subject_001_02__x_time.csv')
df_y = create_dataframe_Y('TrainingData/subject_001_02__y.csv', 'TrainingData/subject_001_02__y_time.csv')
# Add the 'Label' column to the sensor rows by matching time stamps.
frame = combine_frames(df_x, df_y)
# Print the combined dataframe; pandas abbreviates long frames in the printed text.
print(frame)

          X_acc     Y_acc     Z_acc     X_gyr     Y_gyr     Z_gyr  Time stamp  \
0      1.726654  9.619981  1.723327 -0.001997  0.067502  0.126057       0.000   
1      2.225759  9.493385  1.782374  0.008557  0.029333  0.073573       0.025   
2      2.010621  9.481603  1.770000 -0.004651  0.001009  0.062978       0.050   
3      1.614272  9.516440  1.798932  0.009519  0.024405  0.032554       0.075   
4      1.862582  9.353709  1.722649  0.007902  0.022794  0.020837       0.100   
...         ...       ...       ...       ...       ...       ...         ...   
70167  3.762566  8.168921  3.062974  0.015675  0.007165  0.019624    1754.175   
70168  3.729076  8.256303  3.034621 -0.005977  0.006976  0.006051    1754.200   
70169  4.035121  8.696340  3.040610 -0.025315  0.024667 -0.012879    1754.225   
70170  4.013900  8.729596  3.006278 -0.014084  0.002854 -0.013945    1754.250   
70171  3.631926  8.440222  3.063185  0.003864 -0.025512 -0.006094    1754.275   

       Label  
0        0.0

# Training, Validation, Test Sets