In [7]:
#import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import tree
#!{sys.executable} -m pip install tensorflow

# Preprocessing

In [115]:
"""
    Based on the training data given, we are able to extract 7 attributes:
    1. x accelerometer measurement
    2. y accelerometer measurement
    3. z accelerometer measurement
    4. x gyroscope measurement
    5. y gyroscope measurement
    6. z gyroscope measurement
    7. time stamp for accelerometer and gyroscope measures
    
    We start by creating a dataframe using the csv files provided for readability.
    
    @param x_file: contains the xyz accelerometers and xyz gyroscope measures from the lower limb
    @param x_time_file: contains the time stamps for the accelerometer and gyroscope measures
    @return dataframe of 7 attributes mentioned
"""
def create_dataframe_X(x_file, x_time_file):
    """
    Read the sensor measurements and their time stamps into one dataframe.

    @param x_file: csv whose rows hold six comma-separated values, read as the columns
        X_acc, Y_acc, Z_acc, X_gyr, Y_gyr, Z_gyr (in that order)
    @param x_time_file: csv whose rows hold a single value, read as the column 'Time stamp'
    @return dataframe with the six measurement columns followed by 'Time stamp'
    """
    sensor_columns = ['X_acc', 'Y_acc', 'Z_acc', 'X_gyr', 'Y_gyr', 'Z_gyr']
    measurements = pd.read_csv(x_file, sep = ',', names = sensor_columns)
    time_stamps = pd.read_csv(x_time_file, names = ['Time stamp'])
    # Side-by-side concatenation; rows of the two files are paired by their row index.
    return pd.concat([measurements, time_stamps], axis = 1)
    
"""
    We have both the labels and the time stamps for the labels. We create a dataframe from these for
    readability.
    
    @param y_file: contains the labels:
        (0) indicates standing or walking in solid ground, 
        (1) indicates going down the stairs, 
        (2) indicates going up the stairs, and 
        (3) indicates walking on grass
    @param y_time_file: contains the time stamps for the labels
    @return dataframe of labels and time stamps
""" 
def create_dataframe_Y(y_file, y_time_file):
    """
    Read the labels and their time stamps into one dataframe.

    @param y_file: csv whose rows hold a single value, read as the column 'Label'
    @param y_time_file: csv whose rows hold a single value, read as the column 'Time stamp'
    @return dataframe with the columns 'Label' and 'Time stamp'
    """
    label_column = pd.read_csv(y_file, names = ['Label'])
    time_column = pd.read_csv(y_time_file, names = ['Time stamp'])
    # Side-by-side concatenation; rows of the two files are paired by their row index.
    return pd.concat([label_column, time_column], axis = 1)
    
"""
    We take the outputs of create_dataframe_X and create_dataframe_Y. In order to combine both of these
    dataframes, we need to look at the time intervals present for when the labels were assigned. The goal is
    to return a dataframe that now has an eighth column in addition to the seven columns from the dataframe
    from create_dataframe_X. Additionally, we know that x_frame contains more values than y_frame. We want to
    map these labels accordingly. In the end, we drop rows that have missing values.
    
    @param x_frame: dataframe from create_dataframe_X
    @param y_frame: dataframe from create_dataframe_Y
    @return dataframe with 8 columns (7 attributes and label)
"""
def combine_frames(x_frame, y_frame):
    """
    Attach a 'Label' column to x_frame by walking the two time-stamp columns in order.

    The label entries are visited one after another; for each one, every not-yet-labelled
    sample whose time stamp is <= that label's time stamp receives the label. With both
    time-stamp columns sorted in ascending order, each sample therefore gets the label of
    the first label time stamp that is >= its own time stamp. Samples that lie after the
    last label time stamp receive no label and are removed by the final dropna(), which
    also removes rows with a missing value in any other column.

    @param x_frame: dataframe with a 'Time stamp' column (e.g. from create_dataframe_X)
    @param y_frame: dataframe with 'Label' and 'Time stamp' columns (e.g. from create_dataframe_Y)
    @return dataframe with the columns of x_frame plus 'Label', without rows that have missing values
    """
    # Change each dataframe column to a list for iterations
    labels = y_frame['Label'].tolist()
    time_stamp_y = y_frame['Time stamp'].tolist()
    time_stamp_x = x_frame['Time stamp'].tolist()
    n_samples = len(time_stamp_x)

    labels_for_x = [] # Corresponding label for each sample of x_frame, in row order
    count = 0 # Position of the next sample that still needs a label
    for label, label_time in zip(labels, time_stamp_y):
        # The bounds check has to come first: once every sample is labelled,
        # time_stamp_x[count] would be out of range.
        while count < n_samples and time_stamp_x[count] <= label_time:
            labels_for_x.append(label)
            count += 1
        if count == n_samples:
            break # Nothing left to label

    # Concatenate the dataframes. The labels reuse the index of the rows they belong to,
    # so the column-wise concat lines up even if x_frame does not have a 0..n-1 index.
    label_df = pd.DataFrame(
        labels_for_x,
        columns = ['Label'],
        index = x_frame.index[:len(labels_for_x)],
    )
    combined_frame = pd.concat([x_frame, label_df], axis = 1)

    # Drop missing values at the end (this includes samples that never received a label)
    return combined_frame.dropna()

In [117]:
# Load the measurements and the labels of one recording, then attach a label to each sample.
df_x = create_dataframe_X(
    x_file = 'TrainingData/subject_001_01__x.csv',
    x_time_file = 'TrainingData/subject_001_01__x_time.csv',
)
df_y = create_dataframe_Y(
    y_file = 'TrainingData/subject_001_01__y.csv',
    y_time_file = 'TrainingData/subject_001_01__y_time.csv',
)
frame = combine_frames(x_frame = df_x, y_frame = df_y)
print(frame)

          X_acc     Y_acc     Z_acc     X_gyr     Y_gyr     Z_gyr  Time stamp  \
0      4.435275  8.196063  2.974488  0.014215 -0.039157 -0.016744       0.000   
1      4.186920  8.344455  2.908057  0.005771 -0.004480 -0.003345       0.025   
2      4.544637  8.408659  2.890000  0.007967  0.022412  0.001159       0.050   
3      4.849308  8.411614  2.900692  0.027778 -0.010670 -0.014223       0.075   
4      4.509190  8.118649  2.847298  0.021577 -0.045498 -0.021111       0.100   
...         ...       ...       ...       ...       ...       ...         ...   
37884  1.762278  9.583861  0.928481  0.001940  0.008152 -0.003944     947.100   
37885  1.927349  9.584698  0.940000 -0.001852  0.015182 -0.000370     947.125   
37886  1.930000  9.562658  0.874684  0.003927  0.006205  0.000671     947.150   
37887  1.906144  9.555361  0.866144 -0.001111 -0.005937 -0.000032     947.175   
37888  1.900380  9.575190  0.865570 -0.004195 -0.004688 -0.007653     947.200   

       Label  
0        0.0

# Training, Validation, Test Sets