# Imports

In [1]:
from os import listdir
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, BatchNormalization

from tensorflow_addons.metrics import F1Score

from sklearn.model_selection import train_test_split

# GPU Presence

In [2]:
# Show the GPUs TensorFlow can see, then the name of the GPU device it would use.
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)
gpu_name = tf.test.gpu_device_name()
print(gpu_name)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
/device:GPU:0


# Mount Drive For Google Colab

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
# %cd drive/MyDrive/ECE542\ Project\ Competition/data

# Load Training Data

### Set Directory to Location of Training Data

In [5]:
# Paths ('data/TrainingData/<name>') of every file in the training directory;
# the loader checks candidate file names against this list.
files = ['data/TrainingData/' + name for name in listdir('data/TrainingData')]

### Read and Store Data into Dictionary of DataFrames

In [6]:
# Column names: the six sensor channels followed by the label column
features = ['x accelerometer', 'y accelerometer', 'z accelerometer', 'x gyroscope', 'y gyroscope', 'z gyroscope', 'label']

# subjects_dict[subject][trial] -> DataFrame indexed by time ('sec') holding the
# six sensor columns plus 'label', all on the sensor (x) time grid
subjects_dict = {}

for sub_num in range(1, 9):            # Subject A
    subjects_dict[str(sub_num)] = {}
    for trial_num in range(1, 10):     # Trial B
        prefix = 'data/TrainingData/subject_00{}_0{}__'.format(sub_num, trial_num)

        # Stop at the first missing trial; later trial numbers of this subject are not checked
        if (prefix + 'x_time.csv') not in files:
            break

        ## X
        # Read subject_00A_0B__x_time.csv
        x_time = pd.read_csv(prefix + 'x_time.csv', names=['sec'])
        # Read subject_00A_0B__x.csv
        x_data = pd.read_csv(prefix + 'x.csv', names=features[:-1])

        # Concatenate time with data, and set time as index
        x_result = pd.concat([x_time, x_data], axis=1)
        x_result = x_result.set_index('sec')

        ## Y
        # Read subject_00A_0B__y_time.csv
        y_time = pd.read_csv(prefix + 'y_time.csv', names=['sec'])
        # Shift the label timestamps back by 0.02 s and round to milliseconds so they can be
        # matched against the sensor timestamps below.
        # NOTE(review): presumably the label clock is offset by 0.02 s from the sensor clock - confirm.
        y_time.sec = y_time.sec - 0.02
        y_time = y_time.round(decimals=3)

        # Read subject_00A_0B__y.csv
        y_data = pd.read_csv(prefix + 'y.csv', names=[features[-1]])

        # Concatenate time with label, and set time as index
        y_result = pd.concat([y_time, y_data], axis=1)
        y_result = y_result.set_index('sec')

        ## Upsample the labels from the 10Hz label grid onto the 40Hz sensor grid
        x_num_pts = x_data.shape[0]
        x_endtime = x_result.index[-1]

        # Evenly spaced timestamps from 0 to the last sensor timestamp, one per sensor row
        time_temp = np.linspace(0, x_endtime, num=x_num_pts)
        time_temp = np.round(time_temp, decimals=3)

        # Convert above array into dataframe, and set time as index
        df_time_temp = pd.DataFrame(time_temp, columns=['sec'])
        df_time_temp = df_time_temp.set_index('sec')

        # Align labels on the dense grid, then carry each label forward until the next one.
        # .ffill() replaces fillna(method='ffill'), which newer pandas releases deprecate.
        new_y_result = pd.concat([df_time_temp, y_result], axis=1)
        new_y_result = new_y_result.ffill()

        ## Create final dataframe with x and y in 40Hz
        result = pd.concat([x_result, new_y_result], axis=1)

        # Confirm that x and y are aligned: the merge must not have added rows
        # or moved the final timestamp
        assert x_num_pts == result.shape[0]
        assert x_endtime == result.index[-1]

        subjects_dict[str(sub_num)][str(trial_num)] = result

In [7]:
# Preview the first rows of the merged sensor + label frame for subject 1, trial 1
subjects_dict['1']['1'].head()

Unnamed: 0_level_0,x accelerometer,y accelerometer,z accelerometer,x gyroscope,y gyroscope,z gyroscope,label
sec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,4.435275,8.196063,2.974488,0.014215,-0.039157,-0.016744,0.0
0.025,4.18692,8.344455,2.908057,0.005771,-0.00448,-0.003345,0.0
0.05,4.544637,8.408659,2.89,0.007967,0.022412,0.001159,0.0
0.075,4.849308,8.411614,2.900692,0.027778,-0.01067,-0.014223,0.0
0.1,4.50919,8.118649,2.847298,0.021577,-0.045498,-0.021111,0.0


# Helper Functions

### Static Params

In [8]:
X_RATE = 40  # sensor (x) rows per second; converts seconds into row offsets when windowing
Y_RATE = 10  # label (y) rows per second; used when expanding predictions for output
NUM_SENSORS = 6  # sensor channels per row (last axis of each window array)
NUM_CLASS = 4  # number of label classes (one-hot width and softmax output size)

### Functions for Data Preparation

In [9]:
def format_data(data, data_params):
    """Slice one trial into fixed-length sensor windows with one-hot labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Indexed by time in seconds and containing every column named in the
        module-level ``features`` list (sensor channels first, label last).
        Seconds are converted to row offsets with ``X_RATE``, so rows are
        assumed to be evenly spaced at that rate.
    data_params : dict
        ``'window'`` is the window length and ``'stride'`` the step between
        consecutive window starts, both in seconds (int or float).

    Returns
    -------
    x_epochs : numpy.ndarray
        Float array of shape ``(num_windows, window_size, NUM_SENSORS)``.
    y_epochs : numpy.ndarray
        Integer one-hot array of shape ``(num_windows, NUM_CLASS)``. The label
        of a window is read from the row right after the window's last sample.
    """
    window = data_params['window']
    stride = data_params['stride']

    # Rows per window. Computed as an int up front so that a float window
    # (e.g. 2.0 or 1.5) still yields a valid array shape.
    window_size = int(X_RATE * window)

    # A recording shorter than one window yields zero windows, not a negative size.
    num_epochs = max(0, int((data.index[-1] - window) / stride))
    x_epochs = np.zeros((num_epochs, window_size, NUM_SENSORS))
    y_epochs = np.zeros((num_epochs, NUM_CLASS), dtype=int)

    np_data = data[features].to_numpy()
    x_np, y_np = np_data[:, :-1], np_data[:, -1].astype(int)

    for i in range(num_epochs):
        offset = int(X_RATE * stride * i)
        x_epochs[i] = x_np[offset : window_size + offset]
        # One-hot encode the label found at the row following the window
        y_epochs[i, y_np[window_size + offset]] = 1

    return x_epochs, y_epochs

# Shuffles x and y with one shared permutation; pass `seed` for a reproducible order
def shuffle_data(x, y, seed=None):
    """Return ``x`` and ``y`` reordered along axis 0 by the same random permutation.

    Parameters
    ----------
    x, y : numpy.ndarray
        Arrays with the same length along their first axis.
    seed : int or None, optional
        When given, the permutation comes from a private generator seeded with
        this value, so the result is reproducible. When ``None`` (default) the
        global NumPy random state is used, as before.

    Returns
    -------
    (x_shuf, y_shuf) : tuple of numpy.ndarray
        Reordered copies; row ``i`` of ``x_shuf`` still pairs with row ``i`` of ``y_shuf``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length along axis 0, which would silently
        mis-pair samples and labels.
    """
    if x.shape[0] != y.shape[0]:
        raise ValueError(
            'x and y must have the same number of rows, got {} and {}'.format(x.shape[0], y.shape[0])
        )

    if seed is None:
        order = np.random.permutation(x.shape[0])
    else:
        order = np.random.RandomState(seed).permutation(x.shape[0])

    return x[order], y[order]

# Format, combine, and optionally shuffle the data of several subjects
def format_combine_shuffle(subjects_dict, subjects, data_params, shuffle=False, trials=('1',)):
    """Window the selected trials of the selected subjects and stack the results.

    Parameters
    ----------
    subjects_dict : dict
        ``subjects_dict[subject][trial]`` is the per-trial data handed to ``format_data``.
    subjects : iterable of str
        Subject keys to include.
    data_params : dict
        Passed through unchanged to ``format_data``.
    shuffle : bool, optional
        When true, the stacked windows and labels are shuffled together with ``shuffle_data``.
    trials : collection of str or None, optional
        Trial keys to include for every subject. The default ``('1',)`` keeps
        the previous behaviour of using only trial ``'1'``; pass ``None`` to
        use every trial present.

    Returns
    -------
    (x_seq, y_seq)
        Windows and labels concatenated along axis 0, or ``(None, None)`` when
        no trial matched the selection.
    """
    x_parts, y_parts = [], []

    for sub in subjects:
        for trial, data in subjects_dict[sub].items():
            if trials is not None and trial not in trials:
                continue
            x_temp, y_temp = format_data(data, data_params)
            x_parts.append(x_temp)
            y_parts.append(y_temp)

    # Nothing selected: report it the same way whether or not shuffling was requested
    if not x_parts:
        return None, None

    # Concatenate once at the end instead of re-copying the growing array per trial
    x_seq = np.concatenate(x_parts, axis=0)
    y_seq = np.concatenate(y_parts, axis=0)

    if shuffle:
        x_seq, y_seq = shuffle_data(x_seq, y_seq)

    return x_seq, y_seq

# Model Building and Training

### Initialize Model Parameters

In [10]:
LSTM_L1 = 256  # units in the LSTM layer
DENSE_L2 = 256  # units in the hidden Dense layer
arch_params = {'layer1': LSTM_L1, 'layer2': DENSE_L2}

INIT_LR = 0.001  # learning rate given to the Adam optimizer
EPOCHS = 10  # training epochs passed to model.fit
BATCH_SIZE = 32  # batch size passed to model.fit
WEIGHTS = {0:1, 1:4, 2:4, 3:6}  # class index -> weight, passed to model.fit as class_weight
fit_params = {'lr': INIT_LR, 'epochs': EPOCHS, 'batch_size': BATCH_SIZE, 'weights': WEIGHTS}

### Construct Model

In [11]:
def build_model(arch_params, fit_params, input_shape=None):
    """Build and compile the LSTM classifier.

    Parameters
    ----------
    arch_params : dict
        ``'layer1'`` is the number of LSTM units, ``'layer2'`` the number of
        units in the hidden Dense layer.
    fit_params : dict
        Only ``'lr'`` (the Adam learning rate) is read here.
    input_shape : tuple or None, optional
        ``(rows_per_window, channels)`` of one input window. Defaults to
        ``(80, NUM_SENSORS)``, the shape that was previously hard-coded. Pass
        the real window shape when the windowing parameters change.

    Returns
    -------
    A compiled ``Sequential`` model: LSTM -> BatchNormalization -> Dense(relu)
    -> BatchNormalization -> Dense(softmax over NUM_CLASS classes), using
    categorical cross-entropy loss and a macro-averaged F1 metric.
    """
    if input_shape is None:
        input_shape = (80, NUM_SENSORS)

    model = Sequential([
        LSTM(units=arch_params['layer1'], input_shape=input_shape, return_sequences=False),
        BatchNormalization(),
        Dense(units=arch_params['layer2'], activation='relu'),
        BatchNormalization(),
        Dense(NUM_CLASS, activation='softmax')
    ])

    # Use the Adam class imported at the top of the notebook
    opt = Adam(learning_rate=fit_params['lr'], amsgrad=True)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=[F1Score(num_classes=NUM_CLASS, average='macro')])

    return model

In [12]:
# Build and compile the network from the architecture and fit settings
model = build_model(arch_params, fit_params)

In [13]:
# Print the layers, their output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 256)               269312    
_________________________________________________________________
batch_normalization (BatchNo (None, 256)               1024      
_________________________________________________________________
dense (Dense)                (None, 256)               65792     
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 1028      
Total params: 338,180
Trainable params: 337,156
Non-trainable params: 1,024
_________________________________________________________________


### Establishing Training Set

In [14]:
# Train on every subject, keys '1' through '8'
subjects = [str(sub_id) for sub_id in range(1, 9)]

# Windowing settings: window length and step between windows, in seconds
data_params = dict(window=2, stride=0.5)

# Window, stack and shuffle the selected subjects' data
x_train, y_train = format_combine_shuffle(subjects_dict, subjects, data_params, shuffle=True)

### Train Model

In [15]:
# Unpack the training settings from fit_params
epochs, batch_size, weights = (fit_params[key] for key in ('epochs', 'batch_size', 'weights'))

# Fit without progress output; class_weight applies the per-class weights to the loss
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    class_weight=weights,
    verbose=0,
)

  ...
    to  
  ['...']


<tensorflow.python.keras.callbacks.History at 0x236d47af988>

# Generate Test Predictions

### Helper Method

In [16]:
# Format test data
def format_test_data(data, data_params):
    """Slice unlabeled sensor data into fixed-length windows.

    Parameters
    ----------
    data : pandas.DataFrame
        Indexed by time in seconds; every column is treated as a sensor
        channel, so it must have ``NUM_SENSORS`` columns. Seconds are converted
        to row offsets with ``X_RATE``, so rows are assumed to be evenly spaced
        at that rate.
    data_params : dict
        ``'window'`` is the window length and ``'stride'`` the step between
        consecutive window starts, both in seconds (int or float).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_windows, window_size, NUM_SENSORS)``.
    """
    window = data_params['window']
    stride = data_params['stride']

    # Rows per window. Computed as an int up front so that a float window
    # (e.g. 2.0 or 1.5) still yields a valid array shape.
    window_size = int(X_RATE * window)

    # A recording shorter than one window yields zero windows, not a negative size.
    num_epochs = max(0, int((data.index[-1] - window) / stride))
    x_epochs = np.zeros((num_epochs, window_size, NUM_SENSORS))

    x_np = data.to_numpy()

    for i in range(num_epochs):
        offset = int(X_RATE * stride * i)
        x_epochs[i] = x_np[offset : window_size + offset]

    return x_epochs

### Load Test Data

In [17]:
# Directory holding the test subjects' input CSV files (used as the path prefix by the loader)
test_dir = 'data/TestData/'

In [18]:
# subjects_test[subject] -> DataFrame of the six sensor columns indexed by time ('sec')
subjects_test = {}

for sub_num in range(9, 13):
    # Only trial 01 is read for each test subject. The file name is built from
    # sub_num alone, so this cell no longer depends on a loop variable left
    # over from the training-data cell.
    # test_dir must still be the input directory here; it is rebound to the
    # prediction output directory further down the notebook.
    prefix = test_dir + 'subject_{:03d}_01__'.format(sub_num)

    ## X
    # Read subject_0AA_01__x_time.csv
    x_time = pd.read_csv(prefix + 'x_time.csv', names=['sec'])
    # Read subject_0AA_01__x.csv
    x_data = pd.read_csv(prefix + 'x.csv', names=features[:-1])

    # Concatenate time with data, and set time as index
    x_result = pd.concat([x_time, x_data], axis=1)
    x_result = x_result.set_index('sec')

    subjects_test[str(sub_num)] = x_result

In [19]:
# Preview the first rows of subject 9's sensor DataFrame
subjects_test['9'].head()

Unnamed: 0_level_0,x accelerometer,y accelerometer,z accelerometer,x gyroscope,y gyroscope,z gyroscope
sec,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,0.396845,8.71,-4.476044,0.001111,-0.008889,0.004137
0.025,0.413795,8.706898,-4.456205,0.0,-0.002838,0.007162
0.05,0.423914,8.707828,-4.502172,0.002037,0.001111,0.007778
0.075,0.44298,8.718078,-4.50298,0.000403,0.004648,0.008384
0.1,0.485234,8.725234,-4.52,-2e-05,0.006667,0.007778


### Generate and Write Test Labels

In [20]:
# Output directory for the predicted label files.
# NOTE: this rebinds test_dir, which earlier pointed at the input directory, so the
# test-data loading cell must not be re-run after this one without resetting it.
test_dir = 'data/TestData/LabelPredictions/'

In [21]:
X_Time_Window = data_params['window']       # window length in seconds
Y_Time_Step_Size = data_params['stride']    # step between windows in seconds

def predict_write_test(model, sub_dict):
    """Predict labels for every test subject and write one CSV file per subject.

    For each subject the sensor data is windowed with ``format_test_data``, the
    model predicts one class per window, and the per-window classes are expanded
    to the label rate: the first window's class fills the first
    ``X_Time_Window * Y_RATE`` rows, and every later window contributes
    ``Y_Time_Step_Size * Y_RATE`` rows. The result is then padded with its last
    value, or truncated, to exactly one label per row of the subject's
    ``y_time.csv`` file.

    Reads the module-level ``data_params``, ``test_dir`` (output directory),
    ``Y_RATE``, ``X_Time_Window`` and ``Y_Time_Step_Size``.

    Parameters
    ----------
    model : object with a ``predict`` method returning per-class scores per window.
    sub_dict : dict
        Maps a subject key (convertible with ``int``) to that subject's sensor DataFrame.

    Raises
    ------
    ValueError
        If a subject's recording is too short to produce a single window.
    """
    from os import makedirs  # local import: the notebook only imports listdir from os

    rows_first_window = int(X_Time_Window * Y_RATE)
    rows_per_step = int(Y_Time_Step_Size * Y_RATE)

    # Create the output directory up front so the write cannot fail after prediction
    makedirs(test_dir, exist_ok=True)

    for sub, data in sub_dict.items():
        prefix = 'data/TestData/subject_{:03d}_01__'.format(int(sub))

        x_final_test = format_test_data(data, data_params)
        if x_final_test.shape[0] == 0:
            raise ValueError('subject {} is too short to form a single window'.format(sub))

        # Predict on new dataset: most likely class per window
        yhat = model.predict(x_final_test)
        yhat_write = np.argmax(yhat, axis=1)

        # One label is expected per timestamp in the subject's y_time file
        y_time = pd.read_csv(prefix + 'y_time.csv', names=['sec'])
        expected_len = y_time.shape[0]

        # Expand per-window predictions to the label rate
        expanded_y = np.concatenate((
            np.repeat(yhat_write[:1], rows_first_window),
            np.repeat(yhat_write[1:], rows_per_step),
        ))

        # Force the exact expected length: hold the last label, or cut the excess
        if len(expanded_y) < expected_len:
            tail = np.full(expected_len - len(expanded_y), expanded_y[-1])
            expanded_y = np.concatenate((expanded_y, tail))
        else:
            expanded_y = expanded_y[:expected_len]

        # Write to csv. header=False keeps the file headerless like the label files
        # read elsewhere in this notebook; by default to_csv would emit the column
        # name `0` as a bogus first row.
        df_yhat = pd.DataFrame(expanded_y)

        write_dir = test_dir + 'subject_{:03d}_01__y.csv'.format(int(sub))
        df_yhat.to_csv(write_dir, index=False, header=False)

In [22]:
# Predict labels for every loaded test subject and write the per-subject CSV files
predict_write_test(model, subjects_test)