In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
# This cell only works inside Google Colab (it depends on google.colab).
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import glob, os


import numpy as np
import pandas as pd
from scipy.stats import mode

from sklearn.ensemble import RandomForestClassifier as RFC, ExtraTreesClassifier as ETC
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score, StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

from tqdm import tqdm, trange 

In [None]:
# Base directory of the training set on the mounted drive; dataloader() looks
# for CSV files matching "TrainData/*/*/*.csv" underneath it.
root = "/content/drive/MyDrive/Projects/2021/BPARC2021/train/"

In [None]:
def segmentation(df, overlap_rate, time_window):
    """Cut one recording into fixed-length, possibly overlapping windows.

    Missing values are filled before windowing: linear interpolation first,
    then forward fill, and finally 0 for anything that is still missing
    (e.g. leading gaps).

    Args:
        df: DataFrame holding one recording, one row per time step.
        overlap_rate: Fraction of a window shared with the next window
            (0 means back-to-back windows).
        time_window: Number of rows in every window; must be at least 1.

    Returns:
        A list of DataFrames, each with exactly ``time_window`` rows and a
        fresh 0-based index. The list is empty when ``df`` has fewer rows
        than ``time_window``.

    Raises:
        ValueError: If ``time_window`` is smaller than 1.
    """
    if time_window < 1:
        raise ValueError("time_window must be at least 1")

    # Convert the overlap rate into the stride of the sliding window. The
    # stride is clamped to 1: float truncation (e.g. (1 - 0.9) * 10 gives
    # 0.999...) or overlap_rate == 1 would otherwise yield 0 and make
    # range() raise.
    step = max(1, int((1 - overlap_rate) * time_window))

    filled = df.interpolate().ffill().fillna(0)

    # Slice by position so the windows do not depend on the index labels the
    # incoming frame happens to carry.
    return [
        filled.iloc[start:start + time_window].reset_index(drop=True)
        for start in range(0, len(filled) - time_window + 1, step)
    ]

def dataloader(overlap, window_size):
    """Read every training CSV below ``root`` and cut it into windows.

    Args:
        overlap: Overlap rate forwarded to ``segmentation``.
        window_size: Rows per window, forwarded to ``segmentation``.

    Returns:
        A tuple ``(data_list, file_lengths)``:

        * ``data_list`` - all windows of all files, in file order.
        * ``file_lengths`` - dict keyed by subject id (1, 2, 3) whose values
          list the number of windows each of that subject's files produced,
          in the same file order. Files too short to yield a window are not
          recorded.
    """
    print("loading the data...", end="\t")
    data_list = []
    file_lengths = {1: [], 2: [], 3: []}
    # glob returns paths in arbitrary order; sorting makes the window order
    # (and therefore every downstream result) reproducible between runs.
    for file in tqdm(sorted(glob.glob(root + "TrainData/*/*/*.csv"))):
        segmented_data = segmentation(pd.read_csv(file), overlap, window_size)
        if segmented_data:
            # Read the subject by column name, the same way feature_extractor
            # does, instead of relying on the column's position.
            person = segmented_data[0].loc[0, "subject_id"]
            file_lengths[person].append(len(segmented_data))
        data_list.extend(segmented_data)
    return data_list, file_lengths

def feature_extractor(data_list):
    """Group window features and labels by subject.

    Args:
        data_list: List of window DataFrames that contain the columns
            "subject_id" and "activity"; the label is read from the last
            column of the first row.

    Returns:
        A tuple ``(X, y)`` of dicts keyed by subject id (1, 2, 3). ``X[s]``
        is a list of arrays holding each window without the "subject_id" and
        "activity" columns, ``y[s]`` the matching list of labels.
    """
    print("extracting the features...", end="  ")
    X = {1: [], 2: [], 3: []}
    y = {1: [], 2: [], 3: []}
    for idx in trange(len(data_list)):
        window = data_list[idx]
        subject = window.loc[0, "subject_id"]
        # keep only the signal columns
        signals = window.drop(columns=["subject_id", "activity"])
        X[subject].append(signals.to_numpy())
        y[subject].append(window.iloc[0, -1])
    return X, y

def majority_voting(predictions, file_lengths):
    """Replace every prediction with the most frequent one of its file.

    ``predictions`` is consumed in consecutive chunks whose sizes are given
    by ``file_lengths``; every element of a chunk is replaced by the chunk's
    most common value. Ties are resolved in favour of the smallest value.

    Args:
        predictions: 1-D sequence of per-window class labels (not per-class
            probabilities), ordered file by file.
        file_lengths: Number of windows in each file, in the same order.

    Returns:
        A list with ``sum(file_lengths)`` entries holding the voted labels.
    """
    filtered_predictions = []
    index = 0
    for length in file_lengths:
        if length <= 0:
            # Nothing to vote on for a file without windows.
            continue
        file_pred = np.asarray(predictions[index:index + length])
        # np.unique returns the values sorted, so argmax picks the smallest
        # of the most frequent values. Unlike indexing scipy's mode result
        # with [0], this does not depend on the installed SciPy version.
        values, counts = np.unique(file_pred, return_counts=True)
        majority_choice = values[np.argmax(counts)]
        filtered_predictions.extend([majority_choice] * length)
        index += length
    return filtered_predictions

In [None]:
def get_model(input_size, n_features=39, n_classes=10):
    """Build and compile the Conv1D + LSTM window classifier.

    The network stacks three Conv1D layers (each with stride 3, followed by
    LeakyReLU), two LSTM layers with dropout, and a dense head of 200, 100
    and ``n_classes`` units with batch normalisation; the output is a
    softmax over the classes.

    Args:
        input_size: Number of time steps in each input window.
        n_features: Number of channels per time step.
        n_classes: Number of output classes.

    Returns:
        A compiled ``keras`` Sequential model (categorical cross-entropy
        loss, Adam optimizer, accuracy metric) expecting inputs of shape
        ``(batch, input_size, n_features)`` and one-hot targets.
    """
    layers = keras.layers
    model = keras.models.Sequential()

    # Convolutional front end: every layer uses stride 3, which shortens the
    # sequence before it reaches the recurrent layers.
    model.add(layers.Conv1D(30, 3, 3, input_shape=(input_size, n_features)))
    model.add(layers.LeakyReLU(alpha=0.3))
    model.add(layers.Conv1D(60, 3, 3))
    model.add(layers.LeakyReLU(alpha=0.3))
    model.add(layers.Conv1D(120, 1, 3))
    model.add(layers.LeakyReLU(alpha=0.3))

    # Recurrent part: the first LSTM returns the full sequence so the second
    # one can consume it and emit a single vector per window.
    model.add(layers.LSTM(100, return_sequences=True))
    model.add(layers.Dropout(0.2))
    model.add(layers.LSTM(100))
    model.add(layers.Dropout(0.2))

    # Dense head.
    for units in (200, 100):
        model.add(layers.Dense(units))
        model.add(layers.BatchNormalization())
        model.add(layers.LeakyReLU(alpha=0.3))
        model.add(layers.Dropout(0.2))

    model.add(layers.Dense(n_classes))
    # NOTE(review): batch-normalising the logits right before softmax is
    # unusual - kept as in the original design, confirm it is intended.
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
def loocv_train_evaluate(overlap_rate, window_size, voting=True):
    """Leave-one-subject-out training and evaluation of the window classifier.

    For each of the three subjects a fresh model is trained on the windows
    of the other two subjects and evaluated on the held-out subject.

    Args:
        overlap_rate: Overlap rate between consecutive windows, forwarded to
            ``dataloader``.
        window_size: Rows per window; also the model's input length.
        voting: When True, the per-window predictions of the held-out
            subject are smoothed with ``majority_voting`` (one vote per
            file) before the accuracy is computed.

    Returns:
        A list with the held-out accuracy of each fold, in the order
        subject 1, 2, 3.
    """
    scores = []
    data, file_lengths = dataloader(overlap_rate, window_size)
    X, y = feature_extractor(data)
    for p1, p2, p3 in [(1, 2, 3), (2, 3, 1), (3, 1, 2)]:
        X_test, y_test = np.array(X[p1]), np.array(y[p1])
        X_train = np.array(X[p2] + X[p3])
        y_train = np.array(y[p2] + y[p3])

        # Scale both splits with statistics taken from the training data
        # only, so the held-out subject is transformed exactly like the data
        # the model was fitted on. The absolute maximum avoids sign flips
        # for all-negative positions, and zeros are replaced by 1 to avoid
        # dividing by zero on constant-zero positions.
        scale = np.abs(X_train).max(axis=0)
        scale[scale == 0] = 1
        X_train = X_train / scale
        X_test = X_test / scale

        # Fit the encoder on the training labels only and reuse it for the
        # test labels so both share the same column order. Labels unseen
        # during training become all-zero rows instead of raising. The
        # default sparse output is densified with toarray(), which works
        # regardless of the sparse/sparse_output keyword rename.
        onehot_encoder = OneHotEncoder(handle_unknown="ignore")
        y_train_onehot = onehot_encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
        y_test_onehot = onehot_encoder.transform(y_test.reshape(-1, 1)).toarray()

        print(f"training model for person {p1}/3...", end="\t")
        model = get_model(window_size)
        model.fit(X_train, y_train_onehot, epochs=50, verbose=2,
                  validation_data=(X_test, y_test_onehot))

        # Turn the softmax output back into the original label values before
        # voting / scoring.
        class_index = np.argmax(model.predict(X_test), axis=1)
        pred = onehot_encoder.categories_[0][class_index]
        if voting:
            pred = majority_voting(pred, file_lengths[p1])
        scores.append(accuracy_score(y_test, pred))
    print(f"\nScores: {scores}, \tMean: {np.mean(scores)}")
    return scores

In [None]:
# Experiment: overlap rate 0.5, windows of 500 rows.
loocv_train_evaluate(0.5, 500)

  1%|▏         | 2/151 [00:00<00:07, 19.08it/s]

loading the data...	

100%|██████████| 151/151 [00:08<00:00, 18.66it/s]
  4%|▎         | 133/3573 [00:00<00:02, 1326.65it/s]

extracting the features...  

100%|██████████| 3573/3573 [00:02<00:00, 1236.22it/s]


training model for person 1/3...	Epoch 1/50
77/77 - 5s - loss: 2.0132 - accuracy: 0.1930 - val_loss: 2.9067 - val_accuracy: 0.1052
Epoch 2/50
77/77 - 1s - loss: 1.8144 - accuracy: 0.2517 - val_loss: 2.8717 - val_accuracy: 0.1052
Epoch 3/50
77/77 - 1s - loss: 1.7405 - accuracy: 0.3052 - val_loss: 2.9842 - val_accuracy: 0.1052
Epoch 4/50
77/77 - 1s - loss: 1.6643 - accuracy: 0.3456 - val_loss: 3.2413 - val_accuracy: 0.1078
Epoch 5/50
77/77 - 1s - loss: 1.5650 - accuracy: 0.3958 - val_loss: 3.1919 - val_accuracy: 0.1034
Epoch 6/50
77/77 - 1s - loss: 1.4947 - accuracy: 0.4284 - val_loss: 3.3234 - val_accuracy: 0.1319
Epoch 7/50
77/77 - 1s - loss: 1.4104 - accuracy: 0.4635 - val_loss: 3.3198 - val_accuracy: 0.1283
Epoch 8/50
77/77 - 1s - loss: 1.3589 - accuracy: 0.4851 - val_loss: 3.3945 - val_accuracy: 0.1266
Epoch 9/50
77/77 - 1s - loss: 1.3122 - accuracy: 0.4867 - val_loss: 3.8724 - val_accuracy: 0.1105
Epoch 10/50
77/77 - 1s - loss: 1.2976 - accuracy: 0.5149 - val_loss: 3.8573 - val_acc

[]

In [None]:
# Experiment: overlap rate 0.5, windows of 2000 rows.
loocv_train_evaluate(0.5, 2000)

  2%|▏         | 3/151 [00:00<00:06, 21.31it/s]

loading the data...	

100%|██████████| 151/151 [00:07<00:00, 19.95it/s]
 15%|█▍        | 104/714 [00:00<00:00, 1033.56it/s]

extracting the features...  

100%|██████████| 714/714 [00:00<00:00, 988.04it/s]


training model for person 1/3...	Epoch 1/50
16/16 - 4s - loss: 2.2973 - accuracy: 0.1537 - val_loss: 2.4137 - val_accuracy: 0.1062
Epoch 2/50
16/16 - 0s - loss: 1.8748 - accuracy: 0.2459 - val_loss: 2.5703 - val_accuracy: 0.1062
Epoch 3/50
16/16 - 0s - loss: 1.7791 - accuracy: 0.2664 - val_loss: 2.6135 - val_accuracy: 0.1062
Epoch 4/50
16/16 - 0s - loss: 1.7131 - accuracy: 0.2766 - val_loss: 2.6732 - val_accuracy: 0.1062
Epoch 5/50
16/16 - 0s - loss: 1.7080 - accuracy: 0.3238 - val_loss: 2.6083 - val_accuracy: 0.1062
Epoch 6/50
16/16 - 0s - loss: 1.6095 - accuracy: 0.3975 - val_loss: 2.6534 - val_accuracy: 0.1062
Epoch 7/50
16/16 - 0s - loss: 1.5705 - accuracy: 0.3689 - val_loss: 2.6205 - val_accuracy: 0.1018
Epoch 8/50
16/16 - 0s - loss: 1.5330 - accuracy: 0.4262 - val_loss: 2.8559 - val_accuracy: 0.1062
Epoch 9/50
16/16 - 0s - loss: 1.4643 - accuracy: 0.4447 - val_loss: 2.6605 - val_accuracy: 0.1195
Epoch 10/50
16/16 - 0s - loss: 1.3729 - accuracy: 0.4734 - val_loss: 2.7146 - val_acc

[]

In [None]:
# Experiment: overlap rate 0.75, windows of 3000 rows.
loocv_train_evaluate(0.75, 3000)

  2%|▏         | 3/151 [00:00<00:07, 21.00it/s]

loading the data...	

100%|██████████| 151/151 [00:07<00:00, 19.56it/s]
 12%|█▏        | 91/736 [00:00<00:00, 903.96it/s]

extracting the features...  

100%|██████████| 736/736 [00:00<00:00, 922.19it/s]


training model for person 1/3...	Epoch 1/50
16/16 - 5s - loss: 2.3745 - accuracy: 0.1709 - val_loss: 2.3209 - val_accuracy: 0.1057
Epoch 2/50
16/16 - 0s - loss: 1.8480 - accuracy: 0.2417 - val_loss: 2.5171 - val_accuracy: 0.1057
Epoch 3/50
16/16 - 0s - loss: 1.7464 - accuracy: 0.2888 - val_loss: 2.5606 - val_accuracy: 0.1057
Epoch 4/50
16/16 - 0s - loss: 1.6128 - accuracy: 0.3517 - val_loss: 2.4537 - val_accuracy: 0.1057
Epoch 5/50
16/16 - 0s - loss: 1.5617 - accuracy: 0.4126 - val_loss: 2.4915 - val_accuracy: 0.1057
Epoch 6/50
16/16 - 0s - loss: 1.4241 - accuracy: 0.4892 - val_loss: 2.5050 - val_accuracy: 0.1057
Epoch 7/50
16/16 - 0s - loss: 1.4893 - accuracy: 0.4047 - val_loss: 2.6560 - val_accuracy: 0.1057
Epoch 8/50
16/16 - 0s - loss: 1.3989 - accuracy: 0.5069 - val_loss: 2.7437 - val_accuracy: 0.1057
Epoch 9/50
16/16 - 0s - loss: 1.2815 - accuracy: 0.5167 - val_loss: 2.8378 - val_accuracy: 0.1101
Epoch 10/50
16/16 - 0s - loss: 1.3403 - accuracy: 0.4774 - val_loss: 2.6083 - val_acc

[]

In [None]:
# Experiment: overlap rate 0.75, windows of 3500 rows.
loocv_train_evaluate(0.75, 3500)

  1%|▏         | 2/151 [00:00<00:07, 18.92it/s]

loading the data...	

100%|██████████| 151/151 [00:08<00:00, 18.42it/s]
 15%|█▌        | 86/561 [00:00<00:00, 850.74it/s]

extracting the features...  

100%|██████████| 561/561 [00:00<00:00, 814.12it/s]


training model for person 1/3...	Epoch 1/50
13/13 - 5s - loss: 2.2845 - accuracy: 0.1455 - val_loss: 2.3742 - val_accuracy: 0.1023
Epoch 2/50
13/13 - 0s - loss: 1.8722 - accuracy: 0.2260 - val_loss: 2.5473 - val_accuracy: 0.1080
Epoch 3/50
13/13 - 0s - loss: 1.7801 - accuracy: 0.2753 - val_loss: 2.6410 - val_accuracy: 0.1080
Epoch 4/50
13/13 - 0s - loss: 1.6953 - accuracy: 0.3117 - val_loss: 2.5958 - val_accuracy: 0.1080
Epoch 5/50
13/13 - 0s - loss: 1.5520 - accuracy: 0.3844 - val_loss: 2.6756 - val_accuracy: 0.1080
Epoch 6/50
13/13 - 0s - loss: 1.4132 - accuracy: 0.4909 - val_loss: 2.4814 - val_accuracy: 0.1080
Epoch 7/50
13/13 - 0s - loss: 1.4317 - accuracy: 0.4883 - val_loss: 2.4227 - val_accuracy: 0.1989
Epoch 8/50
13/13 - 0s - loss: 1.3945 - accuracy: 0.5325 - val_loss: 2.6520 - val_accuracy: 0.1080
Epoch 9/50
13/13 - 0s - loss: 1.2636 - accuracy: 0.5792 - val_loss: 2.7437 - val_accuracy: 0.1080
Epoch 10/50
13/13 - 0s - loss: 1.1993 - accuracy: 0.6416 - val_loss: 2.6372 - val_acc

[]