In [1]:
import sys
sys.path.append('../src')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import tensorflow as tf
from gait.config import pd
from gait.utils import get_data_by_overlap_percent,get_overlap_data_all_sessions,  split_test_train_by_subjects, remove_invalid_data, get_overlap_data_all_sessions
from gait.training import train_2dcnn_lstm_model
from gait.evalution import save_history, save_test_history, save_accuracy_loss_figure, save_confusion_matrix_figure, compute_validations_predictions

In [2]:
def compute_resultant_acceleration(X):
    '''
    returns the Euclidean norm of channels 0, 1 and 2 of X (taken on the last axis)

    X is indexed with three axes; the result keeps the first two.
    With the default X_LABELS ordering these channels are 'ax', 'ay', 'az'.
    '''
    first, second, third = X[:, :, 0], X[:, :, 1], X[:, :, 2]
    squared_sum = first ** 2 + second ** 2 + third ** 2
    return np.sqrt(squared_sum)

def compute_resultant_gyro(X):
    '''
    returns the Euclidean norm of channels 3, 4 and 5 of X (taken on the last axis)

    X is indexed with three axes; the result keeps the first two.
    With the default X_LABELS ordering these channels are 'gx', 'gy', 'gz'.
    '''
    first, second, third = X[:, :, 3], X[:, :, 4], X[:, :, 5]
    squared_sum = first ** 2 + second ** 2 + third ** 2
    return np.sqrt(squared_sum)

def compute_resultant_angle(X):
    '''
    returns the Euclidean norm of channels 6, 7 and 8 of X (taken on the last axis)

    X is indexed with three axes; the result keeps the first two.
    With the default X_LABELS ordering these channels are 'p', 'y', 'r'.
    '''
    first, second, third = X[:, :, 6], X[:, :, 7], X[:, :, 8]
    squared_sum = first ** 2 + second ** 2 + third ** 2
    return np.sqrt(squared_sum)

In [3]:
# Folder that contains one sub-folder per recording session (see path_builder).
ROOT_DATA_DIR = '../data/'

# Channel labels; each label maps to one CSV file name (see get_X_files).
X_LABELS = ['ax', 'ay', 'az', 'gx', 'gy', 'gz', 'p', 'y', 'r']

# Sensor folder names, keyed by side.
SENSORS = dict(LEFT="LEFT", RIGHT="RIGHT")
SENSORS_LIST = [SENSORS[side] for side in ("LEFT", "RIGHT")]

# good session 2, 4, 6
sessions = ['session2', 'session4', 'session6']
DEFAULT_SESSIONS = sessions[0]

# File / folder names used inside each overlap folder.
X_PATH = 'data/'
Y_FILE = 'y_train.csv'
SUBJECT_FILE = 'subject.csv'

def get_X_files(label):
    '''
    returns the X data file name for the given channel label
    '''
    file_name_template = 'acc_{}_data.csv'
    return file_name_template.format(label)


def get_data_overlap_folder(overlapPercent):
    '''
    returns the name of the folder holding data for the given overlap percentage
    '''
    folder_name_template = 'data_{}_overlap'
    return folder_name_template.format(overlapPercent)


def create_dir(dir_path):
    '''
    creates dir_path (including any missing parent folders) if it does not exist yet

    Calling it again on an existing directory is a no-op.
    Raises FileExistsError when dir_path exists but is not a directory.
    '''
    # Imported here because the notebook's import cell does not import `os`.
    import os

    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dir_path, exist_ok=True)


def load_file(filename):
    '''
    reads a header-less, comma-delimited file and returns its values as an array
    '''
    frame = pd.read_csv(filename, delimiter=",", header=None)
    return frame.values


def load_group(filenames):
    '''
    loads every file in filenames (in order) and stacks the arrays depth-wise
    '''
    per_file_arrays = [load_file(path) for path in filenames]
    return np.dstack(per_file_arrays)


def path_builder(session, overlapPercent, sensorName, fileName, prefix=""):
    '''
    returns ROOT_DATA_DIR + session/sensorName/<overlap folder>/<prefix><fileName>

    prefix is glued directly in front of fileName (no separator is added).
    '''
    overlap_folder = get_data_overlap_folder(overlapPercent)
    relative_path = '/'.join(
        (session, sensorName, overlap_folder, prefix + fileName))
    return ROOT_DATA_DIR + relative_path


def get_unique_subjects(subjects):
    '''
    returns the sorted, de-duplicated values found in subjects (np.unique)
    '''
    unique_ids = np.unique(subjects)
    return unique_ids


def remove_invalid_data(X, y, subjects):
    '''
    drops every sample whose label in y is NaN

    X, y and subjects are filtered along axis 0 with the same row indices so
    they stay aligned. When y contains no NaN the inputs are returned as is.

    returns the tuple (X, y, subjects)
    '''
    # Row index of every NaN entry; de-duplicated in case one row has several.
    nan_rows = np.unique(np.argwhere(np.isnan(y))[:, 0])
    if nan_rows.size != 0:
        # Remove all offending rows; deleting only nan_rows[0] would leave
        # later NaN labels in the data.
        y = np.delete(y, nan_rows, axis=0)
        subjects = np.delete(subjects, nan_rows, axis=0)
        X = np.delete(X, nan_rows, axis=0)
    return X, y, subjects


def get_overlap_data_all_sessions(overlapPercent, xLabels=X_LABELS):
    '''
    loads the data of every session listed in `sessions` and stacks it

    overlapPercent: overlap percentage used to pick the data folder
    xLabels: channel labels to load (defaults to X_LABELS)

    returns (X, y, subject), each the np.vstack of the per-session arrays
    '''
    X_list, y_list, subject_list = [], [], []
    for session in sessions:
        # Forward the caller's xLabels instead of always using X_LABELS.
        X, y, subject = get_data_by_overlap_percent(
            overlapPercent, xLabels=xLabels, session=session)
        X_list.append(X)
        y_list.append(y)
        subject_list.append(subject)

    return np.vstack(X_list), np.vstack(y_list), np.vstack(subject_list)


def get_data_by_overlap_percent(overlapPercent, session, xLabels=X_LABELS):
    '''
    loads the LEFT and RIGHT sensor data of one session and concatenates them

    overlapPercent: overlap percentage used to pick the data folder
    session: session folder name
    xLabels: channel labels to load, one file per label (defaults to X_LABELS)

    returns (X, y, subject); y and subject are returned as string arrays,
    with y first cast to float and then to int
    '''
    x_files = [get_X_files(label) for label in xLabels]

    X_parts, y_parts, subject_parts = [], [], []
    # LEFT is loaded before RIGHT so the row order of the result is unchanged.
    for sensor in (SENSORS["LEFT"], SENSORS["RIGHT"]):
        X_file_paths = [
            path_builder(session, overlapPercent, sensor, file_name,
                         prefix=X_PATH)
            for file_name in x_files
        ]
        X_parts.append(load_group(X_file_paths))
        y_parts.append(load_file(
            path_builder(session, overlapPercent, sensor, Y_FILE)))
        subject_parts.append(load_file(
            path_builder(session, overlapPercent, sensor, SUBJECT_FILE)))

    X = np.concatenate(X_parts, axis=0)
    y = np.concatenate(y_parts, axis=0)
    subject = np.concatenate(subject_parts, axis=0)

    X, y, subject = remove_invalid_data(X, y, subject)

    # float -> int -> str, same conversion chain as before.
    y = np.array(y, dtype=float)
    y = np.array(y, dtype=int)
    y = np.array(y, dtype=str)

    subject = np.array(subject, dtype=str)
    return (X, y, subject)


def filter_excluded_subject(subjects, excluded_subjects):
    '''
    returns a list with the entries of subjects that are not in
    excluded_subjects, keeping their original order
    '''
    kept = []
    for candidate in subjects:
        if candidate in excluded_subjects:
            continue
        kept.append(candidate)
    return kept


def split_test_train_by_subjects(X, y, subjects, exclude_subjects=None,
                                 train_fraction=0.8):
    '''
    splits the samples into train and test sets so that no subject is in both

    X, y, subjects: arrays aligned on axis 0 (y is indexed as 2-D here)
    exclude_subjects: subject ids left out of both sets (default: none)
    train_fraction: share of the shuffled subjects assigned to the train set.
        NOTE(review): the original split ratio is not visible in this
        notebook; 0.8 is an assumed default - confirm.

    The subject order is shuffled with np.random, so seed it for
    reproducible splits.

    returns (train_X, test_X, encoded_train_y, encoded_test_y, train_y, test_y)
    where the encoded labels are one-hot arrays of identical width
    '''
    if exclude_subjects is None:
        exclude_subjects = []

    unique_subjects = filter_excluded_subject(
        get_unique_subjects(subjects), exclude_subjects)
    np.random.shuffle(unique_subjects)

    n_train_subjects = int(len(unique_subjects) * train_fraction)
    train_subjects = unique_subjects[:n_train_subjects]
    test_subjects = unique_subjects[n_train_subjects:]

    subjects_arr = np.asarray(subjects)

    def _rows_of(group):
        # Row indices whose subject id belongs to `group`. The ids come from
        # subjects_arr itself, so casting to its dtype cannot truncate them.
        members = np.asarray(group, dtype=subjects_arr.dtype)
        return np.where(np.isin(subjects_arr, members))[0]

    train_idx = _rows_of(train_subjects)
    test_idx = _rows_of(test_subjects)

    train_X = X[train_idx, :]
    test_X = X[test_idx, :]

    # Labels may arrive as strings such as '1': go through float, then int.
    train_y = np.array(np.array(y[train_idx, :], dtype=float), dtype=int)
    test_y = np.array(np.array(y[test_idx, :], dtype=float), dtype=int)

    # Share one class count so both one-hot encodings have the same width
    # even when a class is missing from one of the splits.
    all_labels = np.concatenate((train_y.ravel(), test_y.ravel()))
    num_classes = int(all_labels.max()) + 1 if all_labels.size else None
    encoded_train_y = tf.keras.utils.to_categorical(
        train_y, num_classes=num_classes)
    encoded_test_y = tf.keras.utils.to_categorical(
        test_y, num_classes=num_classes)

    return train_X, test_X, encoded_train_y, encoded_test_y, train_y, test_y

In [4]:
def augument_data(X):
    '''
    returns X with three extra channels appended on axis 2: the resultant
    acceleration, gyro and angle magnitudes (in that order), all computed
    from the original X
    '''
    extra_channels = [
        compute_resultant_acceleration(X),
        compute_resultant_gyro(X),
        compute_resultant_angle(X),
    ]
    # Give each 2-D magnitude a trailing axis of length 1 so it can be
    # concatenated with X along axis 2.
    expanded = [
        channel.reshape(channel.shape[0], channel.shape[1], 1)
        for channel in extra_channels
    ]
    return np.concatenate([X] + expanded, axis=2)

def reshape_lstm_data(X):
    '''
    reshapes X to (X.shape[0], 4, 32, 12)
    '''
    n_samples = X.shape[0]
    target_shape = (n_samples, 4, 32, 12)
    return X.reshape(target_shape)

In [5]:
from itertools import combinations


def get_unique_combination(arr):
    '''
    returns every non-empty combination of the items of arr as a list of
    tuples, ordered by combination size (1 up to len(arr))
    '''
    all_combinations = []
    for size in range(1, len(arr) + 1):
        all_combinations.extend(combinations(arr, size))
    return all_combinations


def get_subjects(subjects):
    '''
    returns the unique subject ids in subjects minus the hard-coded exclusions
    '''
    # 'nan' is presumably a missing subject id turned into a string - confirm.
    exclude_subjects = ['ddAeJA42PXvwthbW', 'nan']
    candidate_ids = np.unique(subjects)
    return filter_excluded_subject(candidate_ids, exclude_subjects)


def exclude(X, y, subjects, exclude_subjects):
    '''
    returns (X, y) restricted to the rows whose subject is not in exclude_subjects

    X, y, subjects: arrays aligned on axis 0; subjects may be 1-D or (N, 1)
    exclude_subjects: subject ids to drop

    Rows are returned in their original order.
    '''
    subjects_arr = np.asarray(subjects)
    excluded = list(exclude_subjects)
    if excluded:
        # np.isin handles both 1-D and (N, 1) subjects; comparing with `==`
        # against a list only works through broadcasting for the (N, 1) case.
        keep_mask = ~np.isin(subjects_arr, excluded)
    else:
        keep_mask = np.ones(subjects_arr.shape, dtype=bool)
    # First index array = row numbers of the kept samples.
    idx = np.where(keep_mask)[0]
    return X[idx], y[idx]


def exclude_subject_evaluate_model(model, X_t, y_t, subjects, include_list):
    '''
    evaluates model on the samples that belong to the subjects in include_list

    model: object exposing evaluate(X, y, batch_size=..., verbose=...)
    X_t, y_t, subjects: arrays aligned on axis 0
    include_list: subject ids whose samples are evaluated

    returns whatever model.evaluate returns, or None when the evaluation
    fails (the failure is printed instead of being silently discarded)
    '''
    try:
        # np.isin works for 1-D and (N, 1) subjects alike.
        idx = np.where(np.isin(subjects, include_list))[0]
        new_X_t = reshape_lstm_data(augument_data(X_t[idx, :]))
        new_y_t_en = tf.keras.utils.to_categorical(y_t[idx, :])
        e_h = model.evaluate(
            new_X_t, new_y_t_en, batch_size=128, verbose=1)
        print(e_h)
        return e_h
    except Exception as error:
        # Best effort: keep the sweep over subject groups going, but report
        # what went wrong. KeyboardInterrupt / SystemExit still propagate.
        print('evaluation failed for {}: {!r}'.format(include_list, error))
        return None

In [6]:
# Load the saved checkpoint and the data used to evaluate it.
# NOTE(review): the checkpoint path says 80_overlap while the data below is
# loaded with overlapPercent=0 - confirm this is intended.
saved_model = tf.keras.models.load_model(
    '../best_model/lstm/80_overlap/best_model.75-0.05-0.9856074452400208.hdf5'
)
X_t, y_t, subject_t = get_data_by_overlap_percent(
    overlapPercent=0, session='session1')

# Every non-empty combination of the subjects that are not excluded.
filtered_subjects = get_subjects(subject_t)
subject_combinations = get_unique_combination(filtered_subjects)


In [7]:
# Evaluate the saved model on each subject combination; results maps the
# subject tuple to the value returned by exclude_subject_evaluate_model.
results = dict()
for combined_subjects in subject_combinations:
    history = exclude_subject_evaluate_model(
        saved_model, X_t, y_t, subject_t, list(combined_subjects))
    results[combined_subjects] = history

[0.7482959032058716, 0.8358778357505798]
[3.7286629676818848, 0.47827237844467163]
[1.1228007078170776, 0.7911832928657532]
[2.9966650009155273, 0.5270636081695557]
[0.01303049549460411, 0.9940944910049438]
[3.1232028007507324, 0.45514512062072754]
[3.365304708480835, 0.5218706130981445]
[0.917313277721405, 0.8157067894935608]
[2.6581172943115234, 0.5735632181167603]
[0.3863629400730133, 0.913759708404541]
[2.678438425064087, 0.5264474749565125]
[3.4615697860717773, 0.5103448033332825]
[3.407148838043213, 0.4997028112411499]
[3.2878541946411133, 0.5394675135612488]
[3.5010147094726562, 0.46957671642303467]
[2.7582132816314697, 0.560673177242279]
[0.5224137902259827, 0.9009584784507751]
[2.8044698238372803, 0.5086876153945923]
[2.5591111183166504, 0.5955542922019958]
[3.0516834259033203, 0.49579349160194397]
[2.5552778244018555, 0.5535585880279541]
[3.1609232425689697, 0.5464157462120056]
[3.2150845527648926, 0.5239867568016052]
[3.010965347290039, 0.5717852711677551]
[3.281534433364868

In [8]:
# Print the subject combinations whose second metric exceeds the threshold.
# Index 1 is presumably the accuracy returned by model.evaluate - confirm.
accuracy_threshold = 0.75
for key, metrics in results.items():
    # exclude_subject_evaluate_model returns None when an evaluation failed;
    # indexing None would raise TypeError, so skip those entries.
    if metrics is None:
        continue
    if metrics[1] > accuracy_threshold:
        print(key, metrics)

('EUbKPOSQgjccjtvi',) [0.7482959032058716, 0.8358778357505798]
('NKdnm6cN3KKAx7R4',) [1.1228007078170776, 0.7911832928657532]
('psiuhPJmLIa2wC4u',) [0.01303049549460411, 0.9940944910049438]
('EUbKPOSQgjccjtvi', 'NKdnm6cN3KKAx7R4') [0.917313277721405, 0.8157067894935608]
('EUbKPOSQgjccjtvi', 'psiuhPJmLIa2wC4u') [0.3863629400730133, 0.913759708404541]
('NKdnm6cN3KKAx7R4', 'psiuhPJmLIa2wC4u') [0.5224137902259827, 0.9009584784507751]
('EUbKPOSQgjccjtvi', 'NKdnm6cN3KKAx7R4', 'psiuhPJmLIa2wC4u') [0.6033175587654114, 0.877648651599884]


In [9]:
# Show every distinct subject id present in the loaded evaluation data.
print(np.unique(ar=subject_t))

['EUbKPOSQgjccjtvi' 'MMuX9YIh4NTbLZLM' 'NKdnm6cN3KKAx7R4'
 'PE8D53oX060qLbdX' 'ddAeJA42PXvwthbW' 'nan' 'psiuhPJmLIa2wC4u'
 'xYdtS1F8tDyjEIgN']
