In [1]:
import scipy
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt


In [2]:
%run "../../functions/get_data.ipynb"
%run "../../functions/bandpass.ipynb"
%run "../../functions/trimmer.ipynb"

# DATA PREPROCESSING

In [3]:
folder_path = "../../data/swd_data/walk_inst_raw/"
old_folder_path = "../../data/initial_data/walk_inst_raw/"

In [4]:
file_list = get_file_list(folder_path)
old_file_list = get_file_list(old_folder_path)

print(file_list)
print(old_file_list)

['../../data/swd_data/walk_inst_raw/P_1.mat', '../../data/swd_data/walk_inst_raw/P_10.mat', '../../data/swd_data/walk_inst_raw/P_2.mat', '../../data/swd_data/walk_inst_raw/P_3.mat', '../../data/swd_data/walk_inst_raw/P_4.mat', '../../data/swd_data/walk_inst_raw/P_5.mat', '../../data/swd_data/walk_inst_raw/P_9.mat', '../../data/swd_data/walk_inst_raw/S_1.mat', '../../data/swd_data/walk_inst_raw/S_10.mat', '../../data/swd_data/walk_inst_raw/S_2.mat', '../../data/swd_data/walk_inst_raw/S_3.mat', '../../data/swd_data/walk_inst_raw/S_4.mat', '../../data/swd_data/walk_inst_raw/S_5.mat', '../../data/swd_data/walk_inst_raw/S_9.mat']
['../../data/initial_data/walk_inst_raw/P_1.mat', '../../data/initial_data/walk_inst_raw/P_10.mat', '../../data/initial_data/walk_inst_raw/P_2.mat', '../../data/initial_data/walk_inst_raw/P_3.mat', '../../data/initial_data/walk_inst_raw/P_4.mat', '../../data/initial_data/walk_inst_raw/P_5.mat', '../../data/initial_data/walk_inst_raw/P_9.mat', '../../data/initial_da

In [5]:
X = []
Y = []

removed_trials = 0
initial_trials = 0

for i, file in enumerate(file_list):
    x = scipy.io.loadmat(file)
    x = x['file_swds']

    old_x = scipy.io.loadmat(old_file_list[i])
    old_x = old_x['data']
    initial_trials += old_x.shape[0]

    if(x.shape[0] > 0):

        print(f"{old_x.shape[0]} trials => {x.shape[1]} trials")

        removed_trials = removed_trials + old_x.shape[0] - x.shape[1]
        
        data = np.array([ m[0] for m in x[0] ])
        
        first_letter = file.split('/')[-1][0]

        if first_letter == 'P':
            y = np.zeros(x.shape[1])
        else:
            y = np.ones(x.shape[1])
            
        Y.append(y)
        X.append(data)

    else:
        print(f"{old_x.shape[0]} trials => 0 trials")
        removed_trials += old_x.shape[0]

print(f"Total trials removed: {removed_trials} out of {initial_trials}")



56 trials => 1 trials
37 trials => 26 trials
49 trials => 0 trials
56 trials => 51 trials
58 trials => 25 trials
57 trials => 26 trials
60 trials => 50 trials
60 trials => 25 trials
67 trials => 33 trials
60 trials => 26 trials
80 trials => 50 trials
79 trials => 16 trials
66 trials => 46 trials
84 trials => 60 trials
Total trials removed: 434 out of 869


In [6]:
input_data = []
for x in X:
    data = []
    for i, trial in enumerate(x):
        trial_data = []
        for j, channel in enumerate(trial):
            channel_data = []
            for k, ocm in enumerate(channel):
                ocm_data = []
                for l, sample in enumerate(ocm):
                    ocm_data.append(sample)
                channel_data.append(ocm_data)
            trial_data.append(channel_data)
        data.append(trial_data)

    data = np.array(data)
    input_data.append(data)


# reshape the data in accordance to the input shape of the model
for i, data in enumerate(input_data):
    input_data[i] = np.reshape(data, (data.shape[0], data.shape[1], data.shape[3], data.shape[2]))

In [7]:
for i, data in enumerate(input_data):
    print(data.shape, Y[i].shape)


(1, 11, 250, 3) (1,)
(26, 11, 250, 3) (26,)
(51, 11, 250, 3) (51,)
(25, 11, 250, 3) (25,)
(26, 11, 250, 3) (26,)
(50, 11, 250, 3) (50,)
(25, 11, 250, 3) (25,)
(33, 11, 250, 3) (33,)
(26, 11, 250, 3) (26,)
(50, 11, 250, 3) (50,)
(16, 11, 250, 3) (16,)
(46, 11, 250, 3) (46,)
(60, 11, 250, 3) (60,)


# DEFINING, COMPILING AND TRAINING THE MODEL

In [None]:
best_acc = 0
best_model = None

accuracies = []
p_acc = []
s_acc = []

num_folds = 10
kfold = KFold(n_splits=num_folds, shuffle=True)

# model.summary()

for index_1 in [1,2,3,4,5]:
    for index_2 in range(6, 13):

        print(f"Training on {index_1} and {index_2}")

        # build training set from allfiles expect the ones with index_1 and index_2
        X_train = [input_data[i] for i in range(len(input_data)) if i != index_1 and i != index_2]
        Y_train = [Y[i] for i in range(len(Y)) if i != index_1 and i != index_2]

        # build test set from the files with index_1 and index_2
        X_test = [input_data[i] for i in range(len(input_data)) if i == index_1 or i == index_2]
        Y_test = [Y[i] for i in range(len(Y)) if i == index_1 or i == index_2]

        X_train = np.concatenate(X_train)
        Y_train = np.concatenate(Y_train)

        model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(30, (8,8), activation='relu', input_shape=(11, 250, 3)),
            tf.keras.layers.MaxPooling2D((2,2)),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(2, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

        
        # Train the model
        model.fit(X_train, Y_train, epochs=50, batch_size=32, verbose=0)

        # make prediction on the 2 test files individually
        X_test_1 = X_test[0]
        Y_test_1 = Y_test[0]
        X_test_2 = X_test[1]
        Y_test_2 = Y_test[1]

        probs_1 = model.predict(X_test_1)
        probs_2 = model.predict(X_test_2)

        # get accuracy for each test file
        acc_1 = balanced_accuracy_score(Y_test_1, probs_1.round())
        acc_2 = balanced_accuracy_score(Y_test_2, probs_2.round())

        p_acc.append(acc_1)
        s_acc.append(acc_2)

        # get average accuracy
        acc = (acc_1 + acc_2) / 2
        accuracies.append(acc)

        if(acc > best_acc):
            best_acc = acc
            best_model = model

### Evaluate the model on the test data

In [59]:
print(f"Best accuracy: {best_acc}")
print(f"Average accuracy: {np.mean(accuracies)} ± {np.std(accuracies)}")

print(f"Average accuracy for P: {np.mean(p_acc)} ± {np.std(p_acc)}")
print(f"Average accuracy for S: {np.mean(s_acc)} ± {np.std(s_acc)}")

Best accuracy: 0.658695652173913
Average accuracy: 0.5323070550506612
Std accuracy: 0.07511040185459088
Average accuracy for P: 0.2276181857358328 ± 0.18869231316235932
Average accuracy for S: 0.8369959243654895 ± 0.141028839518155
