## Ensemble Model

In [81]:
import os
import numpy as np
import keras
from keras.utils import to_categorical
import tensorflow as tf
import scipy

In [25]:
# Unpack the dataset archive and move the contents of its `project/` folder
# into the working directory.
#   -o  overwrite without prompting, so the cell can be re-run safely
#   -q  quiet; `> /dev/null 2>&1` is the portable spelling of bash's `&>`
#       (under a plain POSIX sh, `cmd &> file` backgrounds `cmd` instead of
#       silencing it, which would race with the rm/mv lines below)
!unzip -o -q project_data.zip > /dev/null 2>&1
# -f: do not fail when the archive carries no macOS metadata folder
!rm -rf __MACOSX
!mv project/* .
!rm -rf project

Unzip each model directory from `pretrained_models.zip`.

In [19]:
# Start from a clean slate; -f keeps this from failing on a first run when
# the directory does not exist yet.
!rm -rf pretrained_models

In [20]:
%%bash
# Unpack the outer archive, then expand every per-model zip it contains
# next to it inside pretrained_models/.
unzip -o pretrained_models.zip
for filename in pretrained_models/*.zip; do
    # Quote the path (file names may contain spaces or glob characters) and
    # delete the archive only after it was extracted successfully, so a
    # failed extraction never silently loses a model.
    if unzip -o -q "$filename" -d pretrained_models/; then
        rm -- "$filename"
    else
        echo "Failed to extract $filename; leaving it in place" >&2
        exit 1
    fi
done

Archive:  pretrained_models.zip
   creating: pretrained_models/
 extracting: pretrained_models/lstm-keras-0.5395.zip  
 extracting: pretrained_models/cnn+gru-keras-0.7088.zip  
 extracting: pretrained_models/cnn-keras-0.7156.zip  


In [71]:
# Seed once, up front, so repeated runs of the notebook are comparable.
tf.keras.utils.set_random_seed(0)

## Loading the dataset

# Training/validation split. The shapes in the trailing comments are the
# ones noted by the original author: (num_trials, channels, time bins).
X_train_valid = np.load("X_train_valid.npy")  # (2115, 22, 1000)
y_train_valid = np.load("y_train_valid.npy")  # (2115,)
person_train_valid = np.load("person_train_valid.npy")  # (2115, 1)  participant id, 0-8

# Held-out test split.
X_test = np.load("X_test.npy")  # (443, 22, 1000)
# presumably (443,): one integer event code per trial -- it is shifted by 769
# below and only one-hot encoded later via to_categorical (TODO confirm shape)
y_test = np.load("y_test.npy")
person_test = np.load("person_test.npy")  # (443, 1)

## Re-basing the labels so that the classes are numbered from zero:
#   0 - cue onset left
#   1 - cue onset right
#   2 - cue onset foot
#   3 - cue onset tongue
y_train_valid -= 769
y_test -= 769
## Visualizing the data

# Work on a single channel (index 8) across all training/validation trials.
ch_data = X_train_valid[:, 8, :]


def _trials_for_class(label):
    """Return the index tuple and the channel data of trials labelled `label`."""
    trial_idx = np.where(y_train_valid == label)
    return trial_idx, ch_data[trial_idx]


# For each of the four classes: which trials belong to it, their channel
# data, and the mean over those trials (one value per time bin).
class_0_ind, ch_data_class_0 = _trials_for_class(0)
avg_ch_data_class_0 = ch_data_class_0.mean(axis=0)

class_1_ind, ch_data_class_1 = _trials_for_class(1)
avg_ch_data_class_1 = ch_data_class_1.mean(axis=0)

class_2_ind, ch_data_class_2 = _trials_for_class(2)
avg_ch_data_class_2 = ch_data_class_2.mean(axis=0)

class_3_ind, ch_data_class_3 = _trials_for_class(3)
avg_ch_data_class_3 = ch_data_class_3.mean(axis=0)

# Don't increase the number of test samples
def test_data_prep(X):
    total_X = None
    # Trimming the data (sample,22,1000) -> (sample,22,800)
    X = X[:,:,0:800]
    print('Shape of X after trimming:',X.shape)
    # Maxpooling the data (sample,22,800) -> (sample,22,800/sub_sample)
    X_max = np.max(X.reshape(X.shape[0], X.shape[1], -1, 2), axis=3)
    total_X = X_max
    print('Shape of X after maxpooling:',total_X.shape)
    return total_X
# Trim/pool the held-out trials, then rearrange (trials, channels, time)
# into (trials, time, 1, channels) for the networks.
X_test_prep = test_data_prep(X_test)
x_test = np.transpose(X_test_prep, (0, 2, 1))[:, :, np.newaxis, :]

num_channels = 22
num_classes = 4

# One-hot copy of the labels for Keras' evaluate(); the integer y_test is
# kept as-is for comparisons against argmax predictions.
one_hot_y_test = to_categorical(y_test, num_classes)

input_shape = x_test.shape[1:]
print("input_shape", input_shape)

Shape of X after trimming: (443, 22, 800)
Shape of X after maxpooling: (443, 22, 400)
input_shape (400, 1, 22)


## Load the models

In [21]:
# Load every pretrained model found in `pretrained_dir` into `ensemble_models`.
models = []
ensemble_models = []
pretrained_dir = 'pretrained_models'
# os.listdir returns entries in arbitrary order; sort so the ensemble members
# are always loaded in the same sequence.
for file_name in sorted(os.listdir(pretrained_dir)):
    # Skip hidden files and archive metadata folders (e.g. .DS_Store,
    # __MACOSX) that unzipping can leave behind -- they are not models.
    if file_name.startswith(('.', '__')):
        continue
    file_path = os.path.join(pretrained_dir, file_name)
    loaded_model = keras.models.load_model(file_path)
    ensemble_models.append(loaded_model)

## Majority Vote (Mode) Ensembling

In [84]:
# Predict labels with models
preds = []
for m in ensemble_models:
    preds.append(np.argmax(m.predict(x_test, verbose=0), axis=1))
preds = np.array(preds)

overall_preds = scipy.stats.mode(preds, axis=0)[0]
test_acc = (y_test == overall_preds).sum() / len(y_test)
print("Test Accuracy for Ensemble Model: ", test_acc)

Test Accuracy for Ensemble Model:  0.7223476297968398


## Average Ensembling

In [73]:
# Wire every pretrained network to one shared input and average their
# outputs element-wise to form a single ensemble model.
model_input = tf.keras.Input(shape=input_shape)
model_outputs = []
for member in ensemble_models:
    model_outputs.append(member(model_input))
ensemble_output = tf.keras.layers.Average()(model_outputs)
ensemble_model = tf.keras.Model(model_input, ensemble_output)

# Compiled with a metric only: the model is evaluated here, never trained.
ensemble_model.compile(metrics=['accuracy'])

# Index 1 of the evaluate() result is read as the accuracy
# (presumably index 0 is the loss slot -- TODO confirm for the Keras version in use).
model_score = ensemble_model.evaluate(x_test, one_hot_y_test, verbose=0)
test_acc = model_score[1]
print(f'Test accuracy: {test_acc : .4f}')

Test accuracy:  0.6907
