In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import h5py
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
import gc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, Activation
from keras.utils import to_categorical
from tensorflow import keras
import pywt


In [3]:
def find_fmri_data_folder(start_path):
    for root, dirs, files in os.walk(start_path):
        if 'meg_data' in dirs:
            return os.path.join(root, 'meg_data/Intra/train')
    raise Exception("meg_data folder not found. Please check the directory structure.")

def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

def assign_label(file_name):
    if file_name.startswith("rest"):
        return 0
    elif file_name.startswith("task_motor"):
        return 1
    elif file_name.startswith("task_story"):
        return 2
    elif file_name.startswith("task_working"):
        return 3
    else:
        return None

def load_data(file_path):
    with h5py.File(file_path, 'r') as f:
        dataset_name = get_dataset_name(file_path)
        matrix = f.get(dataset_name)[:]
        return matrix

fmri_data_folder = find_fmri_data_folder('/content/drive/My Drive')
meg_data_list = []
labels = []

for file in os.listdir(fmri_data_folder):
    if file.endswith('.h5'):
        file_path = os.path.join(fmri_data_folder, file)
        data = load_data(file_path)
        meg_data_list.append(data)
        labels.append(assign_label(file))

        # Clear memory
        del data
        gc.collect()

# Convert the list of 2D arrays into a single 3D NumPy array
meg_train_data_array = np.stack(meg_data_list, axis=0)
labels_train_array = np.array(labels)

In [4]:
print(meg_train_data_array.shape)
print(labels_train_array.shape)

(32, 248, 35624)
(32,)


In [5]:
def apply_scaling_z_score(array):
  array_norm = np.zeros((array.shape[0],array.shape[1],array.shape[2]))
  for i in range(array.shape[0]):
    means = np.mean(array[i], axis=1)  # Calculate mean for each sensor
    stds = np.std(array[i], axis=1)    # Calculate standard deviation for each sensor
    array_norm[i] = (array[i] - means[:, np.newaxis]) / stds[:, np.newaxis]   # Subtrack and divide

  del array, means, stds
  gc.collect()
  return array_norm

def butter_lowpass_filter(data, cutoff, fs, order=5):
    nyq = 0.5 * fs  # Nyquist Frequency
    normal_cutoff = cutoff / nyq
    # Get the filter coefficients
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    y = filtfilt(b, a, data)
    return y

def apply_lowpass(array):
  original_sampling_rate = 2034  # Original sampling rate
  downsampling_factor = 4
  new_sampling_rate = original_sampling_rate / downsampling_factor  # New sampling rate after downsampling
  cutoff_frequency = new_sampling_rate / 2  # Nyquist frequency

  array_filtered = np.zeros_like(array)

  for o in range(array.shape[0]):
      for i in range(array.shape[1]):
          array_filtered[o, i, :] = butter_lowpass_filter(array[o, i, :], cutoff_frequency, original_sampling_rate)
  del array
  gc.collect()
  return array_filtered


def apply_downsampling(array):
  n_observations, n_sensors, n_timepoints = array.shape
  downsampling_factor = 4
  new_n_timepoints = n_timepoints // downsampling_factor
  array_downsamp = np.zeros((n_observations, n_sensors, new_n_timepoints))

  for obs in range(n_observations):
    for sensor in range(n_sensors):
      array_downsamp[obs,sensor,:] = array[obs, sensor, ::downsampling_factor]

  del array, new_n_timepoints, downsampling_factor, n_observations, n_sensors, n_timepoints
  gc.collect()
  return array_downsamp



In [6]:
# APPLY SCALING FILTER AND DOWNSAMPLING
array_scaled_z_score = apply_scaling_z_score(meg_train_data_array)
array_filtered = apply_lowpass(array_scaled_z_score)
del array_scaled_z_score
array_doensampled = apply_downsampling(array_filtered)
del array_filtered

In [7]:
print(array_doensampled.shape)
print(labels_train_array.shape)

(32, 248, 8906)
(32,)


In [8]:
X_train = array_doensampled
del array_doensampled
y_train = labels_train_array.reshape(-1,1)
del labels_train_array

In [9]:
y_train = to_categorical(y_train, num_classes=4)

In [10]:
y_train.shape

(32, 4)

In [11]:
# Retrieve Test data

def find_fmri_data_folder(start_path):
    for root, dirs, files in os.walk(start_path):
        if 'meg_data' in dirs:
            return os.path.join(root, 'meg_data/Intra/test')
    raise Exception("meg_data folder not found. Please check the directory structure.")

def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

def assign_label(file_name):
    if file_name.startswith("rest"):
        print(file_name, "REST")
        return 0
    elif file_name.startswith("task_motor"):
        print(file_name, "MOTOR")
        return 1
    elif file_name.startswith("task_story"):
        print(file_name, "STORY")
        return 2
    elif file_name.startswith("task_working"):
        print(file_name, "WORKING")
        return 3
    else:
        return None

def load_data(file_path):
    with h5py.File(file_path, 'r') as f:
        dataset_name = get_dataset_name(file_path)
        matrix = f.get(dataset_name)[:]
        return matrix

fmri_data_folder = find_fmri_data_folder('/content/drive/My Drive')
meg_test_data_list = []
labels_test = []

for file in os.listdir(fmri_data_folder):
    if file.endswith('.h5'):
        file_path = os.path.join(fmri_data_folder, file)
        data = load_data(file_path)
        meg_test_data_list.append(data)
        labels_test.append(assign_label(file))

        # Clear memory
        del data
        gc.collect()

# Convert the list of 2D arrays into a single 3D NumPy array
meg_test_data_array = np.stack(meg_test_data_list, axis=0)
labels_test_array = np.array(labels_test)

X_task_norm = apply_scaling_z_score(meg_test_data_array)
del meg_test_data_array
gc.collect()

# Lowpass filter
X_task_filtered = apply_lowpass(X_task_norm)
del X_task_norm
gc.collect()

# Downsample
X_task_downsamp = apply_downsampling(X_task_filtered)
del X_task_filtered
gc.collect()



obs_test, sensors_test, points_test = X_task_downsamp.shape
X_test = X_task_downsamp.reshape(obs_test, sensors_test, points_test)
y_test_encoded = to_categorical(labels_test_array, num_classes=4)

task_motor_105923_10.h5 MOTOR
task_working_memory_105923_9.h5 WORKING
task_story_math_105923_10.h5 STORY
task_story_math_105923_9.h5 STORY
task_motor_105923_9.h5 MOTOR
rest_105923_9.h5 REST
task_working_memory_105923_10.h5 WORKING
rest_105923_10.h5 REST


In [12]:
X_test.shape, y_test_encoded.shape

((8, 248, 8906), (8, 4))

In [13]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.T = 120

        # Layer 1
        self.conv1 = nn.Conv2d(248, 16, (1, 64), padding=0)
        self.batchnorm1 = nn.BatchNorm2d(16)

        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(16, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4)
        self.pooling2 = nn.MaxPool2d(2, 4)

        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4)
        self.pooling3 = nn.MaxPool2d((2, 4))

        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints.
        self.fc1 = nn.Linear(4*2*7, 4)


    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        x = F.dropout(x, 0.25)
        # x = x.permute(0, 3, 1, 2)

        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, 0.25)
        x = self.pooling2(x)

        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, 0.25)
        x = self.pooling3(x)

        # FC Layer
        x = x.reshape(-1, 4*2*7)
        x = F.sigmoid(self.fc1(x))
        return x


net = EEGNet().float()
print (net.forward(Variable(torch.Tensor(np.random.rand(1, 248, 1, 8906)))))
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters())

RuntimeError: ignored

In [None]:
def evaluate(model, X, Y, params = ["acc"]):
    results = []
    batch_size = 100

    predicted = []

    for i in range(len(X)//batch_size):
        s = i*batch_size
        e = i*batch_size+batch_size

        inputs = Variable(torch.from_numpy(X[s:e]))
        pred = model(inputs)

        predicted.append(pred.data.cpu().numpy())


    inputs = Variable(torch.from_numpy(X)).float()
    predicted = model(inputs)

    predicted = predicted.data.cpu().numpy()

    for param in params:
        if param == 'acc':
            results.append(accuracy_score(Y, np.round(predicted)))
        if param == "auc":
            results.append(roc_auc_score(Y, predicted))
        if param == "recall":
            results.append(recall_score(Y, np.round(predicted)))
        if param == "precision":
            results.append(precision_score(Y, np.round(predicted)))
        if param == "fmeasure":
            precision = precision_score(Y, np.round(predicted))
            recall = recall_score(Y, np.round(predicted))
            results.append(2*precision*recall/ (precision+recall))
    return results

In [None]:
batch_size = 32

for epoch in range(10):  # loop over the dataset multiple times
    print ("\nEpoch ", epoch)

    running_loss = 0.0
    for i in range(len(X_train)//batch_size-1):
        s = i*batch_size
        e = i*batch_size+batch_size

        inputs = torch.from_numpy(X_train[s:e]).float()
        labels = torch.FloatTensor(np.array([y_train[s:e]]).T*1.0).float()

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()


        optimizer.step()

        running_loss += loss.data[0]

    # Validation accuracy
    params = ["acc", "auc", "fmeasure"]
    print (params)
    print ("Training Loss ", running_loss)
    print ("Train - ", evaluate(net, X_train, y_train, params))
    print ("Validation - ", evaluate(net, X_test, y_test_encoded, params))
    print ("Test - ", evaluate(net, X_test, y_test_encoded, params))


Epoch  0
['acc', 'auc', 'fmeasure']
Training Loss  0.0


RuntimeError: ignored

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, ELU, Dropout, ZeroPadding2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras import regularizers
import tensorflow.keras.backend as K

def EEGNet():
    model = Sequential()

    # Layer 1
    model.add(Conv2D(16, (1, 64), input_shape=(248, 8906,1), padding='valid'))
    model.add(BatchNormalization(axis=1))
    model.add(ELU())
    model.add(Dropout(0.25))
    # No permute layer in Keras; adjust as needed

    # Layer 2
    model.add(ZeroPadding2D(padding=((0, 1), (16, 17))))  # Adjusted padding
    model.add(Conv2D(4, (2, 32), padding='valid'))
    model.add(BatchNormalization(axis=1))
    model.add(ELU())
    model.add(Dropout(0.25))
    model.add(MaxPooling2D(pool_size=(2, 4)))

    # Layer 3
    model.add(ZeroPadding2D(padding=((4, 3), (2, 1))))  # Adjusted padding
    model.add(Conv2D(4, (8, 4), padding='valid'))
    model.add(BatchNormalization(axis=1))
    model.add(ELU())
    model.add(Dropout(0.25))
    model.add(MaxPooling2D(pool_size=(2, 4)))

    # FC Layer
    model.add(Flatten())
    model.add(Dense(4, activation='softmax', kernel_regularizer=regularizers.l2(0.1)))  # Output layer for 4 classes

    return model

# Create the model
net = EEGNet()

# Compile the model
net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
net.summary()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 248, 8843, 16)     1040      
                                                                 
 batch_normalization_3 (Bat  (None, 248, 8843, 16)     992       
 chNormalization)                                                
                                                                 
 elu_3 (ELU)                 (None, 248, 8843, 16)     0         
                                                                 
 dropout_3 (Dropout)         (None, 248, 8843, 16)     0         
                                                                 
 zero_padding2d_3 (ZeroPadd  (None, 249, 8876, 16)     0         
 ing2D)                                                          
                                                                 
 conv2d_6 (Conv2D)           (None, 248, 8845, 4)     

In [None]:
# Assuming X_train, y_train, X_test, y_test_encoded are correctly formatted
# Reshape X_train and X_test if necessary
# Convert y_train and y_test_encoded to categorical if they're not already

net.fit(X_train, y_train, batch_size=32, epochs=10)


Epoch 1/10


In [None]:
# Evaluate on the training set
train_loss, train_accuracy = net.evaluate(X_train, y_train)

# Evaluate on the test set
test_loss, test_accuracy = net.evaluate(X_test, y_test_encoded)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
