In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import h5py
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
import gc
from keras.utils import to_categorical


In [3]:
def find_fmri_data_folder(start_path):
    for root, dirs, files in os.walk(start_path):
        if 'meg_data' in dirs:
            return os.path.join(root, 'meg_data/Intra/train')
    raise Exception("meg_data folder not found. Please check the directory structure.")

def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

def assign_label(file_name):
    if file_name.startswith("rest"):
        return 0
    elif file_name.startswith("task_motor"):
        return 1
    elif file_name.startswith("task_story"):
        return 2
    elif file_name.startswith("task_working"):
        return 3
    else:
        return None

def load_data(file_path):
    with h5py.File(file_path, 'r') as f:
        dataset_name = get_dataset_name(file_path)
        matrix = f.get(dataset_name)[:]
        return matrix

fmri_data_folder = find_fmri_data_folder('/content/drive/My Drive')
meg_data_list = []
labels = []

for file in os.listdir(fmri_data_folder):
    if file.endswith('.h5'):
        file_path = os.path.join(fmri_data_folder, file)
        data = load_data(file_path)
        meg_data_list.append(data)
        labels.append(assign_label(file))

        # Clear memory
        del data
        gc.collect()

# Convert the list of 2D arrays into a single 3D NumPy array
meg_train_data_array = np.stack(meg_data_list, axis=0)
labels_train_array = np.array(labels)

In [4]:
print(meg_train_data_array.shape)
print(labels_train_array.shape)

(32, 248, 35624)
(32,)


In [5]:
from scipy.signal import butter, filtfilt, resample
import numpy as np

# Parameters
original_fs = 2034  # Original sampling rate
new_fs = original_fs / 4  # New sampling rate
lowcut = 1.0
highcut = 45.0

# Butterworth Bandpass Filter
def butter_lowpass_filter(data, cutoff, fs, order=5):
    nyq = 0.5 * fs  # Nyquist Frequency
    normal_cutoff = cutoff / nyq
    # Get the filter coefficients
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    y = filtfilt(b, a, data)
    return y

def apply_lowpass(array):
  original_sampling_rate = 2034  # Original sampling rate
  downsampling_factor = 40
  new_sampling_rate = original_sampling_rate / downsampling_factor  # New sampling rate after downsampling
  cutoff_frequency = new_sampling_rate / 2  # Nyquist frequency

  array_filtered = np.zeros_like(array)

  for o in range(array.shape[0]):
      for i in range(array.shape[1]):
          array_filtered[o, i, :] = butter_lowpass_filter(array[o, i, :], cutoff_frequency, original_sampling_rate)

  del array
  return array_filtered

def apply_scaling(array):
  array_norm = np.zeros((array.shape[0],array.shape[1],array.shape[2]))
  for i in range(array.shape[0]):
    means = np.mean(array[i], axis=1)  # Calculate mean for each sensor
    stds = np.std(array[i], axis=1)    # Calculate standard deviation for each sensor
    array_norm[i] = (array[i] - means[:, np.newaxis]) / stds[:, np.newaxis]   # Subtrack and divide

  del array, means, stds
  gc.collect()
  return array_norm

# Z-score standardization per trial
standardized_data = apply_scaling(meg_train_data_array)
print("NaNs:", np.isnan(standardized_data).any())
del meg_train_data_array
# Apply bandpass filter and downsample
filtered_data = apply_lowpass(standardized_data)
print("NaNs:", np.isnan(filtered_data).any())
del standardized_data
downsampled_data = resample(filtered_data, int(35000 * new_fs / original_fs), axis=2)




NaNs: False
NaNs: False


In [7]:
downsampled_data.shape

(32, 248, 8750)

In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Reshape
from tensorflow.keras.optimizers import Adam


def create_cnn(N, K, P, C):
    model = Sequential()

    # Spatial filter layer
    model.add(Dense(K, activation=None, input_shape=(P, N)))

    # Reshaping for Conv1D
    model.add(Reshape((P, K)))

    # Temporal filter layer 1
    model.add(Conv1D(K, kernel_size=7, strides=1, padding='same', activation='elu'))

    # Reshaping for Conv2D
    model.add(Reshape((P, K, 1)))

    # Temporal filter layer 2
    model.add(Conv2D(K, kernel_size=(1, 7), strides=(1, 1), padding='same', activation='elu'))

    # Pooling
    model.add(MaxPooling2D(pool_size=(1, 2)))

    # Flatten and Fully Connected Layers
    model.add(Flatten())
    model.add(Dropout(0.50))
    model.add(Dense(156, activation='relu'))
    model.add(Dropout(0.50))
    model.add(Dense(C, activation='softmax'))

    return model

# Parameters
N = 248  # Original channels
K = 10   # Latent sources
P = 8750  # Trial sample points after preprocessing
C = 4    # Number of classes

# Create the CNN model
cnn_model = create_cnn(N, K, P, C)

# Compile the model
cnn_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
cnn_model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8750, 10)          2490      
                                                                 
 reshape_2 (Reshape)         (None, 8750, 10)          0         
                                                                 
 conv1d_1 (Conv1D)           (None, 8750, 10)          710       
                                                                 
 reshape_3 (Reshape)         (None, 8750, 10, 1)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 8750, 10, 10)      80        
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 8750, 5, 10)       0         
 g2D)                                                            
                                                      

In [13]:
# Convert labels to categorical if they aren't already
from tensorflow.keras.utils import to_categorical

y_train_encoded = to_categorical(labels_train_array, num_classes=4)
X_train_reshaped = np.transpose(downsampled_data, (0, 2, 1))


In [14]:
y_train_encoded.shape, X_train_reshaped.shape

((32, 4), (32, 8750, 248))

In [15]:
# Model parameters
batch_size = 32  # Adjust based on your system's capability
epochs = 6      # Number of training epochs

# Train the model
history = cnn_model.fit(
    X_train_reshaped, y_train_encoded,
    epochs=epochs,
    batch_size=batch_size
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [16]:
################ TEST DATA ####################

# Retrieve Test data

def find_fmri_data_folder(start_path):
    for root, dirs, files in os.walk(start_path):
        if 'meg_data' in dirs:
            return os.path.join(root, 'meg_data/Intra/test')
    raise Exception("meg_data folder not found. Please check the directory structure.")

def get_dataset_name(file_name_with_dir):
    filename_without_dir = file_name_with_dir.split('/')[-1]
    temp = filename_without_dir.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name

def assign_label(file_name):
    if file_name.startswith("rest"):
        return 0
    elif file_name.startswith("task_motor"):
        return 1
    elif file_name.startswith("task_story"):
        return 2
    elif file_name.startswith("task_working"):
        return 3
    else:
        return None

def load_data(file_path):
    with h5py.File(file_path, 'r') as f:
        dataset_name = get_dataset_name(file_path)
        matrix = f.get(dataset_name)[:]
        return matrix

fmri_data_folder = find_fmri_data_folder('/content/drive/My Drive')
meg_test_data_list = []
labels_test = []

for file in os.listdir(fmri_data_folder):
    if file.endswith('.h5'):
        file_path = os.path.join(fmri_data_folder, file)
        data = load_data(file_path)
        meg_test_data_list.append(data)
        labels_test.append(assign_label(file))

        # Clear memory
        del data
        gc.collect()

# Convert the list of 2D arrays into a single 3D NumPy array
meg_test_data_array = np.stack(meg_test_data_list, axis=0)
labels_test_array = np.array(labels_test)



In [17]:
from scipy.signal import butter, filtfilt, resample
import numpy as np

# Butterworth Bandpass Filter
def butter_lowpass_filter(data, cutoff, fs, order=5):
    nyq = 0.5 * fs  # Nyquist Frequency
    normal_cutoff = cutoff / nyq
    # Get the filter coefficients
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    y = filtfilt(b, a, data)
    return y

def apply_lowpass(array):
  original_sampling_rate = 2034  # Original sampling rate
  downsampling_factor = 40
  new_sampling_rate = original_sampling_rate / downsampling_factor  # New sampling rate after downsampling
  cutoff_frequency = new_sampling_rate / 2  # Nyquist frequency

  array_filtered = np.zeros_like(array)

  for o in range(array.shape[0]):
      for i in range(array.shape[1]):
          array_filtered[o, i, :] = butter_lowpass_filter(array[o, i, :], cutoff_frequency, original_sampling_rate)

  del array
  return array_filtered

def apply_scaling(array):
  array_norm = np.zeros((array.shape[0],array.shape[1],array.shape[2]))
  for i in range(array.shape[0]):
    means = np.mean(array[i], axis=1)  # Calculate mean for each sensor
    stds = np.std(array[i], axis=1)    # Calculate standard deviation for each sensor
    array_norm[i] = (array[i] - means[:, np.newaxis]) / stds[:, np.newaxis]   # Subtrack and divide

  del array, means, stds
  gc.collect()
  return array_norm

# Z-score standardization per trial
standardized_data = apply_scaling(meg_test_data_array)
print("NaNs:", np.isnan(standardized_data).any())
del meg_test_data_array
# Apply bandpass filter and downsample
filtered_data = apply_lowpass(standardized_data)
print("NaNs:", np.isnan(filtered_data).any())
del standardized_data
downsampled_data = resample(filtered_data, int(35000 * new_fs / original_fs), axis=2)




NaNs: False
NaNs: False


In [18]:
downsampled_data.shape

(8, 248, 8750)

In [19]:
# Convert labels to categorical if they aren't already

y_test_encoded = to_categorical(labels_test_array, num_classes=4)
X_test_reshaped = np.transpose(downsampled_data, (0, 2, 1))

print(y_test_encoded.shape)
print(X_test_reshaped.shape)

(8, 4)
(8, 8750, 248)


In [20]:
test_loss, test_accuracy = cnn_model.evaluate(X_test_reshaped, y_test_encoded)



In [21]:
# Manually evaluate the model on a small batch of test data
for i in range(len(X_test_reshaped)):  # Just an example to check the first few samples
    sample = np.expand_dims(X_test_reshaped[i], axis=0)
    print("sample shape:", sample.shape)
    prediction = cnn_model.predict(sample)
    actual_label = np.argmax(y_test_encoded[i])
    predicted_label = np.argmax(prediction)
    print(f"Sample {i}: Actual Label = {actual_label}, Predicted Label = {predicted_label}")


sample shape: (1, 8750, 248)
Sample 0: Actual Label = 1, Predicted Label = 0
sample shape: (1, 8750, 248)
Sample 1: Actual Label = 3, Predicted Label = 0
sample shape: (1, 8750, 248)
Sample 2: Actual Label = 2, Predicted Label = 3
sample shape: (1, 8750, 248)
Sample 3: Actual Label = 2, Predicted Label = 3
sample shape: (1, 8750, 248)
Sample 4: Actual Label = 1, Predicted Label = 0
sample shape: (1, 8750, 248)
Sample 5: Actual Label = 0, Predicted Label = 0
sample shape: (1, 8750, 248)
Sample 6: Actual Label = 3, Predicted Label = 3
sample shape: (1, 8750, 248)
Sample 7: Actual Label = 0, Predicted Label = 0
