In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import h5py
import pandas as pd
import numpy as np
import tensorflow as tf
import os
from scipy.signal import butter, filtfilt, lfilter
import matplotlib.pyplot as plt
import gc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Flatten, BatchNormalization, Dropout, MaxPooling1D, Concatenate, Input, TimeDistributed, Reshape, Permute
from keras.models import Model
from keras.utils import to_categorical
from tensorflow import keras
import pywt
import scipy.signal as signal
from scipy.integrate import simps

import scipy

In [3]:
# ---------- FUNCTIONS ---------- #

#___SCALING___#
def apply_scaling(array):
  array_norm = np.zeros((array.shape[0],array.shape[1]))
  for i in range(array.shape[0]):
    means = np.mean(array[i])  # Calculate mean for each sensor
    stds = np.std(array[i])    # Calculate standard deviation for each sensor
    array_norm[i] = (array[i] - means) / stds   # Subtrack and divide

  del array, means, stds
  gc.collect()
  return array_norm

#___LOWPASS FILTER___#
def butter_lowpass_filter(data, cutoff, fs, order=5):
    nyq = 0.5 * fs  # Nyquist Frequency
    normal_cutoff = cutoff / nyq
    # Get the filter coefficients
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    y = filtfilt(b, a, data)
    return y

def apply_lowpass(array):
  original_sampling_rate = 2034  # Original sampling rate
  downsampling_factor = 4
  new_sampling_rate = original_sampling_rate / downsampling_factor  # New sampling rate after downsampling
  cutoff_frequency = new_sampling_rate / 2  # Nyquist frequency

  array_filtered = np.zeros((array.shape[0], array.shape[1]))

  for i in range(array.shape[0]):  # Iterate over sensors
      array_filtered[i, :] = butter_lowpass_filter(array[i, :], cutoff_frequency, original_sampling_rate)

  del array
  return array_filtered

#___DOWNSAMPLING___#
def apply_downsampling(array):
  n_sensors, n_timepoints = array.shape

  downsampling_factor = 4
  new_n_timepoints = n_timepoints // downsampling_factor
  array_downsamp = np.zeros((n_sensors, new_n_timepoints))

  for sensor in range(n_sensors):
    array_downsamp[sensor,:] = array[sensor, ::downsampling_factor]

  del array, new_n_timepoints, downsampling_factor, n_sensors, n_timepoints
  gc.collect()
  return array_downsamp


#___STATS___#
def calculate_statistics(data):
    # data: numpy array of shape (timepoints,)
    return np.array([
        np.mean(data),  # Mean
        np.std(data),   # Standard Deviation
        np.max(data),   # Maximum
        np.min(data),   # Minimum
        np.mean(np.abs(np.diff(data))),  # Average absolute first difference
        np.mean(np.abs(np.diff(data, n=2))),  # Average absolute second difference
        scipy.stats.skew(data),  # Skewness
        scipy.stats.kurtosis(data)  # Kurtosis
    ])

def extract_features_and_segment(data, num_segments):
    # data: numpy array of shape (records, sensors, timepoints)
    num_sensors, num_timepoints = data.shape
    segment_length = num_timepoints // num_segments

    # Initialize array for extracted features
    # Shape: (records, sensors, segments, features)
    extracted_features = np.zeros((num_sensors, num_segments, 8))

    for sensor in range(num_sensors):
        for segment in range(num_segments):
            start = segment * segment_length
            end = start + segment_length
            segment_data = data[sensor, start:end]
            extracted_features[sensor, segment] = calculate_statistics(segment_data)

    return extracted_features




In [4]:
# LOAD FILES

def get_file_paths_and_labels(data_folder, task_numbers):
  file_paths = []
  labels = []
  for task_number in task_numbers:
      for file in os.listdir(data_folder):
          if file.endswith(task_number + 'h5'):
              file_path = os.path.join(data_folder, file)
              file_paths.append(file_path)
              labels.append(assign_label(file))
  return file_paths, labels

def find_fmri_data_folder(start_path):
    for root, dirs, files in os.walk(start_path):
        if 'meg_data' in dirs:
            return os.path.join(root, 'meg_data/Intra/train')
    raise Exception("meg_data folder not found. Please check the directory structure.")

def load_data(file_path):
  with h5py.File(file_path, 'r') as f:
      dataset_name = get_dataset_name(file_path)
      matrix = f.get(dataset_name)[:]
      return matrix

def get_dataset_name(file_name_with_dir):
  filename_without_dir = file_name_with_dir.split('/')[-1]
  temp = filename_without_dir.split('_')[:-1]
  dataset_name = "_".join(temp)
  return dataset_name

def assign_label(file_name):
  if file_name.startswith("rest"):
      return 0
  elif file_name.startswith("task_motor"):
      return 1
  elif file_name.startswith("task_story"):
      return 2
  elif file_name.startswith("task_working"):
      return 3
  else:
      return None

def count_files_with_task_numbers(data_folder, task_numbers):
    total_files = 0
    for file in os.listdir(data_folder):
        if any(file.endswith(task_number + 'h5') for task_number in task_numbers):
            total_files += 1
    return total_files

In [5]:
# Call Preprocessing functions
def preprocess_data_1(data, i, segments):
  print(f"*** FILE {i} ***")
  data = apply_scaling(data)
  print("scaling applied: shape:", data.shape, end=', ')
  data = apply_lowpass(data)
  print("lowpass applied: shape:", data.shape, end=', ')
  data = apply_downsampling(data)
  print("downsam applied: shape:", data.shape, end=', ')
  data = extract_features_and_segment(data,segments)
  print("Data after feature extraction:", np.array(data).shape)
  return np.array(data)

def preprocess_data_2(data, i):
  print(f"*** FILE {i} ***")
  data = apply_scaling(data)
  print("scaling applied: shape:", data.shape, end=', ')
  data = apply_lowpass(data)
  print("lowpass applied: shape:", data.shape, end=', ')
  data = apply_downsampling(data)
  print("downsam applied: shape:", data.shape, end=', ')
  return np.array(data)

In [6]:
# Retieve the initial data from the folder

fmri_data_folder = find_fmri_data_folder('/content/drive')
print("fmri_data_folder:", fmri_data_folder)

meg_data_list = []
labels = []

for file in os.listdir(fmri_data_folder):
    if file.endswith('.h5'):
        file_path = os.path.join(fmri_data_folder, file)
        data = load_data(file_path)
        meg_data_list.append(data)
        labels.append(assign_label(file))

        # Clear memory
        del data
        gc.collect()

# Convert the list of 2D arrays into a single 3D NumPy array
meg_train_data_array = np.stack(meg_data_list, axis=0)
labels_train_array = np.array(labels)

fmri_data_folder: /content/drive/MyDrive/Courses/Pattern Recognition/Lab/Group Assignment/meg_data/Intra/train


In [7]:
print("## Initial shapes of the data ##")
print("MEG:", meg_train_data_array.shape)
print("Labels:",labels_train_array.shape)

## Initial shapes of the data ##
MEG: (32, 248, 35624)
Labels: (32,)


In [None]:
# ----- SEGMENT AND STATS ----- #
segments = 61
processed_data_stats = np.zeros((meg_train_data_array.shape[0], meg_train_data_array.shape[1], segments, 8))
print(processed_data_stats.shape)
for i in range(meg_train_data_array.shape[0]):
  processed_data_stats[i] = preprocess_data_1(meg_train_data_array[i], i+1, segments)

In [None]:
# ----- NO SEGMENT ----- #
processed_data = np.zeros((meg_train_data_array.shape[0], meg_train_data_array.shape[1], meg_train_data_array.shape[2]//4))
print(processed_data.shape)
for i in range(meg_train_data_array.shape[0]):
  processed_data[i] = preprocess_data_2(meg_train_data_array[i], i+1)

In [14]:
X_stats = processed_data_stats
X_raw = processed_data
y_train = to_categorical(labels_train_array, num_classes=4)

del processed_data, processed_data_stats, labels_train_array

In [15]:
print("## Final shapes of the data ##")
print("MEG:", X_stats.shape, X_raw.shape)
print("Labels:",y_train.shape)

## Final shapes of the data ##
MEG: (32, 248, 61, 8) (32, 248, 8906)
Labels: (32, 4)


In [19]:
X_stats_flat = X_stats.reshape(32, 248, -1)
print(X_stats_flat.shape)

(32, 248, 488)


In [46]:
# ----- MODEL ----- #

def build_model(input_stats, input_raw, num_classes):
  # Branch for statistical data
  std_input = Input(shape=input_stats)
  print(std_input)
  std_branch = Dense(64, activation='relu')(std_input)
  std_branch = Dense(32, activation='relu')(std_branch)
  std_branch = Flatten()(std_branch)  # Flatten to 1D

  # Branch for raw data
  raw_input = Input(shape=input_raw)
  print(raw_input)
  raw_branch = Conv1D(filters=32, kernel_size=3, activation='relu')(raw_input)
  raw_branch = Flatten()(raw_branch)

  # Merge branches
  merged = Concatenate()([std_branch, raw_branch])

  # Final layers
  merged = Dense(64, activation='relu')(merged)
  output = Dense(num_classes, activation='softmax')(merged)

  # Create model
  model = Model(inputs=[std_input, raw_input], outputs=output)

  # Compile and train
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model

In [32]:
print("X_stats_flat shape:", X_stats_flat.shape)
print("X_raw shape:", X_raw.shape)
print("Labels shape:", y_train.shape)

print("X_stats_flat shape:", type(X_stats_flat))
print("X_raw shape:", type(X_raw))
print("Labels shape:", type(y_train))

X_stats_flat shape: (32, 248, 488)
X_raw shape: (32, 248, 8906)
Labels shape: (32, 4)
X_stats_flat shape: <class 'numpy.ndarray'>
X_raw shape: <class 'numpy.ndarray'>
Labels shape: <class 'numpy.ndarray'>


In [48]:
model = build_model((X_stats_flat.shape[1], X_stats_flat.shape[2]), (X_raw.shape[1], X_raw.shape[2]), 4)
print((X_stats_flat.shape[1], X_stats_flat.shape[2]), (X_raw.shape[1], X_raw.shape[2]))
print(model)
history = model.fit([X_stats_flat, X_raw], labels, epochs=10, batch_size=32)

KerasTensor(type_spec=TensorSpec(shape=(None, 248, 488), dtype=tf.float32, name='input_35'), name='input_35', description="created by layer 'input_35'")
KerasTensor(type_spec=TensorSpec(shape=(None, 248, 8906), dtype=tf.float32, name='input_36'), name='input_36', description="created by layer 'input_36'")
(248, 488) (248, 8906)
<keras.src.engine.functional.Functional object at 0x7e54001d75b0>


ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>"}), (<class 'list'> containing values of types {"<class 'int'>"})