# Import Required Libraries
Import the necessary libraries, including scipy.io for reading MATLAB files.

In [3]:
%pip install -q -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Importing the required library
import scipy.io as sio
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd

# Define the constants for the Dataset:

In [5]:
# Number of EEG samples in one stimulus window; multiplied by
# INTENSIFIED_N_TIMES to obtain the length of a trial (64 x 20 = 1280 samples).
SAMPLE_DURATION = 64
# Number of stimulus windows that make up one trial.
INTENSIFIED_N_TIMES = 20
# NOTE(review): not referenced by any cell visible here; presumably the number
# of selectable characters in the experiment — confirm before relying on it.
N_CHARACTERS = 36

# Define the Function to Load MATLAB Files
Define a function that uses scipy.io.loadmat() to load a MATLAB file and return the data.

In [6]:
def load_matlab_file(file_path):
    """Read the MATLAB file at ``file_path`` and return the result of ``scipy.io.loadmat``."""
    return sio.loadmat(file_path)

# Read the MATLAB Files
Use the defined function to load data from a series of MATLAB files.

In [7]:
# Directory that holds the raw recordings
raw_dir = 'data/raw'

# Only pick up MATLAB files, and do so in a deterministic order:
# os.listdir() returns entries in arbitrary order and also lists stray files
# (e.g. .DS_Store, .gitkeep) that scipy.io.loadmat cannot parse.
matlab_files = sorted(
    name for name in os.listdir(raw_dir) if name.lower().endswith('.mat')
)

# Filtered per-file DataFrames, keyed by file name
all_data = {}

for file in matlab_files:
    # Load the recording with the helper defined above
    raw_data = load_matlab_file(f"{raw_dir}/{file}")['data'][0]
    eeg_data = raw_data['X'][0]  # Shape (347704, 8) for this dataset
    print(eeg_data.shape)
    stimulus_type = raw_data['y'][0]  # Shape (347704, 1)
    stimulus_class = raw_data['y_stim'][0]  # Shape (347704, 1)

    # One column per EEG channel (named ch_1 ... ch_N from the matrix width),
    # plus the two per-sample stimulus annotations
    data = pd.DataFrame(
        eeg_data,
        columns=[f'ch_{i}' for i in range(1, eeg_data.shape[1] + 1)],
    )
    data['stimulus_type'] = stimulus_type
    data['stimulus_class'] = stimulus_class

    # Keep only the samples recorded while a stimulus was shown
    # (stimulus_type 1 or 2)
    data = data[data['stimulus_type'].isin([1, 2])]

    # Store the filtered frame under its file name
    all_data[file] = data

all_data['A01.mat'].shape

(347704, 8)
(347704, 8)
(347704, 8)
(347704, 8)
(347704, 8)
(347704, 8)
(347704, 8)
(347704, 8)


(134400, 10)

# Convert the MATLAB matrices to NumPy arrays

Then split the epochs by class and save each class as a `.npy` file.

In [8]:
def save_data(filename, data):
    """Write ``data`` to ``filename`` in NumPy's binary ``.npy`` format.

    ``np.save`` appends the ``.npy`` extension when ``filename`` is a path
    string that does not already end with it.
    """
    np.save(file=filename, arr=data)

In [24]:
"""
Trial info:
(64 x 20 = 1280) samples per trial
(samples_per_stimuli x n_stimuli x n_characters)
"""
def transform_data(data, subject, n_samples, sample_offset: int = 32):
    """Cut one recording into per-stimulus epochs, separated by class.

    Every trial spans ``SAMPLE_DURATION * INTENSIFIED_N_TIMES`` samples and is
    treated as ``INTENSIFIED_N_TIMES`` back-to-back stimulus windows of
    ``SAMPLE_DURATION`` samples each. For every window, an epoch of
    ``int(SAMPLE_DURATION * n_samples)`` *consecutive* samples is extracted.

    Parameters
    ----------
    data : mapping-like record (e.g. ``loadmat(...)['data'][0]``)
        Must provide ``data['X'][0]`` (samples x channels EEG matrix),
        ``data['y'][0]`` (per-sample stimulus type: 1 = non-target,
        2 = target) and ``data['trial'][0][0]`` (trial start sample indices).
    subject
        Subject identifier. Not used by the computation; kept so existing
        callers keep working.
    n_samples : float
        Epoch length expressed as a multiple of ``SAMPLE_DURATION``
        (0.5 keeps the first half of each stimulus window). Values above 1
        make an epoch run into the following window.
    sample_offset : int
        Number of samples by which every extracted window is shifted forward
        relative to the trial start index.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2, epoch_len, INTENSIFIED_N_TIMES * n_trials,
        n_channels)``. Index 0 of the first axis receives the epochs whose
        labels contain 1; index 1 receives every other epoch. The slot of the
        class an epoch does not belong to is left filled with zeros.

    Raises
    ------
    ValueError
        If ``n_samples`` yields an empty epoch or an epoch would fall outside
        the recording.
    """
    eeg_data = np.asarray(data['X'][0])
    stimulus_type = np.asarray(data['y'][0]).reshape(-1)
    trial_start_indices = np.asarray(data['trial'][0][0]).reshape(-1)

    epoch_len = int(SAMPLE_DURATION * n_samples)
    if epoch_len <= 0:
        raise ValueError(
            f"n_samples={n_samples!r} yields an empty epoch "
            f"(SAMPLE_DURATION={SAMPLE_DURATION})"
        )

    n_recorded = eeg_data.shape[0]
    n_channels = eeg_data.shape[1]

    final_data = np.zeros((2,  # 2 classes
                           epoch_len,  # samples kept per epoch
                           INTENSIFIED_N_TIMES * len(trial_start_indices),  # epochs
                           n_channels))  # channels

    for i, start_idx in enumerate(trial_start_indices):
        # int() guards against wrap-around of narrow unsigned index dtypes.
        # NOTE(review): indices are used as 0-based positions; if the file
        # stores MATLAB 1-based indices every window is one sample late — confirm.
        trial_start = int(start_idx) + sample_offset

        for j in range(INTENSIFIED_N_TIMES):
            # Slice each stimulus window as a contiguous run of samples.
            # A C-order reshape to (epoch_len, n_windows, channels) followed by
            # [:, j, :] would instead pick every n_windows-th sample of the
            # trial, interleaving all stimuli into one "epoch".
            epoch_start = trial_start + j * SAMPLE_DURATION
            epoch_end = epoch_start + epoch_len
            if epoch_start < 0 or epoch_end > n_recorded:
                raise ValueError(
                    f"epoch {j} of trial {i} covers samples "
                    f"[{epoch_start}, {epoch_end}) but the recording has "
                    f"{n_recorded} samples"
                )

            epoch_labels = stimulus_type[epoch_start:epoch_end]

            # Any sample labelled 1 (non-target) sends the epoch to slot 0.
            # NOTE(review): epochs without any stimulus label (all zeros) also
            # end up in slot 1 together with the targets — confirm intended.
            class_idx = 0 if 1 in epoch_labels else 1
            final_data[class_idx, :, i * INTENSIFIED_N_TIMES + j] = eeg_data[epoch_start:epoch_end]

    return final_data

In [27]:

"""
Data set
X=[samples X Channels]
Y=[StimType X 1] ( 1 = NonTarget stimulus, 2 = Target Stimulus)
Y_stim= [StimClass X 1] intensified stimulus classes (Figure 2)
Trial=[Trials X 1] trial start in samples
Classes = textual description of conditions related to Y
Classes_stim = textual description of conditions related to Y_stim
"""
# Recordings A01.mat ... A08.mat
files = [f"A0{idx}.mat" for idx in range(1, 9)]

# Make sure the output directories for both classes (class_1, class_2) exist
class_dirs = ('./data/partitioned/class_1', './data/partitioned/class_2')
for class_dir in class_dirs:
    os.makedirs(class_dir, exist_ok=True)

for file in files:
    subject_id = file.replace('.mat', '')
    matlab_data = load_matlab_file(f"data/raw/{file}")['data'][0]
    # Result has shape (2, 32, 700, 8): 700 = 35 characters * 20 intensifications each
    df = transform_data(matlab_data, subject_id, 0.5)
    print(f"File: {file}")

    # First-axis entry 0 goes to class_1, entry 1 to class_2
    for class_dir, class_epochs in zip(class_dirs, df):
        save_data(f'{class_dir}/{subject_id}', class_epochs)

File: A01.mat
File: A02.mat
File: A03.mat
File: A04.mat
File: A05.mat
File: A06.mat
File: A07.mat
File: A08.mat


In [None]:
# Scratch check: exchanging axes 0 and 1 turns a (1, 3) array into a (3, 1) array.
x = np.array([[1, 2, 3]])
print(x.shape)
np.swapaxes(x, axis1=0, axis2=1).shape

(1, 3)


(3, 1)