In [None]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

MIND WAVE

In [None]:
# Load the tab-separated MindWave export. The file has no header row, so the
# columns are addressed by integer position until they are named.
df = pd.read_csv("MW.txt", sep='\t', header=None)
print(df.shape)
# Column index 4 (the fifth field) holds the event / stimulus codes.
df[4].unique()

(67635, 7)


array([ 0,  1,  4,  8,  5,  7,  2,  9,  3,  6, -1])

In [None]:
# Name the seven positional columns. 'Stimulus' is the former column index 4,
# 'Count' is used below to pick the common signal length, and 'Signal' still
# holds the comma-separated samples as text at this point.
df.columns = ['ID', 'Timestamp', 'Subject', 'Channel', 'Stimulus', 'Count', 'Signal']
#print(df.head(10))

In [None]:
# Turn each comma-separated sample string into a list of floats.
df['Signal'] = df['Signal'].apply(
    lambda raw: [float(token) for token in raw.split(',')]
)

# Target length for resampling: the median of the 'Count' column.
median_length = int(np.median(df['Count']))
print(f"Median signal length: {median_length}")

def resample_signal(signal, target_length):
    """Linearly resample a 1-D signal to exactly ``target_length`` samples.

    Parameters
    ----------
    signal : sequence of float
        Input samples (list or array).
    target_length : int
        Number of samples in the returned array.

    Returns
    -------
    numpy.ndarray
        A new float array of length ``target_length``. The input is never
        returned as-is, so callers may modify the result without touching
        ``signal``, and the return type is the same on every path.

    Notes
    -----
    * An input that already has ``target_length`` samples is copied unchanged.
    * A single-sample input is repeated; an empty input yields zeros.
    """
    samples = np.array(signal, dtype=float)  # fresh float copy of the input
    n_samples = len(samples)

    if n_samples == target_length:
        return samples  # no resampling needed
    if n_samples < 2:
        # Not enough points to interpolate: pad with a constant.
        fill_value = samples[0] if n_samples == 1 else 0.0
        return np.full(target_length, fill_value)

    original_indices = np.arange(n_samples)
    # Spread the target grid evenly over the original index range so the
    # first and last samples are preserved.
    target_indices = np.linspace(0, n_samples - 1, target_length)
    interpolator = interp1d(original_indices, samples, kind='linear')
    return interpolator(target_indices)

# Bring every row's signal to the common median length.
# (No transpose is applied: transposing a Series returns the Series itself.)
df['Signal'] = df['Signal'].apply(
    lambda sig: resample_signal(sig, median_length)
)

#print(df['Signal'].head(10))
#print(df['Signal'].apply(len))


Median signal length: 953


In [None]:
def interpolate_nans(signal_1d):
    """Fill NaN samples of a 1-D signal by linear interpolation.

    Parameters
    ----------
    signal_1d : sequence of float
        Input samples. Lists are accepted and converted to a float array;
        a float ndarray is filled in place (no copy is made).

    Returns
    -------
    numpy.ndarray
        The signal with NaNs replaced. Leading and trailing NaNs take the
        nearest valid value, because ``np.interp`` clamps outside the range
        of its sample points. A signal with no valid sample at all is
        returned unchanged, since there is nothing to interpolate from.
    """
    signal_1d = np.asarray(signal_1d, dtype=float)
    nan_mask = np.isnan(signal_1d)
    # Skip the all-NaN case: np.interp raises on an empty set of sample points.
    if nan_mask.any() and not nan_mask.all():
        x = np.arange(len(signal_1d))
        signal_1d[nan_mask] = np.interp(x[nan_mask], x[~nan_mask], signal_1d[~nan_mask])
    return signal_1d

# Build one single-channel epoch per DataFrame row.
epochs_data = []
labels = []

for raw_signal, stimulus in zip(df['Signal'], df['Stimulus']):
    cleaned = interpolate_nans(resample_signal(raw_signal, median_length))
    # Add a leading channel axis so each epoch is (1, median_length).
    epochs_data.append(np.array(cleaned).reshape(1, -1))
    labels.append(stimulus)

# Stack into (n_epochs, 1, median_length) and a matching label vector.
epochs_data = np.stack(epochs_data, axis=0)
labels = np.array(labels)

print(f"Final shape: {epochs_data.shape}")
print(f"Labels shape: {labels.shape}")


Final shape: (67635, 1, 953)
Labels shape: (67635,)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each channel separately. The scaler receives an
# (n_epochs, n_times) matrix, so every time index is treated as a feature and
# rescaled to [0, 1] using its minimum and maximum across epochs.
n_channels = epochs_data.shape[1]
for ch in range(n_channels):
    epochs_data[:, ch, :] = MinMaxScaler().fit_transform(epochs_data[:, ch, :])

print(f"Cleaned dataset shape: {epochs_data.shape} (epochs, channels, times)")
print(f"Stimulus labels (example): {labels[:5]}")


Cleaned dataset shape: (67635, 1, 953) (epochs, channels, times)
Stimulus labels (example): [0 1 4 1 8]


MUSE

In [None]:
# Load the tab-separated Muse export. The file has no header row, so the
# columns are addressed by integer position; column index 4 (the fifth
# field) holds the event / stimulus codes.
df = pd.read_csv("MU.txt", sep='\t', header=None)
print(df.shape)

(163932, 7)


In [None]:
# Character length of the first row's raw signal text (column index 6).
print(len(df.iat[0, 6]))
# Distinct values found in column index 4 and column index 5.
for col in (4, 5):
    print(df[col].unique())

1835
[ 6  7  1  8  9  3  2  0  5  4 -1]
[459 493 442 476 425 510 527 544 474 461 452 408 486 449 446 506 561 578
 495 457 472 463 444 497 438 482 470 508 467 468 492 460 477 458 462 473
 464 488 483 469 478 479 485 480 455 440 439 475 465 501 536 612 450 490
 454 481 456 453 491 443 484 487 448 471 524 445 451 498 509]


In [None]:
# `defaultdict` is used below but is not imported by the notebook's visible
# import cell; importing it here keeps this cell runnable on a fresh kernel.
from collections import defaultdict

sfreq = 250.0  # Hz
n_channels = 4
# Rows are consumed in consecutive groups of `n_channels`; any trailing rows
# that do not fill a complete group are ignored.
n_blocks = len(df) // n_channels
# Channel names are taken from column index 3 of the first group of rows.
ch_names = df.iloc[0:n_channels, 3].tolist()
ch_types = ['eeg'] * n_channels

# One list of per-row sample lists for each channel name.
channel_data = defaultdict(list)

# Single pass over the usable rows: column 3 is the channel name, column 6
# the comma-separated samples. Empty tokens (e.g. from a trailing comma) are
# skipped.
usable_rows = n_blocks * n_channels
for ch_name, signal_str in zip(df.iloc[:usable_rows, 3], df.iloc[:usable_rows, 6]):
    signal = [float(x) for x in signal_str.split(',') if x.strip()]
    channel_data[ch_name].append(signal)

# Concatenate signals per channel to create one long time series per channel,
# in the channel order given by `ch_names`.
eeg_data = np.array([np.concatenate(channel_data[name]) for name in ch_names], dtype=np.float32)

print("EEG data shape after concatenation:", eeg_data.shape)


EEG data shape after concatenation: (4, 19196423)


In [None]:
# Show the concatenated (channels x samples) matrix; NumPy abbreviates large arrays when printing.
print(eeg_data)

[[475. 474. 477. ... 309. 317. 314.]
 [468. 487. 493. ... 533. 535. 536.]
 [482. 475. 490. ... 415. 417. 418.]
 [470. 470. 478. ... 312. 314. 315.]]


In [None]:
# Create MNE Raw object
# NOTE(review): `mne` is not imported in any visible cell of this notebook;
# confirm an `import mne` exists (or add one to the imports cell) so this cell
# runs on a fresh kernel.
# `info` carries the channel names, channel types and sampling frequency;
# `eeg_data` is passed as a (n_channels, n_samples) array.
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
raw = mne.io.RawArray(eeg_data, info)

Creating RawArray with float64 data, n_channels=4, n_times=19196423
    Range : 0 ... 19196422 =      0.000 ... 76785.688 secs
Ready.


In [None]:
# Band-pass filter (1-30 Hz) a copy, so the unfiltered `raw` stays available.
raw_filt = raw.copy()
raw_filt.filter(l_freq=1, h_freq=30)  # filters the copy in place
# Display the sampling frequency of the filtered recording.
raw_filt.info['sfreq']

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 30 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 30.00 Hz
- Upper transition bandwidth: 7.50 Hz (-6 dB cutoff frequency: 33.75 Hz)
- Filter length: 825 samples (3.300 s)



[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    6.7s finished


250.0

In [None]:
# Name the seven positional columns of the Muse table.
df.columns = ['ID', 'Timestamp', 'Subject', 'Channel', 'Stimulus', 'Count', 'Signal']
raw_series = df['Signal']

# Parse each comma-separated sample string into a float array.
time_series = raw_series.apply(
    lambda text: np.array(list(map(float, text.split(','))))
)
df['Signal'] = time_series

# Target length for resampling: the median of the 'Count' column.
median_length = int(np.median(df['Count']))

def resample_signal(signal, target_length):
    """Linearly resample a 1-D signal to exactly ``target_length`` samples.

    Parameters
    ----------
    signal : sequence of float
        Input samples (list or array).
    target_length : int
        Number of samples in the returned array.

    Returns
    -------
    numpy.ndarray
        A new float array of length ``target_length``.

    Notes
    -----
    Inputs with fewer than two samples cannot be interpolated (``interp1d``
    raises on them), so a single sample is repeated and an empty input
    yields zeros.
    """
    samples = np.array(signal, dtype=float)  # fresh float copy of the input
    n_samples = len(samples)

    if n_samples == target_length:
        return samples  # already the right length
    if n_samples < 2:
        fill_value = samples[0] if n_samples == 1 else 0.0
        return np.full(target_length, fill_value)

    original = np.arange(n_samples)
    # Evenly spaced positions over the original index range, endpoints included.
    target = np.linspace(0, n_samples - 1, target_length)
    return interp1d(original, samples, kind='linear')(target)

# Resample every row's signal to the common median length.
df['Signal'] = df['Signal'].apply(lambda sig: resample_signal(sig, median_length))

# Group consecutive runs of 4 rows into one epoch of shape (4, median_length).
# The range stops at len(df) - 3, so every slice holds exactly 4 rows and
# trailing leftovers are dropped.
signal_rows = df['Signal'].values
epochs_data = np.array([
    np.stack(signal_rows[start:start + 4])
    for start in range(0, len(df) - 3, 4)
])  # shape: (n_chunks, 4, median_length)
print(f"Shape: {epochs_data.shape}")


def interpolate_nans(epoch):
    """Fill NaN samples by linear interpolation along the time axis.

    Parameters
    ----------
    epoch : array-like
        Either a 2-D ``(n_channels, n_times)`` epoch or a single 1-D signal.
        A float ndarray is filled in place; other inputs are converted to a
        float array first.

    Returns
    -------
    numpy.ndarray
        The data with the same shape as the input and NaNs replaced row by
        row. Leading and trailing NaNs take the nearest valid value
        (``np.interp`` clamps outside its sample points). A row with no
        valid sample is left unchanged, since there is nothing to
        interpolate from.
    """
    epoch = np.asarray(epoch, dtype=float)
    # View the data as rows; for 1-D input this is a (1, n_times) view, so
    # writes below still land in `epoch`.
    rows = np.atleast_2d(epoch)
    x = np.arange(rows.shape[1])
    for y in rows:
        nan_mask = np.isnan(y)
        # Skip clean rows and all-NaN rows (np.interp raises on empty sample points).
        if nan_mask.any() and not nan_mask.all():
            y[nan_mask] = np.interp(x[nan_mask], x[~nan_mask], y[~nan_mask])
    return epoch

# Fill NaNs epoch by epoch before scaling.
epochs_data = np.array([interpolate_nans(e) for e in epochs_data])

from sklearn.preprocessing import MinMaxScaler

# Min-max scale each channel separately. The scaler receives an
# (n_epochs, n_times) matrix, so every time index is treated as a feature and
# rescaled to [0, 1] using its minimum and maximum across epochs.
n_channels = epochs_data.shape[1]
for ch in range(n_channels):
    scaler = MinMaxScaler()
    # Reshape for scaler: (n_epochs, n_times)
    reshaped = epochs_data[:, ch, :]
    reshaped_scaled = scaler.fit_transform(reshaped)
    epochs_data[:, ch, :] = reshaped_scaled

# Derive one label per Muse epoch. Each epoch was built from `n_channels`
# consecutive rows, so take the 'Stimulus' of the first row of every group.
# Without this, `labels` would still be whatever an earlier cell left behind.
# NOTE(review): assumes all rows of a group share the same stimulus code; confirm.
labels = df['Stimulus'].to_numpy()[: len(epochs_data) * n_channels : n_channels]

print(f"Cleaned dataset shape: {epochs_data.shape} (epochs, channels, times)")
print(f"Stimulus labels (example): {labels[:5]}")



Shape: (40983, 4, 460)
Cleaned dataset shape: (40983, 4, 460) (epochs, channels, times)
Stimulus labels (example): [3 9 2 3 6]
