In [None]:
import mne
import os

# Read the cleaned resting-state recording (FIF) fully into memory
processed_dir = os.path.join('..', '..', 'processed')
cleaned_fif_path = os.path.join(processed_dir, 'resting_state_cleaned.fif')
raw = mne.io.read_raw_fif(cleaned_fif_path, preload=True)

# Bare last expression so the notebook renders the recording summary
raw

Opening raw data file ..\..\processed\resting_state_cleaned.fif...
    Range : 0 ... 205856 =      0.000 ...   411.712 secs
Ready.
Reading 0 ... 205856  =      0.000 ...   411.712 secs...
    Range : 0 ... 205856 =      0.000 ...   411.712 secs
Ready.
Reading 0 ... 205856  =      0.000 ...   411.712 secs...


  raw = mne.io.read_raw_fif(os.path.join(processed_dir, 'resting_state_cleaned.fif'), preload=True)


Unnamed: 0,General,General.1
,Filename(s),resting_state_cleaned.fif
,MNE object type,Raw
,Measurement date,Unknown
,Participant,Unknown
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,00:06:52 (HH:MM:SS)
,Sampling frequency,500.00 Hz
,Time points,205857
,Channels,Channels


In [None]:
# Turn the annotations into an MNE events array plus a description -> code mapping
events, event_id = mne.events_from_annotations(raw)

# Column 2 of the events array holds the integer event code; select one
# condition at a time by comparing it against the code for that annotation.
event_codes = events[:, 2]
eyes_closed_code = event_id['instructed_toCloseEyes']
eyes_open_code = event_id['instructed_toOpenEyes']

task_1_events = events[event_codes == eyes_closed_code]
task_2_events = events[event_codes == eyes_open_code]

Used Annotations descriptions: ['break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']


In [24]:
# Method to extract and save segments for any event type
import numpy as np
import os

def extract_and_save_segments(raw, events, event_name, segment_duration=1.0, output_dir='../../gan_data'):
    """
    Extract fixed-length EEG segments starting at each event, z-score them
    per channel, and save them for GAN training.

    For every row of ``events`` a window of ``segment_duration`` seconds
    beginning at the event onset is read from ``raw``. Each channel of each
    segment is normalized to zero mean / unit standard deviation. The result
    is written to ``output_dir`` as:

    * ``{event_name}_segments.npy``  - float32 array (n_segments, n_channels, n_timepoints)
    * ``{event_name}_segments.fif``  - the segments concatenated in time as an MNE Raw
    * ``{event_name}_metadata.npy``  - a pickled dict describing the segments

    Parameters:
    -----------
    raw : mne.Raw
        The raw EEG data
    events : numpy array
        Event array from MNE (n_events, 3). Column 0 holds the onset sample
        as produced by ``mne.events_from_annotations`` (i.e. it includes
        ``raw.first_samp``).
    event_name : str
        Name for this event type (e.g., 'eyes_closed', 'eyes_open'); used as
        the file-name prefix.
    segment_duration : float
        Duration of each segment in seconds
    output_dir : str
        Directory to save the data (created if missing)

    Returns:
    --------
    normalized_segments : numpy array
        The normalized segments ready for GAN training,
        shape (n_segments, n_channels, n_timepoints), dtype float32
    metadata : dict
        Metadata about the segments

    Raises:
    -------
    ValueError
        If ``segment_duration`` is shorter than one sample, or if no segment
        fits inside the recording (nothing is written in either case).
    """

    sfreq = raw.info['sfreq']
    # round() instead of plain int() truncation: floating-point products such
    # as 100 * 0.29 == 28.999999999999996 would otherwise lose a sample.
    segment_samples = int(round(sfreq * segment_duration))
    if segment_samples < 1:
        raise ValueError(
            f"segment_duration={segment_duration} s is shorter than one sample at {sfreq} Hz"
        )

    print(f"\n{'='*60}")
    print(f"EXTRACTING {event_name.upper()} SEGMENTS")
    print(f"{'='*60}")
    print(f"Segment duration: {segment_duration} sec ({segment_samples} samples)")

    # Event onsets include raw.first_samp, whereas raw.get_data() indexes the
    # data starting at 0. Remove the offset so the window lands on the event
    # even when the recording does not start at sample 0 (e.g. after cropping).
    first_samp = raw.first_samp

    # Extract segments
    segments = []
    skipped = 0

    for event in events:
        start_sample = int(event[0]) - first_samp
        end_sample = start_sample + segment_samples

        if end_sample <= raw.n_times:
            segments.append(raw.get_data(start=start_sample, stop=end_sample))
        else:
            skipped += 1

    if skipped > 0:
        print(f"⚠ Skipped {skipped} events (too close to end)")

    # Fail early and clearly instead of writing an empty .npy and then
    # crashing on the 3-way shape unpack further down.
    if not segments:
        raise ValueError(
            f"No '{event_name}' segments could be extracted "
            f"({len(events)} events given, {skipped} skipped)"
        )

    segments = np.stack(segments)
    print(f"✓ Extracted {len(segments)} segments")
    print(f"  Shape: {segments.shape} (n_segments, n_channels, n_timepoints)")

    # Normalize segments: z-score every channel of every segment over time
    # (axis 2), done in one vectorized pass rather than a Python loop.
    print("\nNormalizing segments...")
    mean = segments.mean(axis=2, keepdims=True)
    # The small constant guards against division by zero on flat channels.
    # NOTE(review): MNE returns data in SI units, so for EEG in volts 1e-8 may
    # not be negligible next to the per-channel std - confirm this is intended.
    std = segments.std(axis=2, keepdims=True) + 1e-8
    normalized_segments = ((segments - mean) / std).astype(np.float32)
    print(f"✓ Normalized: Mean={normalized_segments.mean():.4f}, Std={normalized_segments.std():.4f}")

    # Save data in multiple formats
    os.makedirs(output_dir, exist_ok=True)

    # 1. NumPy format (for GAN training)
    data_path_npy = os.path.join(output_dir, f'{event_name}_segments.npy')
    np.save(data_path_npy, normalized_segments)
    print(f"✓ Saved NPY: {data_path_npy}")

    # 2. FIF format (MNE native). Segments are laid end-to-end along the time
    #    axis: (n_segments, n_channels, n_timepoints) -> (n_channels, n_segments * n_timepoints).
    #    The Info is rebuilt from channel names only and every channel is typed 'eeg'.
    n_segments, n_channels, n_timepoints = normalized_segments.shape
    concatenated_data = normalized_segments.transpose(1, 0, 2).reshape(n_channels, n_segments * n_timepoints)
    info = mne.create_info(ch_names=raw.ch_names, sfreq=sfreq, ch_types='eeg')
    raw_segments = mne.io.RawArray(concatenated_data, info)

    data_path_fif = os.path.join(output_dir, f'{event_name}_segments.fif')
    raw_segments.save(data_path_fif, overwrite=True)
    print(f"✓ Saved FIF: {data_path_fif}")

    # Save metadata
    metadata = {
        'n_segments': n_segments,
        'n_channels': n_channels,
        'n_timepoints': n_timepoints,
        'sfreq': sfreq,
        'segment_duration': segment_duration,
        'ch_names': raw.ch_names,
        'event_name': event_name
    }

    # np.save pickles the dict into a 0-d object array; read it back with
    # np.load(path, allow_pickle=True).item().
    metadata_path = os.path.join(output_dir, f'{event_name}_metadata.npy')
    np.save(metadata_path, metadata)
    print(f"✓ Saved metadata: {metadata_path}")

    # Summary
    print(f"\n{'='*60}")
    print(f"SUMMARY: {event_name.upper()}")
    print(f"{'='*60}")
    print(f"Total segments: {metadata['n_segments']}")
    print(f"Channels: {metadata['n_channels']}")
    print(f"Timepoints per segment: {metadata['n_timepoints']}")
    print(f"Sampling rate: {metadata['sfreq']} Hz")
    print(f"Segment duration: {metadata['segment_duration']} sec")
    print(f"\nSaved in formats:")
    print(f"  • NPY (NumPy) - for Python/PyTorch GANs")
    print(f"  • FIF (MNE)   - for MNE analysis")
    print(f"{'='*60}\n")

    return normalized_segments, metadata

# Eyes-closed condition: one 1-second segment per 'instructed_toCloseEyes' event
eyes_closed_segments, eyes_closed_meta = extract_and_save_segments(
    raw, task_1_events, 'eyes_closed', segment_duration=1.0
)

# Eyes-open condition: one 1-second segment per 'instructed_toOpenEyes' event
eyes_open_segments, eyes_open_meta = extract_and_save_segments(
    raw, task_2_events, 'eyes_open', segment_duration=1.0
)


EXTRACTING EYES_CLOSED SEGMENTS
Segment duration: 1.0 sec (500 samples)
✓ Extracted 5 segments
  Shape: (5, 129, 500) (n_segments, n_channels, n_timepoints)

Normalizing segments...
✓ Normalized: Mean=0.0000, Std=0.9799
✓ Saved NPY: ../../gan_data\eyes_closed_segments.npy
Creating RawArray with float64 data, n_channels=129, n_times=2500
    Range : 0 ... 2499 =      0.000 ...     4.998 secs
Ready.
    Range : 0 ... 2499 =      0.000 ...     4.998 secs
Ready.


Writing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_closed_segments.fif
Closing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_closed_segments.fif
[done]
Closing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_closed_segments.fif
[done]
✓ Saved FIF: ../../gan_data\eyes_closed_segments.fif
✓ Saved metadata: ../../gan_data\eyes_closed_metadata.npy

SUMMARY: EYES_CLOSED
Total segments: 5
Channels: 129
Timepoints per segment: 500
Sampling rate: 500.0 Hz
Segment duration: 1.0 sec

Saved in formats:
  • NPY (NumPy) - for Python/PyTorch GANs
  • FIF (MNE)   - for MNE analysis


EXTRACTING EYES_OPEN SEGMENTS
Segment duration: 1.0 sec (500 samples)
⚠ Skipped 1 events (too close to end)
✓ Extracted 5 segments
  Shape: (5, 129, 500) (n_segments, n_channels, n_timepoints)

Normalizing segments...
✓ Normalized: Mean=-0.0000, Std=0.9798
✓ Saved NPY: ../../gan_data\eyes_open_segments.npy
Creating RawArray with float64 

  raw_segments.save(data_path_fif, overwrite=True)
  raw_segments.save(data_path_fif, overwrite=True)
