In [6]:
import mne
import os
import glob

# load processed fif file
processed_dir = os.path.join('..', '..', 'processed')
resting_files = glob.glob(os.path.join(processed_dir, "*resting_state*.fif"))

print(f"Found {len(resting_files)} resting state files:")
for f in resting_files:
    print(f"  - {os.path.basename(f)}")

# Load all files
raw_list = []
for file_path in resting_files:
    print(f"\nLoading {os.path.basename(file_path)}...")
    r = mne.io.read_raw_fif(file_path, preload=True)
    raw_list.append(r)
    print(f"  ✓ Channels: {len(r.ch_names)}, Duration: {r.times[-1]:.1f}s")

# Concatenate all files into one continuous recording
if len(raw_list) > 1:
    raw = mne.concatenate_raws(raw_list)
    print(f"\n✓ Combined {len(raw_list)} files into one continuous recording")
else:
    raw = raw_list[0]

print(f"\nFinal combined data:")
print(f"  Total duration: {raw.times[-1]:.1f} seconds")
print(f"  Channels: {len(raw.ch_names)}")
print(f"  Sampling rate: {raw.info['sfreq']} Hz")

Found 5 resting state files:
  - resting_state_cleaned_0.fif
  - resting_state_cleaned_1.fif
  - resting_state_cleaned_2.fif
  - resting_state_cleaned_3.fif
  - resting_state_cleaned_4.fif

Loading resting_state_cleaned_0.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_0.fif...
    Range : 0 ... 878478 =      0.000 ...  1756.956 secs
Ready.
Reading 0 ... 878478  =      0.000 ...  1756.956 secs...
    Range : 0 ... 878478 =      0.000 ...  1756.956 secs
Ready.
Reading 0 ... 878478  =      0.000 ...  1756.956 secs...


  r = mne.io.read_raw_fif(file_path, preload=True)


  ✓ Channels: 129, Duration: 1757.0s

Loading resting_state_cleaned_1.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_1.fif...
    Range : 0 ... 178845 =      0.000 ...   357.690 secs
    Range : 0 ... 178845 =      0.000 ...   357.690 secs
Ready.
Reading 0 ... 178845  =      0.000 ...   357.690 secs...
Ready.
Reading 0 ... 178845  =      0.000 ...   357.690 secs...
  ✓ Channels: 129, Duration: 357.7s

Loading resting_state_cleaned_2.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_2.fif...
  ✓ Channels: 129, Duration: 357.7s

Loading resting_state_cleaned_2.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_2.fif...
    Range : 0 ... 174991 =      0.000 ...   349.982 secs
Ready.
Reading 0 ... 174991  =      0.000 ...   349.982 secs...
    Range : 0 ... 174991 =      0.000 ...   349.982 secs
Ready.
Reading 0 ... 174991  =      0.000 ...   349.982 secs...


  r = mne.io.read_raw_fif(file_path, preload=True)
  r = mne.io.read_raw_fif(file_path, preload=True)


  ✓ Channels: 129, Duration: 350.0s

Loading resting_state_cleaned_3.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_3.fif...
    Range : 0 ... 160241 =      0.000 ...   320.482 secs
Ready.
    Range : 0 ... 160241 =      0.000 ...   320.482 secs
Ready.
Reading 0 ... 160241  =      0.000 ...   320.482 secs...
Reading 0 ... 160241  =      0.000 ...   320.482 secs...
  ✓ Channels: 129, Duration: 320.5s

Loading resting_state_cleaned_4.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_4.fif...
    Range : 0 ... 158541 =      0.000 ...   317.082 secs
Ready.
  ✓ Channels: 129, Duration: 320.5s

Loading resting_state_cleaned_4.fif...
Opening raw data file ..\..\processed\resting_state_cleaned_4.fif...
    Range : 0 ... 158541 =      0.000 ...   317.082 secs
Ready.
Reading 0 ... 158541  =      0.000 ...   317.082 secs...
Reading 0 ... 158541  =      0.000 ...   317.082 secs...


  r = mne.io.read_raw_fif(file_path, preload=True)
  r = mne.io.read_raw_fif(file_path, preload=True)


  ✓ Channels: 129, Duration: 317.1s

✓ Combined 5 files into one continuous recording

Final combined data:
  Total duration: 3102.2 seconds
  Channels: 129
  Sampling rate: 500.0 Hz

✓ Combined 5 files into one continuous recording

Final combined data:
  Total duration: 3102.2 seconds
  Channels: 129
  Sampling rate: 500.0 Hz


In [20]:
# Extract events for each raw file separately
task_1_events_list = []
task_2_events_list = []

for raw in raw_list:
    events, event_dict = mne.events_from_annotations(raw)
    
    # Extract eyes closed events (event_id 3)
    task_1 = events[events[:, 2] == 3]
    task_1_events_list.append(task_1)
    
    # Extract eyes open events (event_id 4)
    task_2 = events[events[:, 2] == 4]
    task_2_events_list.append(task_2)

print(f"\n✓ Found events:")
print(f"  Eyes Closed: {sum(len(e) for e in task_1_events_list)} events")
print(f"  Eyes Open: {sum(len(e) for e in task_2_events_list)} events")

Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']

✓ Found events:
  Eyes Closed: 66 events
  Eyes Open: 74 events
Used Annotations descriptions: ['boundary', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'instructed_toOpenEyes', 'resting_start']
Used Annotations descriptions: ['boundary', 'break cnt', 'instructed_toCloseEyes', 'in

In [23]:
# Method to extract and save segments for any event type
import numpy as np
import os

def extract_and_save_segments_from_multiple(raw_list, events_list, event_name, segment_duration=1.0, output_dir='../../gan_data'):
    """
    Extract EEG segments from multiple raw files and combine
    
    Parameters:
    -----------
    raw_list : list of mne.Raw
        List of raw EEG data objects
    events_list : list of numpy arrays
        List of event arrays, one per raw file
    event_name : str
        Name for this event type (e.g., 'eyes_closed', 'eyes_open')
    segment_duration : float
        Duration of each segment in seconds
    output_dir : str
        Directory to save the data
    """
    
    print(f"\n{'='*60}")
    print(f"EXTRACTING {event_name.upper()} SEGMENTS FROM {len(raw_list)} FILES")
    print(f"{'='*60}")
    
    all_segments = []
    
    # Process each file
    for file_idx, (raw, events) in enumerate(zip(raw_list, events_list)):
        sfreq = raw.info['sfreq']
        segment_samples = int(sfreq * segment_duration)
        
        print(f"\nFile {file_idx + 1}/{len(raw_list)}: {len(events)} events")
        
        segments = []
        skipped = 0
        
        for event in events:
            start_sample = event[0]
            end_sample = start_sample + segment_samples
            
            if end_sample <= raw.n_times:
                segment = raw.get_data(start=start_sample, stop=end_sample)
                segments.append(segment)
            else:
                skipped += 1
        
        if skipped > 0:
            print(f"  ⚠ Skipped {skipped} events (too close to end)")
        
        print(f"  ✓ Extracted {len(segments)} segments")
        all_segments.extend(segments)
    
    # Combine all segments
    all_segments = np.array(all_segments)
    print(f"\n✓ Total extracted: {len(all_segments)} segments")
    print(f"  Shape: {all_segments.shape} (n_segments, n_channels, n_timepoints)")
    
    # Normalize segments
    print("\nNormalizing segments...")
    normalized_segments = []
    
    for segment in all_segments:
        mean = segment.mean(axis=1, keepdims=True)
        std = segment.std(axis=1, keepdims=True) + 1e-8
        normalized = (segment - mean) / std
        normalized_segments.append(normalized)
    
    normalized_segments = np.array(normalized_segments).astype(np.float32)
    print(f"✓ Normalized: Mean={normalized_segments.mean():.4f}, Std={normalized_segments.std():.4f}")
    
    # Save data in multiple formats
    os.makedirs(output_dir, exist_ok=True)
    
    # 1. NumPy format (for GAN training)
    data_path_npy = os.path.join(output_dir, f'{event_name}_segments.npy')
    np.save(data_path_npy, normalized_segments)
    print(f"✓ Saved NPY: {data_path_npy}")
    
    # 2. FIF format (MNE native - preserves all info)
    n_segments, n_channels, n_timepoints = normalized_segments.shape
    concatenated_data = normalized_segments.transpose(1, 0, 2).reshape(n_channels, n_segments * n_timepoints)
    info = mne.create_info(ch_names=raw_list[0].ch_names, sfreq=raw_list[0].info['sfreq'], ch_types='eeg')
    raw_segments = mne.io.RawArray(concatenated_data, info)
    
    data_path_fif = os.path.join(output_dir, f'{event_name}_segments.fif')
    raw_segments.save(data_path_fif, overwrite=True)
    print(f"✓ Saved FIF: {data_path_fif}")
    
    # 3. SAVE INDIVIDUAL SEGMENTS with indexes
    individual_dir = os.path.join(output_dir, event_name + '_individual')
    os.makedirs(individual_dir, exist_ok=True)
    
    print(f"\n✓ Saving individual segments to {individual_dir}...")
    for idx, segment in enumerate(normalized_segments):
        segment_path = os.path.join(individual_dir, f'{event_name}_segment_{idx:04d}.npy')
        np.save(segment_path, segment)
    
    print(f"✓ Saved {len(normalized_segments)} individual segment files")
    print(f"  Format: {event_name}_segment_0000.npy, {event_name}_segment_0001.npy, ...")
    
    # Save metadata
    metadata = {
        'n_segments': len(normalized_segments),
        'n_channels': normalized_segments.shape[1],
        'n_timepoints': normalized_segments.shape[2],
        'sfreq': raw_list[0].info['sfreq'],
        'segment_duration': segment_duration,
        'ch_names': raw_list[0].ch_names,
        'event_name': event_name,
        'n_files': len(raw_list),
        'individual_segments_dir': individual_dir
    }
    
    metadata_path = os.path.join(output_dir, f'{event_name}_metadata.npy')
    np.save(metadata_path, metadata)
    print(f"✓ Saved metadata: {metadata_path}")
    
    # Summary
    print(f"\n{'='*60}")
    print(f"SUMMARY: {event_name.upper()}")
    print(f"{'='*60}")
    print(f"Total segments: {metadata['n_segments']} (from {metadata['n_files']} files)")
    print(f"Channels: {metadata['n_channels']}")
    print(f"Timepoints per segment: {metadata['n_timepoints']}")
    print(f"Sampling rate: {metadata['sfreq']} Hz")
    print(f"Segment duration: {metadata['segment_duration']} sec")
    print(f"\nSaved formats:")
    print(f"  • Combined NPY: {event_name}_segments.npy")
    print(f"  • Combined FIF: {event_name}_segments.fif")
    print(f"  • Individual segments: {individual_dir}/")
    print(f"{'='*60}\n")
    
    return normalized_segments, metadata

# Extract and save EYES CLOSED segments from all files
eyes_closed_segments, eyes_closed_meta = extract_and_save_segments_from_multiple(
    raw_list=raw_list,
    events_list=task_1_events_list,
    event_name='eyes_closed',
    segment_duration=1.0
)

# Extract and save EYES OPEN segments from all files
eyes_open_segments, eyes_open_meta = extract_and_save_segments_from_multiple(
    raw_list=raw_list,
    events_list=task_2_events_list,
    event_name='eyes_open',
    segment_duration=1.0
)


EXTRACTING EYES_CLOSED SEGMENTS FROM 5 FILES

File 1/5: 45 events
  ✓ Extracted 45 segments

File 2/5: 6 events
  ✓ Extracted 6 segments

File 3/5: 5 events
  ✓ Extracted 5 segments

File 4/5: 5 events
  ✓ Extracted 5 segments

File 5/5: 5 events
  ✓ Extracted 5 segments

✓ Total extracted: 66 segments
  Shape: (66, 129, 500) (n_segments, n_channels, n_timepoints)

Normalizing segments...
✓ Normalized: Mean=0.0000, Std=0.9898
✓ Saved NPY: ../../gan_data\eyes_closed_segments.npy
Creating RawArray with float64 data, n_channels=129, n_times=33000
    Range : 0 ... 32999 =      0.000 ...    65.998 secs
Ready.
Writing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_closed_segments.fif
✓ Saved NPY: ../../gan_data\eyes_closed_segments.npy
Creating RawArray with float64 data, n_channels=129, n_times=33000
    Range : 0 ... 32999 =      0.000 ...    65.998 secs
Ready.
Writing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_closed_segments.fif

  raw_segments.save(data_path_fif, overwrite=True)


  ✓ Extracted 54 segments

File 2/5: 2 events
  ✓ Extracted 2 segments

File 3/5: 6 events
  ✓ Extracted 6 segments

File 4/5: 6 events
  ✓ Extracted 6 segments

File 5/5: 6 events
  ✓ Extracted 6 segments

✓ Total extracted: 74 segments
  Shape: (74, 129, 500) (n_segments, n_channels, n_timepoints)

Normalizing segments...
✓ Normalized: Mean=0.0000, Std=0.9901
✓ Saved NPY: ../../gan_data\eyes_open_segments.npy
Creating RawArray with float64 data, n_channels=129, n_times=37000
    Range : 0 ... 36999 =      0.000 ...    73.998 secs
Ready.
Writing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_open_segments.fif
    Range : 0 ... 36999 =      0.000 ...    73.998 secs
Ready.
Writing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_open_segments.fif
Closing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_open_segments.fif
[done]
Closing c:\Users\JINQI\Coursework\DATA495\notebooks\joseph-nb\..\..\gan_data\eyes_ope

  raw_segments.save(data_path_fif, overwrite=True)
