# Split EEG Signals into Individual Channels

This notebook takes the processed EEG epochs and splits them into individual channel files.

**Input**: `processed_epochs/` folder with processed MNE epoch files  
**Output**: `individual_channels/` folder with separate files for each channel

## Output Structure:
```
individual_channels/
├── by_epoch/
│   ├── sub-01_ses-01_trial_000_Inner_Up/
│   │   ├── A1.npy
│   │   ├── A2.npy
│   │   └── ... (128 channel files)
│   └── ...
├── by_channel/
│   ├── A1/
│   │   ├── sub-01_ses-01_trial_000_Inner_Up.npy
│   │   └── ...
│   └── ...
└── metadata/
    ├── channel_files_metadata.csv
    └── split_summary.txt
```

In [7]:
import mne
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import warnings

# Configure settings
mne.set_log_level('WARNING')
warnings.filterwarnings('ignore', category=RuntimeWarning)

print("Libraries imported successfully!")

Libraries imported successfully!


In [8]:
# Configuration
INPUT_PATH = r"D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\processed_epochs"
OUTPUT_PATH = r"D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels"


# Create output directory structure
os.makedirs(OUTPUT_PATH, exist_ok=True)
os.makedirs(f"{OUTPUT_PATH}/by_epoch", exist_ok=True)      # Organized by epoch
os.makedirs(f"{OUTPUT_PATH}/by_channel", exist_ok=True)    # Organized by channel
os.makedirs(f"{OUTPUT_PATH}/metadata", exist_ok=True)      # Metadata files

print(f"Input: {INPUT_PATH}")
print(f"Output: {OUTPUT_PATH}")
print(f"\nOutput structure:")
print(f"  {OUTPUT_PATH}/")
print(f"    ├── by_epoch/     (folders per epoch, files per channel)")
print(f"    ├── by_channel/   (folders per channel, files per epoch)")
print(f"    └── metadata/     (CSV files and summaries)")

# Check input exists
if not os.path.exists(INPUT_PATH):
    print(f"\n❌ ERROR: {INPUT_PATH} not found!")
    print("Run the epoch processing notebook first.")
else:
    print(f"\n✓ Input directory found")

Input: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\processed_epochs
Output: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels

Output structure:
  D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/
    ├── by_epoch/     (folders per epoch, files per channel)
    ├── by_channel/   (folders per channel, files per epoch)
    └── metadata/     (CSV files and summaries)

✓ Input directory found


In [9]:
# Load processed metadata
metadata_file = f"{INPUT_PATH}/metadata/processed_epochs_metadata.pkl"

if os.path.exists(metadata_file):
    processed_metadata = pd.read_pickle(metadata_file)
    print(f"Loaded metadata for {len(processed_metadata)} processed epochs")
    print(f"Duration: {processed_metadata['duration_seconds'].iloc[0]:.2f}s")
    print(f"Time points: {processed_metadata['n_timepoints'].iloc[0]}")
    print(f"Channels: {processed_metadata['n_channels'].iloc[0]}")
    print(f"Sampling frequency: {processed_metadata['sampling_frequency'].iloc[0]} Hz")
else:
    print(f"❌ ERROR: Processed metadata not found at {metadata_file}")
    raise FileNotFoundError("Run epoch processing first")

Loaded metadata for 5440 processed epochs
Duration: 2.50s
Time points: 641
Channels: 128
Sampling frequency: 256.0 Hz


In [10]:
# Get channel names from first epoch
sample_epoch_file = processed_metadata['file_path'].iloc[0]
sample_epoch = mne.read_epochs(sample_epoch_file, verbose=False)
channel_names = sample_epoch.ch_names
n_channels = len(channel_names)
n_timepoints = sample_epoch.get_data().shape[2]
sfreq = sample_epoch.info['sfreq']

print(f"Channel information:")
print(f"  Total channels: {n_channels}")
print(f"  Time points per channel: {n_timepoints}")
print(f"  Sampling frequency: {sfreq} Hz")
print(f"  First 10 channels: {channel_names[:10]}")
print(f"  Last 10 channels: {channel_names[-10:]}")

# Create channel directories
for channel in channel_names:
    os.makedirs(f"{OUTPUT_PATH}/by_channel/{channel}", exist_ok=True)

print(f"\n✓ Created {n_channels} channel directories")

Channel information:
  Total channels: 128
  Time points per channel: 641
  Sampling frequency: 256.0 Hz
  First 10 channels: ['A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10']
  Last 10 channels: ['D23', 'D24', 'D25', 'D26', 'D27', 'D28', 'D29', 'D30', 'D31', 'D32']

✓ Created 128 channel directories


In [11]:
def split_epoch_channels(row, output_base_path):
    """
    Splits a single epoch into individual channel files, with filenames
    containing class and speech type. The files are saved in a directory
    structure organized by epoch ID.

    Parameters:
    - row (pd.Series): A row from the metadata DataFrame containing all
                       necessary info for one epoch (file_path, epoch_id,
                       class, speech_type).
    - output_base_path (str): The base directory where files will be saved.

    Returns:
    - tuple: A tuple containing:
        - success (bool): True if processing was successful, False otherwise.
        - channel_files (list): A list of dictionaries, where each dictionary
                                contains metadata about a newly created channel file.
    """
    try:
        # --- Extract metadata from the row ---
        epoch_file = row['file_path']
        epoch_id = row['epoch_id']
        speech_type = row['speech_type']
        # Use 'class_label' to avoid conflict with the Python keyword 'class'
        class_label = row['class']

        # --- Load epoch data ---
        # We read the .fif file which contains the single epoch
        epoch = mne.read_epochs(epoch_file, verbose=False)
        # Get the data, shape is (1, n_channels, n_timepoints)
        # and remove the first dimension.
        data = epoch.get_data()[0]  # Final shape: (n_channels, n_timepoints)

        channel_files_metadata = []

        # --- Create the output directory for this epoch ---
        # All channels for this epoch will be saved here.
        epoch_dir = os.path.join(output_base_path, "by_epoch", str(epoch_id))
        os.makedirs(epoch_dir, exist_ok=True)

        # --- Iterate through each channel, save it, and record metadata ---
        for ch_idx, channel_name in enumerate(epoch.ch_names):
            channel_data = data[ch_idx]  # Shape: (n_timepoints,)

            # Sanitize the channel name to ensure it's a valid filename component
            safe_channel_name = "".join(c for c in channel_name if c.isalnum() or c in ('-', '_')).rstrip()

            # --- Define the new filename and full path ---
            # Format: {epoch_id}_{speech_type}_{class}_{channel_name}.npy
            file_name = f"{epoch_id}_{speech_type}_{class_label}_{safe_channel_name}.npy"
            output_file_path = os.path.join(epoch_dir, file_name)

            # Save the individual channel data as a .npy file
            np.save(output_file_path, channel_data)

            # --- Record metadata for the file we just created ---
            # This information can be used to build a new master CSV file later.
            channel_files_metadata.append({
                'channel_index': ch_idx,
                'channel_name': channel_name,
                'new_file_path': output_file_path,
                'new_file_name': file_name,
                # Add some basic stats about the channel data
                'data_shape': channel_data.shape,
                'data_min': float(channel_data.min()),
                'data_max': float(channel_data.max()),
                'data_mean': float(channel_data.mean()),
                'data_std': float(channel_data.std())
            })

        # Return success and the list of metadata for all processed channels
        return True, channel_files_metadata

    except Exception as e:
        # If anything goes wrong, print the error and return failure
        print(f"Error processing epoch_id {row.get('epoch_id', 'N/A')} from file {row.get('file_path', 'N/A')}: {e}")
        return False, []

# ==============================================================================
# MAIN PROCESSING SCRIPT
# ==============================================================================
# Assume 'processed_metadata' is a pandas DataFrame loaded with your epoch info
# and 'OUTPUT_PATH' is the base path for your results.
# For example:
# OUTPUT_PATH = "path/to/your/output"
# processed_metadata = pd.read_csv("path/to/your/metadata.csv")
# n_channels = 64 # Example channel count

# --- Mock data for demonstration if you don't have it loaded ---
# Create a dummy metadata DataFrame for testing purposes
if 'processed_metadata' not in locals():
    print("Creating dummy `processed_metadata` DataFrame for demonstration.")
    processed_metadata = pd.DataFrame({
        'file_path': ['dummy_epoch_1.fif', 'dummy_epoch_2.fif'],
        'epoch_id': [101, 102],
        'subject_name': ['sub-01', 'sub-01'],
        'subject_number': [1, 1],
        'session_number': [1, 1],
        'trial_number': [1, 2],
        'speech_type': ['imagined', 'vocalized'],
        'class': ['left', 'right'],
        'class_id': [0, 1],
        'timestamp': [pd.Timestamp.now(), pd.Timestamp.now()],
        'sampling_frequency': [512, 512],
        'duration_seconds': [2.0, 2.0],
        'time_start': [0.0, 0.0],
        'time_end': [2.0, 2.0],
        'freq_low': [1.0, 1.0],
        'freq_high': [40.0, 40.0]
    })
    # Create dummy .fif files
    n_channels = 64
    n_times = 1024 # 2 seconds at 512 Hz
    sfreq = 512
    ch_names = [f'EEG {i:03}' for i in range(n_channels)]
    info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types='eeg')
    dummy_data = np.random.randn(1, n_channels, n_times)
    for f in processed_metadata['file_path']:
        mne.EpochsArray(dummy_data, info).save(f, overwrite=True)
    print("Dummy files created.\n")

if 'OUTPUT_PATH' not in locals():
    OUTPUT_PATH = "./eeg_output"
    print(f"Setting `OUTPUT_PATH` to '{OUTPUT_PATH}' for demonstration.\n")
# --- End of mock data section ---


print(f"Splitting {len(processed_metadata)} epochs into individual channels...")
print(f"Output will be saved in: {os.path.join(OUTPUT_PATH, 'by_epoch')}")

all_channels_metadata = []
success_count = 0
fail_count = 0

# Use tqdm for a progress bar
for _, row in tqdm(processed_metadata.iterrows(), total=len(processed_metadata), desc="Splitting channels"):
    success, channel_files_list = split_epoch_channels(row, OUTPUT_PATH)

    if success:
        # For each channel file created, merge the original epoch metadata
        # with the new channel-specific metadata.
        for channel_meta in channel_files_list:
            # Combine original row (as a dict) with the new metadata
            full_record = {**row.to_dict(), **channel_meta}
            all_channels_metadata.append(full_record)
        success_count += 1
    else:
        fail_count += 1

print("\n=== CHANNEL SPLITTING COMPLETE ===")
print(f"✓ Successfully processed: {success_count} epochs")
print(f"✗ Failed to process:    {fail_count} epochs")
if (success_count + fail_count) > 0:
    success_rate = (success_count / (success_count + fail_count)) * 100
    print(f"✓ Total channel files created: {len(all_channels_metadata)}")
    print(f"► Success rate: {success_rate:.1f}%")

# You can now create a new, detailed DataFrame with all the information
if all_channels_metadata:
    final_metadata_df = pd.DataFrame(all_channels_metadata)
    print(f"\nCreated a new metadata DataFrame with {len(final_metadata_df)} entries.")
    # Save the final metadata to a CSV file for future use
    final_csv_path = os.path.join(OUTPUT_PATH, "channels_metadata.csv")
    final_metadata_df.to_csv(final_csv_path, index=False)
    print(f"✓ Final metadata saved to: {final_csv_path}")
    # Display the first few rows of the new DataFrame
    # display(final_metadata_df.head())


Splitting 5440 epochs into individual channels...
Output will be saved in: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels\by_epoch


Splitting channels:   0%|          | 0/5440 [00:00<?, ?it/s]

Splitting channels: 100%|██████████| 5440/5440 [27:25<00:00,  3.31it/s] 



=== CHANNEL SPLITTING COMPLETE ===
✓ Successfully processed: 5440 epochs
✗ Failed to process:    0 epochs
✓ Total channel files created: 696320
► Success rate: 100.0%

Created a new metadata DataFrame with 696320 entries.
✓ Final metadata saved to: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels\channels_metadata.csv


In [12]:
# Analyze results
if len(all_channels_metadata) > 0:
    channel_df = pd.DataFrame(all_channels_metadata)
    
    print("=== CHANNEL SPLITTING RESULTS ===")
    print(f"Total channel files: {len(channel_df)}")
    print(f"Unique epochs: {channel_df['epoch_id'].nunique()}")
    print(f"Unique channels: {channel_df['channel_name'].nunique()}")
    print(f"Files per epoch: {len(channel_df) // channel_df['epoch_id'].nunique()}")
    print(f"Files per channel: {len(channel_df) // channel_df['channel_name'].nunique()}")
    
    print("\n=== DATA STATISTICS ===")
    print(f"Data shape per channel: {channel_df['data_shape'].iloc[0]}")
    print(f"Data range: {channel_df['data_min'].min():.2e} to {channel_df['data_max'].max():.2e}")
    print(f"Average mean: {channel_df['data_mean'].mean():.2e}")
    print(f"Average std: {channel_df['data_std'].mean():.2e}")
    
    print("\n=== DISTRIBUTION CHECK ===")
    print("Files per speech type:")
    speech_counts = channel_df['speech_type'].value_counts()
    for speech_type, count in speech_counts.items():
        print(f"  {speech_type}: {count} files ({count//n_channels} epochs)")
    
    print("\nFiles per class:")
    class_counts = channel_df['class'].value_counts()
    for class_name, count in class_counts.items():
        print(f"  {class_name}: {count} files ({count//n_channels} epochs)")
    
    print("\nFiles per subject:")
    subject_counts = channel_df['subject_number'].value_counts().sort_index()
    for subject, count in subject_counts.head(5).items():
        print(f"  Subject {subject}: {count} files ({count//n_channels} epochs)")
    
   
else:
    print("❌ No channel files were created!")

=== CHANNEL SPLITTING RESULTS ===
Total channel files: 696320
Unique epochs: 5440
Unique channels: 128
Files per epoch: 128
Files per channel: 5440

=== DATA STATISTICS ===
Data shape per channel: (641,)
Data range: -6.03e-04 to 5.37e-04
Average mean: -4.81e-10
Average std: 4.66e-06

=== DISTRIBUTION CHECK ===
Files per speech type:
  Visualized: 281088 files (2196 epochs)
  Inner: 275968 files (2156 epochs)
  Pronounced: 139264 files (1088 epochs)

Files per class:
  Left: 174080 files (1360 epochs)
  Up: 174080 files (1360 epochs)
  Right: 174080 files (1360 epochs)
  Down: 174080 files (1360 epochs)

Files per subject:
  Subject 1: 64000 files (500 epochs)
  Subject 2: 76800 files (600 epochs)
  Subject 3: 64000 files (500 epochs)
  Subject 4: 76800 files (600 epochs)
  Subject 5: 76800 files (600 epochs)


In [13]:
# Test loading individual channel files
if len(all_channels_metadata) > 0:
    print("=== TESTING CHANNEL FILES ===")
    
    # Test first 3 channel files
    test_files = channel_df.head(3)
    
    for i, (_, row) in enumerate(test_files.iterrows()):
        print(f"\nTest {i+1}: {row['epoch_id']} - {row['channel_name']}")
        
        try:
            # Test by_epoch file
            data_epoch = np.load(row['by_epoch_path'])
            print(f"  ✓ by_epoch file loaded: {data_epoch.shape}")
            
            # Test by_channel file
            data_channel = np.load(row['by_channel_path'])
            print(f"  ✓ by_channel file loaded: {data_channel.shape}")
            
            # Verify they're identical
            if np.array_equal(data_epoch, data_channel):
                print(f"  ✓ Files are identical")
            else:
                print(f"  ⚠ Files differ!")
            
            print(f"  Data range: {data_epoch.min():.2e} to {data_epoch.max():.2e}")
            print(f"  Duration: {len(data_epoch) / row['sampling_frequency']:.2f}s")
            
        except Exception as e:
            print(f"  ✗ Error loading files: {e}")
            
    # Test loading all files for one channel
    print(f"\n=== TESTING CHANNEL COLLECTION ===")
    test_channel = channel_names[0]  # First channel
    channel_files = channel_df[channel_df['channel_name'] == test_channel]
    
    print(f"Testing channel '{test_channel}' with {len(channel_files)} files")
    
    # Load first 3 files for this channel
    for i, (_, row) in enumerate(channel_files.head(3).iterrows()):
        try:
            data = np.load(row['by_channel_path'])
            print(f"  File {i+1}: {row['epoch_id']} - Shape: {data.shape}, Range: {data.min():.2e} to {data.max():.2e}")
        except Exception as e:
            print(f"  File {i+1}: Error - {e}")
            
else:
    print("No channel files to test")

=== TESTING CHANNEL FILES ===

Test 1: sub-01_ses-01_trial_000 - A1
  ✗ Error loading files: 'by_epoch_path'

Test 2: sub-01_ses-01_trial_000 - A2
  ✗ Error loading files: 'by_epoch_path'

Test 3: sub-01_ses-01_trial_000 - A3
  ✗ Error loading files: 'by_epoch_path'

=== TESTING CHANNEL COLLECTION ===
Testing channel 'A1' with 5440 files
  File 1: Error - 'by_channel_path'
  File 2: Error - 'by_channel_path'
  File 3: Error - 'by_channel_path'


In [14]:
# Save metadata and create summary
if len(all_channels_metadata) > 0:
    # Save channel files metadata
    csv_file = f"{OUTPUT_PATH}/metadata/channel_files_metadata.csv"
    pkl_file = f"{OUTPUT_PATH}/metadata/channel_files_metadata.pkl"
    summary_file = f"{OUTPUT_PATH}/metadata/split_summary.txt"
    
    channel_df.to_csv(csv_file, index=False)
    channel_df.to_pickle(pkl_file)
    
    # Create summary
    with open(summary_file, 'w') as f:
        f.write("CHANNEL SPLITTING SUMMARY\n")
        f.write("=" * 40 + "\n\n")
        
        f.write(f"Input: {INPUT_PATH}\n")
        f.write(f"Output: {OUTPUT_PATH}\n\n")
        
        f.write(f"Results:\n")
        f.write(f"  Successfully processed epochs: {success_count}\n")
        f.write(f"  Failed epochs: {fail_count}\n")
        f.write(f"  Total channel files created: {len(channel_df)}\n")
        f.write(f"  Success rate: {success_count/(success_count+fail_count)*100:.1f}%\n\n")
        
        f.write(f"Dataset Info:\n")
        f.write(f"  Unique epochs: {channel_df['epoch_id'].nunique()}\n")
        f.write(f"  Unique channels: {channel_df['channel_name'].nunique()}\n")
        f.write(f"  Files per epoch: {len(channel_df) // channel_df['epoch_id'].nunique()}\n")
        f.write(f"  Files per channel: {len(channel_df) // channel_df['channel_name'].nunique()}\n")
        f.write(f"  Data shape per file: {channel_df['data_shape'].iloc[0]}\n")
        f.write(f"  Sampling frequency: {channel_df['sampling_frequency'].iloc[0]} Hz\n")
        f.write(f"  Duration per file: {channel_df['duration_seconds'].iloc[0]} seconds\n\n")
        
        f.write(f"File Organization:\n")
        f.write(f"  by_epoch/: {channel_df['epoch_id'].nunique()} folders, {n_channels} files each\n")
        f.write(f"  by_channel/: {n_channels} folders, {channel_df['epoch_id'].nunique()} files each\n\n")
        
        f.write(f"Speech Type Distribution:\n")
        for speech_type, count in channel_df['speech_type'].value_counts().items():
            f.write(f"  {speech_type}: {count} files ({count//n_channels} epochs)\n")
        
        f.write(f"\nClass Distribution:\n")
        for class_name, count in channel_df['class'].value_counts().items():
            f.write(f"  {class_name}: {count} files ({count//n_channels} epochs)\n")
        
        f.write(f"\nChannel Names:\n")
        for i, channel in enumerate(channel_names):
            if i < 10 or i >= len(channel_names) - 10:
                f.write(f"  {channel}\n")
            elif i == 10:
                f.write(f"  ... ({len(channel_names) - 20} more channels) ...\n")
    
    print(f"\n=== FILES SAVED ===")
    print(f"✓ {len(channel_df)} channel files (.npy format)")
    print(f"✓ Metadata: {csv_file}")
    print(f"✓ Metadata: {pkl_file}")
    print(f"✓ Summary: {summary_file}")
    
    print(f"\n=== USAGE EXAMPLES ===")
    print(f"\n1. Load single channel file:")
    print(f"   data = np.load('individual_channels/by_epoch/epoch_id/channel.npy')")
    print(f"   # Shape: ({n_timepoints},) - time series for one channel")
    
    print(f"\n2. Load all channels for one epoch:")
    print(f"   epoch_folder = 'individual_channels/by_epoch/sub-01_ses-01_trial_000_Inner_Up/'")
    print(f"   channels = {{}}")
    print(f"   for ch in channel_names:")
    print(f"       channels[ch] = np.load(f'{{epoch_folder}}/{{ch}}.npy')")
    
    print(f"\n3. Load all epochs for one channel:")
    print(f"   channel_folder = 'individual_channels/by_channel/A1/'")
    print(f"   epochs = []")
    print(f"   for file in os.listdir(channel_folder):")
    print(f"       epochs.append(np.load(f'{{channel_folder}}/{{file}}'))")
    
    print(f"\n4. Use metadata for filtering:")
    print(f"   metadata = pd.read_csv('{csv_file}')")
    print(f"   inner_speech_A1 = metadata[")
    print(f"       (metadata['speech_type'] == 'Inner') & ")
    print(f"       (metadata['channel_name'] == 'A1')")
    print(f"   ]")
    
    print(f"\n🎉 CHANNEL SPLITTING COMPLETE!")
    print(f"Your data is now organized as:")
    print(f"  • {len(channel_df)} individual channel files")
    print(f"  • {channel_df['epoch_id'].nunique()} epochs × {n_channels} channels")
    print(f"  • Organized by epoch AND by channel")
    print(f"  • Each file: {n_timepoints} time points, {channel_df['duration_seconds'].iloc[0]:.2f}s duration")
    print(f"  • Ready for channel-specific analysis in: {OUTPUT_PATH}")
    
else:
    print("❌ No channel files to save")


=== FILES SAVED ===
✓ 696320 channel files (.npy format)
✓ Metadata: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/metadata/channel_files_metadata.csv
✓ Metadata: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/metadata/channel_files_metadata.pkl
✓ Summary: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/metadata/split_summary.txt

=== USAGE EXAMPLES ===

1. Load single channel file:
   data = np.load('individual_channels/by_epoch/epoch_id/channel.npy')
   # Shape: (641,) - time series for one channel

2. Load all channels for one epoch:
   epoch_folder = 'individual_channels/by_epoch/sub-01_ses-01_trial_000_Inner_Up/'
   channels = {}
   for ch in channel_names:
       channels[ch] = np.load(f'{epoch_folder}/{ch}.npy')

3. Load all epochs for one channel:
   channel_folder = 'individual_channels/by_channel/A1/'
   epochs = []
   for file in os.li