# Split EEG Signals into Individual Channels

This notebook takes the processed EEG epochs and splits them into individual channel files.

**Input**: `processed_epochs/` folder with processed MNE epoch files, indexed by `processed_epochs/metadata/processed_epochs_metadata.pkl`  
**Output**: `individual_channels/` folder with separate files for each channel

## Output Structure:
```
individual_channels/
├── by_epoch/
│   ├── sub-01_ses-01_trial_000_Inner_Up/
│   │   ├── A1.npy
│   │   ├── A2.npy
│   │   └── ... (128 channel files)
│   └── ...
├── by_channel/
│   ├── A1/
│   │   ├── sub-01_ses-01_trial_000_Inner_Up.npy
│   │   └── ...
│   └── ...
└── metadata/
    ├── channel_files_metadata.csv
    ├── channel_files_metadata.pkl
    └── split_summary.txt
```

In [None]:
import mne
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import warnings

# Keep the notebook output quiet: RuntimeWarning messages are suppressed for
# the session and MNE only logs at WARNING level.
warnings.filterwarnings(action='ignore', category=RuntimeWarning)
mne.set_log_level(verbose='WARNING')

print("Libraries imported successfully!")

In [None]:
# Configuration
INPUT_PATH = "processed_epochs"  # Input folder with processed epochs
OUTPUT_PATH = "individual_channels"  # Output folder for channel files

# Build the output tree: the root folder first, then one sub-folder for each
# layout (by epoch, by channel) and one for the metadata files.
os.makedirs(OUTPUT_PATH, exist_ok=True)
for subfolder in ("by_epoch", "by_channel", "metadata"):
    os.makedirs(f"{OUTPUT_PATH}/{subfolder}", exist_ok=True)

# Echo the configuration and the layout that was just created
print(
    f"Input: {INPUT_PATH}",
    f"Output: {OUTPUT_PATH}",
    "\nOutput structure:",
    f"  {OUTPUT_PATH}/",
    "    ├── by_epoch/     (folders per epoch, files per channel)",
    "    ├── by_channel/   (folders per channel, files per epoch)",
    "    └── metadata/     (CSV files and summaries)",
    sep="\n",
)

# Report whether the input folder is present. This is informational only:
# the cell does not stop execution when the folder is missing.
if os.path.exists(INPUT_PATH):
    print("\n✓ Input directory found")
else:
    print(f"\n❌ ERROR: {INPUT_PATH} not found!")
    print("Run the epoch processing notebook first.")

In [None]:
# Load processed metadata
metadata_file = f"{INPUT_PATH}/metadata/processed_epochs_metadata.pkl"

# Fail fast when the upstream notebook has not produced its metadata yet.
if not os.path.exists(metadata_file):
    print(f"❌ ERROR: Processed metadata not found at {metadata_file}")
    raise FileNotFoundError("Run epoch processing first")

# NOTE(security): unpickling can execute arbitrary code. Only load a metadata
# file that this pipeline wrote itself; never point this at an untrusted file.
processed_metadata = pd.read_pickle(metadata_file)
print(f"Loaded metadata for {len(processed_metadata)} processed epochs")

# The summary below reads the first row, so reject an empty table explicitly
# instead of letting .iloc[0] fail with an opaque IndexError.
if len(processed_metadata) == 0:
    raise ValueError(f"No processed epochs are listed in {metadata_file}")

print(f"Duration: {processed_metadata['duration_seconds'].iloc[0]:.2f}s")
print(f"Time points: {processed_metadata['n_timepoints'].iloc[0]}")
print(f"Channels: {processed_metadata['n_channels'].iloc[0]}")
print(f"Sampling frequency: {processed_metadata['sampling_frequency'].iloc[0]} Hz")

In [None]:
# Inspect the first processed epoch file to learn the recording layout
sample_epoch_file = processed_metadata['file_path'].iloc[0]
sample_epoch = mne.read_epochs(sample_epoch_file, verbose=False)

sfreq = sample_epoch.info['sfreq']
channel_names = sample_epoch.ch_names
n_channels = len(channel_names)
# The third axis of the epoch data array is the time axis
n_timepoints = sample_epoch.get_data().shape[2]

print(
    "Channel information:",
    f"  Total channels: {n_channels}",
    f"  Time points per channel: {n_timepoints}",
    f"  Sampling frequency: {sfreq} Hz",
    f"  First 10 channels: {channel_names[:10]}",
    f"  Last 10 channels: {channel_names[-10:]}",
    sep="\n",
)

# One by_channel/ sub-folder per channel name found in the sample epoch
by_channel_root = f"{OUTPUT_PATH}/by_channel"
for name in channel_names:
    os.makedirs(f"{by_channel_root}/{name}", exist_ok=True)

print(f"\n✓ Created {n_channels} channel directories")

In [None]:
def split_epoch_channels(epoch_file, epoch_id, output_base_path):
    """
    Split a single epoch into individual channel files.

    Each channel's time series is saved twice as a .npy file:
    ``<output_base_path>/by_epoch/<epoch_id>/<channel>.npy`` and
    ``<output_base_path>/by_channel/<channel>/<epoch_id>.npy``.
    Both target directories are created on demand, so the function does not
    depend on any directory having been prepared beforehand.

    Parameters:
    - epoch_file: Path to the epoch .fif file; it must hold exactly one epoch
    - epoch_id: Unique identifier for the epoch (used in file/folder names)
    - output_base_path: Base output directory

    Returns:
    - success: Boolean; False when anything went wrong (the error is printed)
    - channel_files: List of dicts, one per channel, with the keys
      epoch_id, channel_name, channel_index, by_epoch_path, by_channel_path,
      data_shape, data_min, data_max, data_mean and data_std.
      Empty when success is False.

    Note: files written before a failure are left on disk.
    """
    try:
        # Load epoch
        epoch = mne.read_epochs(epoch_file, verbose=False)
        data = epoch.get_data()  # Shape: (n_epochs, n_channels, n_timepoints)

        # Only the single-epoch case is supported. Taking data[0] from a file
        # with several epochs would silently drop the rest, so refuse instead.
        if data.shape[0] != 1:
            raise ValueError(
                f"expected exactly 1 epoch in file, found {data.shape[0]}"
            )
        data = data[0]  # Shape: (n_channels, n_timepoints)

        channel_files = []

        # Create epoch directory
        epoch_dir = f"{output_base_path}/by_epoch/{epoch_id}"
        os.makedirs(epoch_dir, exist_ok=True)

        # Split into individual channels
        for ch_idx, channel_name in enumerate(epoch.ch_names):
            channel_data = data[ch_idx]  # Shape: (n_timepoints,)

            # Save in by_epoch structure
            epoch_channel_file = f"{epoch_dir}/{channel_name}.npy"
            np.save(epoch_channel_file, channel_data)

            # Save in by_channel structure; make sure this channel's folder
            # exists even if no other code created it for this channel name.
            channel_dir = f"{output_base_path}/by_channel/{channel_name}"
            os.makedirs(channel_dir, exist_ok=True)
            channel_epoch_file = f"{channel_dir}/{epoch_id}.npy"
            np.save(channel_epoch_file, channel_data)

            channel_files.append({
                'epoch_id': epoch_id,
                'channel_name': channel_name,
                'channel_index': ch_idx,
                'by_epoch_path': epoch_channel_file,
                'by_channel_path': channel_epoch_file,
                'data_shape': channel_data.shape,
                'data_min': float(channel_data.min()),
                'data_max': float(channel_data.max()),
                'data_mean': float(channel_data.mean()),
                'data_std': float(channel_data.std())
            })

        return True, channel_files

    except Exception as e:
        # Best-effort batch processing: report the failure and let the caller
        # count it rather than aborting the whole run.
        print(f"Error processing {epoch_file}: {e}")
        return False, []

# Process all epochs
print(f"\nSplitting {len(processed_metadata)} epochs into individual channels...")
print(f"This will create {len(processed_metadata) * n_channels} channel files...")

# Epoch-level metadata columns that are copied onto every per-channel record
EPOCH_FIELDS = (
    'subject_name',
    'subject_number',
    'session_number',
    'trial_number',
    'speech_type',
    'class',
    'class_id',
    'timestamp',
    'sampling_frequency',
    'duration_seconds',
    'time_start',
    'time_end',
    'freq_low',
    'freq_high',
)

all_channel_files = []
success_count = 0
fail_count = 0

for _, row in tqdm(processed_metadata.iterrows(), total=len(processed_metadata), desc="Splitting channels"):
    epoch_file = row['file_path']
    epoch_id = row['epoch_id']

    success, channel_files = split_epoch_channels(epoch_file, epoch_id, OUTPUT_PATH)

    if not success:
        fail_count += 1
        continue

    # Add epoch metadata to each channel file record
    epoch_fields = {field: row[field] for field in EPOCH_FIELDS}
    for channel_file in channel_files:
        channel_file.update(epoch_fields)

    all_channel_files.extend(channel_files)
    success_count += 1

print(f"\n=== CHANNEL SPLITTING COMPLETE ===")
print(f"✓ Successfully processed: {success_count} epochs")
print(f"✗ Failed: {fail_count} epochs")
print(f"✓ Total channel files created: {len(all_channel_files)}")

# With no metadata rows both counters stay at 0; avoid dividing by zero.
total_count = success_count + fail_count
if total_count:
    print(f"Success rate: {success_count/total_count*100:.1f}%")
else:
    print("Success rate: n/a (no epochs to process)")

In [None]:
# Analyze results
if not all_channel_files:
    print("❌ No channel files were created!")
else:
    # One row per saved channel file
    channel_df = pd.DataFrame(all_channel_files)

    n_files = len(channel_df)
    n_unique_epochs = channel_df['epoch_id'].nunique()
    n_unique_channels = channel_df['channel_name'].nunique()

    print("=== CHANNEL SPLITTING RESULTS ===")
    print(f"Total channel files: {n_files}")
    print(f"Unique epochs: {n_unique_epochs}")
    print(f"Unique channels: {n_unique_channels}")
    print(f"Files per epoch: {n_files // n_unique_epochs}")
    print(f"Files per channel: {n_files // n_unique_channels}")

    print("\n=== DATA STATISTICS ===")
    print(f"Data shape per channel: {channel_df['data_shape'].iloc[0]}")
    print(f"Data range: {channel_df['data_min'].min():.2e} to {channel_df['data_max'].max():.2e}")
    print(f"Average mean: {channel_df['data_mean'].mean():.2e}")
    print(f"Average std: {channel_df['data_std'].mean():.2e}")

    # File counts are converted to epoch counts by dividing by n_channels
    print("\n=== DISTRIBUTION CHECK ===")
    print("Files per speech type:")
    speech_counts = channel_df['speech_type'].value_counts()
    for label, n_label_files in speech_counts.items():
        print(f"  {label}: {n_label_files} files ({n_label_files//n_channels} epochs)")

    print("\nFiles per class:")
    class_counts = channel_df['class'].value_counts()
    for label, n_label_files in class_counts.items():
        print(f"  {label}: {n_label_files} files ({n_label_files//n_channels} epochs)")

    # Only the first five subjects (in sorted order) are listed
    print("\nFiles per subject:")
    subject_counts = channel_df['subject_number'].value_counts().sort_index()
    for subject_id, n_subject_files in subject_counts.head(5).items():
        print(f"  Subject {subject_id}: {n_subject_files} files ({n_subject_files//n_channels} epochs)")

    print("\nSample channel files:")
    for position, (_, record) in enumerate(channel_df.head(5).iterrows(), start=1):
        print(f"  {position}. {record['epoch_id']} - {record['channel_name']}: {record['by_epoch_path']}")

In [None]:
# Test loading individual channel files
if not all_channel_files:
    print("No channel files to test")
else:
    print("=== TESTING CHANNEL FILES ===")

    # Round-trip check on the first 3 records: load both copies and compare
    test_files = channel_df.head(3)

    for test_number, (_, record) in enumerate(test_files.iterrows(), start=1):
        print(f"\nTest {test_number}: {record['epoch_id']} - {record['channel_name']}")

        try:
            # Copy stored under by_epoch/
            data_epoch = np.load(record['by_epoch_path'])
            print(f"  ✓ by_epoch file loaded: {data_epoch.shape}")

            # Copy stored under by_channel/
            data_channel = np.load(record['by_channel_path'])
            print(f"  ✓ by_channel file loaded: {data_channel.shape}")

            # Both copies were written from the same array and should match
            files_match = np.array_equal(data_epoch, data_channel)
            print("  ✓ Files are identical" if files_match else "  ⚠ Files differ!")

            print(f"  Data range: {data_epoch.min():.2e} to {data_epoch.max():.2e}")
            print(f"  Duration: {len(data_epoch) / record['sampling_frequency']:.2f}s")

        except Exception as e:
            print(f"  ✗ Error loading files: {e}")

    # Test loading all files for one channel
    print("\n=== TESTING CHANNEL COLLECTION ===")
    test_channel = channel_names[0]  # First channel
    # Note: from here on `channel_files` is a DataFrame slice, not a list
    channel_files = channel_df.loc[channel_df['channel_name'] == test_channel]

    print(f"Testing channel '{test_channel}' with {len(channel_files)} files")

    # Load first 3 files for this channel
    for file_number, (_, record) in enumerate(channel_files.head(3).iterrows(), start=1):
        try:
            data = np.load(record['by_channel_path'])
            print(f"  File {file_number}: {record['epoch_id']} - Shape: {data.shape}, Range: {data.min():.2e} to {data.max():.2e}")
        except Exception as e:
            print(f"  File {file_number}: Error - {e}")

In [None]:
# Save metadata and create summary
if len(all_channel_files) > 0:
    # Save channel files metadata
    csv_file = f"{OUTPUT_PATH}/metadata/channel_files_metadata.csv"
    pkl_file = f"{OUTPUT_PATH}/metadata/channel_files_metadata.pkl"
    summary_file = f"{OUTPUT_PATH}/metadata/split_summary.txt"

    channel_df.to_csv(csv_file, index=False)
    channel_df.to_pickle(pkl_file)

    # Counts reused by the summary file and by the console report below
    n_saved_epochs = channel_df['epoch_id'].nunique()
    n_saved_channels = channel_df['channel_name'].nunique()

    # Create summary. The encoding is pinned so that labels or channel names
    # containing non-ASCII characters are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("CHANNEL SPLITTING SUMMARY\n")
        f.write("=" * 40 + "\n\n")

        f.write(f"Input: {INPUT_PATH}\n")
        f.write(f"Output: {OUTPUT_PATH}\n\n")

        f.write("Results:\n")
        f.write(f"  Successfully processed epochs: {success_count}\n")
        f.write(f"  Failed epochs: {fail_count}\n")
        f.write(f"  Total channel files created: {len(channel_df)}\n")
        f.write(f"  Success rate: {success_count/(success_count+fail_count)*100:.1f}%\n\n")

        f.write("Dataset Info:\n")
        f.write(f"  Unique epochs: {n_saved_epochs}\n")
        f.write(f"  Unique channels: {n_saved_channels}\n")
        f.write(f"  Files per epoch: {len(channel_df) // n_saved_epochs}\n")
        f.write(f"  Files per channel: {len(channel_df) // n_saved_channels}\n")
        f.write(f"  Data shape per file: {channel_df['data_shape'].iloc[0]}\n")
        f.write(f"  Sampling frequency: {channel_df['sampling_frequency'].iloc[0]} Hz\n")
        f.write(f"  Duration per file: {channel_df['duration_seconds'].iloc[0]} seconds\n\n")

        f.write("File Organization:\n")
        f.write(f"  by_epoch/: {n_saved_epochs} folders, {n_channels} files each\n")
        f.write(f"  by_channel/: {n_channels} folders, {n_saved_epochs} files each\n\n")

        f.write("Speech Type Distribution:\n")
        for speech_type, count in channel_df['speech_type'].value_counts().items():
            f.write(f"  {speech_type}: {count} files ({count//n_channels} epochs)\n")

        f.write("\nClass Distribution:\n")
        for class_name, count in channel_df['class'].value_counts().items():
            f.write(f"  {class_name}: {count} files ({count//n_channels} epochs)\n")

        # List the first and last ten channel names; everything in between
        # is collapsed into a single "... more channels" line.
        f.write("\nChannel Names:\n")
        for i, channel in enumerate(channel_names):
            if i < 10 or i >= len(channel_names) - 10:
                f.write(f"  {channel}\n")
            elif i == 10:
                f.write(f"  ... ({len(channel_names) - 20} more channels) ...\n")

    print("\n=== FILES SAVED ===")
    print(f"✓ {len(channel_df)} channel files (.npy format)")
    print(f"✓ Metadata: {csv_file}")
    print(f"✓ Metadata: {pkl_file}")
    print(f"✓ Summary: {summary_file}")

    # The usage examples are printed as text, not executed. Paths are built
    # from OUTPUT_PATH so they stay correct if the output folder is renamed.
    print("\n=== USAGE EXAMPLES ===")
    print("\n1. Load single channel file:")
    print(f"   data = np.load('{OUTPUT_PATH}/by_epoch/epoch_id/channel.npy')")
    print(f"   # Shape: ({n_timepoints},) - time series for one channel")

    print("\n2. Load all channels for one epoch:")
    print(f"   epoch_folder = '{OUTPUT_PATH}/by_epoch/sub-01_ses-01_trial_000_Inner_Up/'")
    print("   channels = {}")
    print("   for ch in channel_names:")
    print("       channels[ch] = np.load(f'{epoch_folder}/{ch}.npy')")

    print("\n3. Load all epochs for one channel:")
    print(f"   channel_folder = '{OUTPUT_PATH}/by_channel/A1/'")
    print("   epochs = []")
    print("   for file in os.listdir(channel_folder):")
    print("       epochs.append(np.load(f'{channel_folder}/{file}'))")

    print("\n4. Use metadata for filtering:")
    print(f"   metadata = pd.read_csv('{csv_file}')")
    print("   inner_speech_A1 = metadata[")
    print("       (metadata['speech_type'] == 'Inner') & ")
    print("       (metadata['channel_name'] == 'A1')")
    print("   ]")

    print("\n🎉 CHANNEL SPLITTING COMPLETE!")
    print("Your data is now organized as:")
    print(f"  • {len(channel_df)} individual channel files")
    print(f"  • {n_saved_epochs} epochs × {n_channels} channels")
    print("  • Organized by epoch AND by channel")
    print(f"  • Each file: {n_timepoints} time points, {channel_df['duration_seconds'].iloc[0]:.2f}s duration")
    print(f"  • Ready for channel-specific analysis in: {OUTPUT_PATH}")

else:
    print("❌ No channel files to save")