# Process Extracted MNE Epochs

This notebook processes all individual MNE epoch files to:
- **Crop time window**: Keep only t = 1.0 to 3.5 seconds (2.5s duration)
- **Bandpass filter**: Apply a 70-100 Hz bandpass filter (gamma band) to the full-length epoch, before it is cropped
- **Save processed files**: Write the filtered, cropped epochs and their updated metadata to a new folder

**Input**: `extracted_epochs_mne/` folder  
**Output**: `processed_epochs/` folder

In [None]:
import mne
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import warnings

# Quiet the notebook output: keep MNE's logger at WARNING level and ignore
# every warning of category RuntimeWarning issued through `warnings`.
mne.set_log_level(verbose='WARNING')
warnings.filterwarnings(action='ignore', category=RuntimeWarning)

print("Libraries imported successfully!")

In [None]:
# ---------------------------------------------------------------------
# Configuration: folders, time window, and filter band used by the
# cells that follow.
# ---------------------------------------------------------------------
INPUT_PATH = "extracted_epochs_mne"
OUTPUT_PATH = "processed_epochs"

# Retained time window (seconds) and band-pass edges (Hz).
TIME_START, TIME_END = 1.0, 3.5
FREQ_LOW, FREQ_HIGH = 70.0, 100.0

# Build the output tree: the root folder plus one sub-folder for the
# epoch files and one for the metadata tables.
os.makedirs(OUTPUT_PATH, exist_ok=True)
for subfolder in ("individual_epochs", "metadata"):
    os.makedirs(f"{OUTPUT_PATH}/{subfolder}", exist_ok=True)

# Echo the settings so the run is self-describing.
print(f"Input: {INPUT_PATH}")
print(f"Output: {OUTPUT_PATH}")
print(f"Time window: {TIME_START}-{TIME_END}s ({TIME_END-TIME_START}s duration)")
print(f"Frequency: {FREQ_LOW}-{FREQ_HIGH} Hz")

# Report whether the input folder is present. A missing folder is only
# reported here; nothing is raised in this cell.
if os.path.exists(INPUT_PATH):
    print("\n‚úì Input directory found")
else:
    print(f"\n‚ùå ERROR: {INPUT_PATH} not found!")
    print("Run the epoch extraction notebook first.")

In [None]:
# Read the metadata table written by the extraction step. Without it the
# rest of the notebook cannot run, so a missing file aborts here.
metadata_file = f"{INPUT_PATH}/metadata/epochs_metadata.pkl"

if not os.path.exists(metadata_file):
    print(f"‚ùå ERROR: Metadata not found at {metadata_file}")
    raise FileNotFoundError("Run epoch extraction first")
else:
    original_metadata = pd.read_pickle(metadata_file)
    print(f"Loaded metadata for {len(original_metadata)} epochs")

    # The first row is shown as a representative sample of the table.
    first_duration = original_metadata['duration_seconds'].iloc[0]
    print(f"Original duration: {first_duration:.2f}s")
    first_n_timepoints = original_metadata['n_timepoints'].iloc[0]
    print(f"Original time points: {first_n_timepoints}")
    first_sfreq = original_metadata['sampling_frequency'].iloc[0]
    print(f"Sampling frequency: {first_sfreq} Hz")

In [None]:
def process_epoch(input_file, output_file, tmin=None, tmax=None,
                  l_freq=None, h_freq=None):
    """Band-pass filter one saved epoch file, crop it, and save the result.

    The epoch is filtered over its full length first and cropped
    afterwards, so the retained window is cut out of already-filtered data.

    Parameters
    ----------
    input_file : str
        Path of the epochs file to read with ``mne.read_epochs``.
    output_file : str
        Path the processed epochs are saved to (overwritten if present).
        Its parent directory is created when missing.
    tmin, tmax : float or None
        Crop window in seconds. ``None`` falls back to the module-level
        ``TIME_START`` / ``TIME_END``.
    l_freq, h_freq : float or None
        Band-pass edges in Hz. ``None`` falls back to the module-level
        ``FREQ_LOW`` / ``FREQ_HIGH``.

    Returns
    -------
    dict
        On success: ``success=True``, ``processed=True``, the requested
        window (``time_start``, ``time_end``) and band (``freq_low``,
        ``freq_high``), plus ``duration_seconds`` and ``n_timepoints``
        measured on the saved epoch.
        On failure: ``success=False`` and ``error`` holding the exception
        message. The exception is also printed and never propagated, so a
        batch run can continue.
    """
    # Resolve defaults at call time so that editing the configuration
    # constants takes effect without redefining this function.
    tmin = TIME_START if tmin is None else tmin
    tmax = TIME_END if tmax is None else tmax
    l_freq = FREQ_LOW if l_freq is None else l_freq
    h_freq = FREQ_HIGH if h_freq is None else h_freq

    try:
        # Load epoch
        epoch = mne.read_epochs(input_file, verbose=False)

        # Band-pass filter, then crop. Both methods modify the object in
        # place; it was just loaded and is local, so no copy is needed.
        epoch.filter(
            l_freq=l_freq,
            h_freq=h_freq,
            fir_design='firwin',
            verbose=False
        )
        epoch.crop(tmin=tmin, tmax=tmax, verbose=False)

        # Create the output directory (skipped for a bare filename, where
        # dirname is '' and makedirs would raise).
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save processed epoch
        epoch.save(output_file, overwrite=True, verbose=False)

        # Duration and sample count are read from the cropped epoch rather
        # than assumed from the request: if the file is shorter than the
        # requested window, the saved span differs from tmax - tmin.
        return {
            'success': True,
            'duration_seconds': float(epoch.tmax - epoch.tmin),
            'n_timepoints': len(epoch.times),
            'time_start': tmin,
            'time_end': tmax,
            'freq_low': l_freq,
            'freq_high': h_freq,
            'processed': True
        }

    except Exception as e:
        # Deliberate best-effort: one bad file must not stop the batch.
        print(f"Error processing {input_file}: {e}")
        return {'success': False, 'error': str(e)}

# Process all epochs
# Runs process_epoch on every row of original_metadata. Rows that succeed
# are collected into processed_df with their paths and processing fields
# updated; rows that fail are only counted.
print(f"\nProcessing {len(original_metadata)} epochs...")

processed_metadata = []
success_count = 0
fail_count = 0

for _, row in tqdm(original_metadata.iterrows(), total=len(original_metadata), desc="Processing"):
    # File paths: read from the recorded location, write under OUTPUT_PATH
    # using the same relative layout.
    input_file = row['file_path']
    output_file = os.path.join(OUTPUT_PATH, row['relative_path'])

    # Process epoch
    result = process_epoch(input_file, output_file)

    if result['success']:
        # Update metadata - convert Series to dict first to avoid KeyError
        updated_row = row.to_dict()
        updated_row.update(result)
        updated_row['file_path'] = output_file
        updated_row['relative_path'] = os.path.relpath(output_file, OUTPUT_PATH)

        processed_metadata.append(updated_row)
        success_count += 1
    else:
        fail_count += 1

processed_df = pd.DataFrame(processed_metadata)

print(f"\n=== PROCESSING COMPLETE ===")
print(f"‚úì Successfully processed: {success_count} epochs")
print(f"‚úó Failed: {fail_count} epochs")

# An empty metadata table gives zero attempts; report that instead of
# dividing by zero.
total_count = success_count + fail_count
if total_count:
    print(f"Success rate: {success_count/total_count*100:.1f}%")
else:
    print("Success rate: n/a (no epochs in metadata)")

In [None]:
# Summarise the processed table and compare it with the original one.
# All per-epoch figures are taken from the first row of each table.
if len(processed_df) == 0:
    print("‚ùå No epochs were successfully processed!")
else:
    print("=== PROCESSING RESULTS ===")
    print(f"Processed epochs: {len(processed_df)}")
    print(f"New duration: {processed_df['duration_seconds'].iloc[0]:.2f}s")
    print(f"New time points: {processed_df['n_timepoints'].iloc[0]}")
    window_start = processed_df['time_start'].iloc[0]
    window_end = processed_df['time_end'].iloc[0]
    print(f"Time window: {window_start}-{window_end}s")
    band_low = processed_df['freq_low'].iloc[0]
    band_high = processed_df['freq_high'].iloc[0]
    print(f"Frequency: {band_low}-{band_high} Hz")

    print("\n=== BEFORE vs AFTER ===")
    orig_duration = original_metadata['duration_seconds'].iloc[0]
    new_duration = processed_df['duration_seconds'].iloc[0]
    orig_points = original_metadata['n_timepoints'].iloc[0]
    new_points = processed_df['n_timepoints'].iloc[0]

    print(f"Duration: {orig_duration:.2f}s ‚Üí {new_duration:.2f}s")
    print(f"Time points: {orig_points} ‚Üí {new_points}")
    # Share of samples removed, as a percentage of the original count.
    kept_fraction = new_points/orig_points
    print(f"Data reduction: {(1 - kept_fraction)*100:.1f}%")

    print("\n=== DISTRIBUTION CHECK ===")
    for label, column in (("Speech types", 'speech_type'), ("Classes", 'class')):
        print(f"{label}:", processed_df[column].value_counts().to_dict())
    print("Subjects:", processed_df['subject_number'].nunique())

In [None]:
# Spot-check: reload the first three processed files and confirm that each
# one spans the configured window length (within 0.01 s).
if len(processed_df) == 0:
    print("No processed epochs to test")
else:
    print("=== TESTING PROCESSED EPOCHS ===")

    expected = TIME_END - TIME_START

    for test_number, (_, row) in enumerate(processed_df.head(3).iterrows(), start=1):
        print(f"\nTest {test_number}: {row['epoch_id']}")

        try:
            epoch = mne.read_epochs(row['file_path'], verbose=False)
            data = epoch.get_data()

            print(f"  ‚úì Loaded successfully")
            print(f"  Data shape: {data.shape}")
            print(f"  Time: {epoch.tmin:.2f} to {epoch.tmax:.2f}s")

            # Span actually stored in the file.
            actual = epoch.tmax - epoch.tmin
            print(f"  Duration: {actual:.2f}s")

            # Verify duration
            if abs(actual - expected) < 0.01:
                print(f"  ‚úì Duration verified: {actual:.2f}s")
            else:
                print(f"  ‚ö† Duration issue: {actual:.2f}s (expected {expected:.2f}s)")

        except Exception as e:
            # A file that fails to load is reported and the check moves on.
            print(f"  ‚úó Error: {e}")

In [None]:
# Persist the processed metadata (CSV + pickle), write a plain-text summary
# of the run, and print where everything went.
if len(processed_df) == 0:
    print("‚ùå No processed data to save")
else:
    # Output locations, all inside the metadata sub-folder.
    metadata_dir = f"{OUTPUT_PATH}/metadata"
    csv_file = f"{metadata_dir}/processed_epochs_metadata.csv"
    pkl_file = f"{metadata_dir}/processed_epochs_metadata.pkl"
    summary_file = f"{metadata_dir}/processing_summary.txt"

    processed_df.to_csv(csv_file, index=False)
    processed_df.to_pickle(pkl_file)

    # Text summary. Each print() supplies the trailing newline; end="\n\n"
    # leaves a blank line after a section.
    with open(summary_file, 'w') as f:
        print("PROCESSED EPOCHS SUMMARY", file=f)
        print("=" * 40, end="\n\n", file=f)

        print("Processing Parameters:", file=f)
        print(f"  Time window: {TIME_START} - {TIME_END} seconds", file=f)
        print(f"  Duration: {TIME_END - TIME_START} seconds", file=f)
        print(f"  Frequency filter: {FREQ_LOW} - {FREQ_HIGH} Hz", end="\n\n", file=f)

        print("Results:", file=f)
        print(f"  Successfully processed: {success_count} epochs", file=f)
        print(f"  Failed: {fail_count} epochs", file=f)
        print(f"  Success rate: {success_count/(success_count+fail_count)*100:.1f}%", end="\n\n", file=f)

        print("Data Changes:", file=f)
        print(f"  Original duration: {original_metadata['duration_seconds'].iloc[0]:.2f}s", file=f)
        print(f"  New duration: {processed_df['duration_seconds'].iloc[0]:.2f}s", file=f)
        points_before = original_metadata['n_timepoints'].iloc[0]
        print(f"  Original time points: {points_before}", file=f)
        points_after = processed_df['n_timepoints'].iloc[0]
        print(f"  New time points: {points_after}", file=f)
        # Percentage of samples removed, based on the first row of each table.
        reduction_pct = (1 - points_after/points_before)*100
        print(f"  Data reduction: {reduction_pct:.1f}%", end="\n\n", file=f)

        print("Dataset Info:", file=f)
        print(f"  Subjects: {processed_df['subject_number'].nunique()}", file=f)
        print(f"  Sampling frequency: {processed_df['sampling_frequency'].iloc[0]} Hz", file=f)
        print(f"  Channels: {processed_df['n_channels'].iloc[0]}", file=f)

    print("\n=== FILES SAVED ===")
    print(f"‚úì {len(processed_df)} processed epoch files")
    for label, saved_path in (("Metadata", csv_file), ("Metadata", pkl_file), ("Summary", summary_file)):
        print(f"‚úì {label}: {saved_path}")

    print("\n=== USAGE ===")
    print("Load single epoch:")
    print("  epoch = mne.read_epochs('processed_epochs/individual_epochs/...')")
    print("\nLoad metadata:")
    print(f"  metadata = pd.read_csv('{csv_file}')")
    print("\nFilter epochs:")
    print("  inner_speech = metadata[metadata['speech_type'] == 'Inner']")

    print("\nüéâ PROCESSING COMPLETE!")
    print("Your epochs are now:")
    print(f"  ‚Ä¢ Cropped to {TIME_START}-{TIME_END}s ({TIME_END-TIME_START}s duration)")
    print(f"  ‚Ä¢ Filtered to {FREQ_LOW}-{FREQ_HIGH} Hz (gamma band)")
    print(f"  ‚Ä¢ {reduction_pct:.1f}% smaller in size")
    print(f"  ‚Ä¢ Ready for analysis in: {OUTPUT_PATH}")