# 5xFAD Resting State EEG - Simple Processing

A minimal notebook that replicates the core processing workflow:
1. Extract Open Ephys sessions
2. Apply standard preprocessing pipeline
3. Save to HDF5 for Julia analysis

**Just edit the paths in the configuration cell (the second code cell) and run all cells!**

In [None]:
# Check the conda environment and import required modules
import subprocess
import sys
import os
from pathlib import Path

# Advisory check that the kernel runs inside the 'spectra' conda environment.
# A mismatch only prints instructions; execution continues either way.
#
# CONDA_DEFAULT_ENV is read from the process environment and 'base' is assumed
# when it is unset. os.environ.get() with a default does not raise, so no
# try/except is needed: the previous bare `except:` was unreachable in practice
# and could only have swallowed KeyboardInterrupt/SystemExit.
conda_env = os.environ.get('CONDA_DEFAULT_ENV', 'base')
if conda_env == 'spectra':
    print(f"✅ Using conda environment: {conda_env}")
else:
    print(f"⚠️ Current environment: {conda_env}")
    print("💡 Please activate the 'spectra' environment:")
    print("   conda activate spectra")
    print("   Then restart this notebook")

# Make the project's local "modules" directory importable.
#
# Heuristic: when the working-directory path mentions 'notebooks', "modules"
# is looked up next to the current directory (one level up); otherwise it is
# looked up inside the current directory.
notebook_dir = Path.cwd()
sibling_modules = notebook_dir.parent / "modules"
child_modules = notebook_dir / "modules"
if 'notebooks' in str(notebook_dir):
    modules_dir, alternate_dir = sibling_modules, child_modules
else:
    modules_dir, alternate_dir = child_modules, sibling_modules

# The substring test also matches unrelated ancestor directories
# (e.g. ~/notebooks-archive/project). If the guessed location is missing but
# the other candidate is present on disk, use the one that actually exists.
# When the guess exists (or neither does) the original choice is kept.
if not os.path.isdir(modules_dir) and os.path.isdir(alternate_dir):
    modules_dir = alternate_dir

# Front of sys.path so the local modules take priority over installed ones.
sys.path.insert(0, str(modules_dir))

# Bring in the openephysextract classes and helpers used by the cells below.
# A failed import is reported with a hint instead of being raised, so the
# imported names stay undefined for later cells if the message is ignored.
try:
    from openephysextract.extractor import Extractor
    from openephysextract.preprocess import (
        Preprocessor,
        RemoveBadStep,
        FilterStep,
        DownsampleStep,
        EpochStep,
        StandardizeStep,
    )
    from openephysextract.utilities import spreadsheet
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("💡 Make sure you're in the 'spectra' conda environment")
    print("   conda activate spectra")
else:
    # Only reached when every import above succeeded.
    print("✅ Imports loaded successfully")

In [None]:
# ------------------------------------------------------------------
# USER SETTINGS - adjust the paths below for your own machine
# ------------------------------------------------------------------

# Input recordings folder, output folder for the HDF5 files, and the Excel
# workbook that holds the session notes.
SOURCE_FOLDER = '/Volumes/STORAGE 1.0/UNIC Research/5xFAD Resting State'
OUTPUT_FOLDER = '/Users/fomo/Documents/Research/UNIC Research/Neuroelectrophysiology/5xFAD Resting State/data'
NOTES_PATH = '/Users/fomo/Documents/Research/UNIC Research/Neuroelectrophysiology/notes/MICE_LIST_EEG.xlsx'

# Recording parameters
EXPERIMENT_NAME = '5xFAD Resting State'
SAMPLING_RATE = 30_000
CHANNELS = list(range(3, 9))  # channels 3-8: S1 L+R, V1/V2 L+R

# Compute backend: 'mps' for Apple Silicon, 'cuda' for NVIDIA, 'cpu' for CPU-only
DEVICE = 'mps'
USE_GPU = True

# Echo the key settings so a wrong path or device is spotted before processing.
print(
    f"📁 Source: {SOURCE_FOLDER}",
    f"📁 Output: {OUTPUT_FOLDER}",
    f"🔧 Device: {DEVICE}",
    sep="\n",
)

In [None]:
# Read the per-session notes from the Excel workbook.
# Any failure is reported and leaves `notes` as None so the run can continue.
print("📋 Loading session notes...")

try:
    # spreadsheet() takes the workbook as a separate folder and file name.
    notes_folder, notes_file = os.path.split(NOTES_PATH)

    # Entries found in the source folder, in sorted order, are passed as the
    # `relevant` filter.
    source_entries = os.listdir(SOURCE_FOLDER)
    source_entries.sort()

    notes = spreadsheet(
        location=notes_folder,
        name=notes_file,
        id='Session',
        relevant=source_entries,
        sheet='MEP',
    )
    print(f"✅ Loaded notes for {len(notes)} sessions")
except Exception as e:
    print(f"⚠️ Could not load notes: {e}")
    print("📝 Will use session names directly")
    notes = None

In [None]:
# Build the extractor from the configured settings and run the extraction
# without exporting (export=False).
print("🔬 Extracting sessions from Open Ephys data...")

extractor_settings = dict(
    source=SOURCE_FOLDER,
    experiment=EXPERIMENT_NAME,
    sampling_rate=SAMPLING_RATE,
    output='/tmp/5xfad_cache',  # Temporary cache location
    notes=notes,
    channels=CHANNELS,
)
extractor = Extractor(**extractor_settings)

sessions = extractor.extractify(export=False)
print(f"✅ Extracted {len(sessions)} sessions")

# Preview at most the first three sessions, then summarise the remainder.
for i in range(min(3, len(sessions))):
    session = sessions[i]
    print(f"Session {i+1}: {session.session} - {session.data.shape}")
remaining = len(sessions) - 3
if remaining > 0:
    print(f"... and {len(sessions)-3} more sessions")

In [None]:
# Assemble the preprocessing pipeline
print("⚙️ Setting up preprocessing pipeline...")

# Individual stages (same as original notebook); they are handed to the
# Preprocessor in the list order below.
reject_bad = RemoveBadStep(std=True, alpha=0.5, beta=0.5, cutoff_pct=90)  # remove bad channels/epochs
bandpass = FilterStep(lowcut=0.1, highcut=80, order=4)                    # bandpass filter 0.1-80 Hz
downsample = DownsampleStep(target_fs=100, downsample_raw=True)           # downsample to 100 Hz
epoching = EpochStep(frame=100, stride=10)                                # 1s epochs, 90% overlap
standardize = StandardizeStep(method='zscore', per_epoch=True)            # z-score standardization
steps = [reject_bad, bandpass, downsample, epoching, standardize]

# Wrap the stages in a Preprocessor bound to the configured device
preprocessor = Preprocessor(steps=steps, device=DEVICE, log=False, verbose=True)

# List the stages by class name, numbered from 1
print("✅ Preprocessing pipeline ready:")
i = 0
for step in steps:
    i += 1
    print(f"  {i}. {step.__class__.__name__}")

In [None]:
# Run the pipeline over every extracted session
print("🔄 Preprocessing sessions...")

processed_sessions = preprocessor.preprocess(sessions, use_gpu=USE_GPU)

print(f"✅ Preprocessed {len(processed_sessions)} sessions")

# Preview at most the first three processed sessions, then summarise the rest.
for i in range(min(3, len(processed_sessions))):
    session = processed_sessions[i]
    # Use the `.processed` attribute when the data object has one; otherwise
    # fall back to the data object itself.
    payload = session.data
    data_shape = getattr(payload, 'processed', payload).shape
    print(f"Processed {i+1}: {session.session} - {data_shape}")
remaining = len(processed_sessions) - 3
if remaining > 0:
    print(f"... and {len(processed_sessions)-3} more sessions")

In [None]:
# Write each processed session to its own HDF5 file in OUTPUT_FOLDER,
# named after the session, and report what was saved.
print("💾 Saving sessions to HDF5...")

# Create the output directory if needed (no error when it already exists)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

saved_files = []
for session in processed_sessions:
    output_path = os.path.join(OUTPUT_FOLDER, f'{session.session}.h5')
    session.to_hdf5(output_path)
    saved_files.append(output_path)
    print(f"💾 Saved: {session.session}.h5")

print("\n🎉 Processing complete!")
print(f"📁 Saved {len(saved_files)} HDF5 files to: {OUTPUT_FOLDER}")

# Only show the Julia loading hint when a file was written: indexing
# saved_files[0] on an empty list would raise IndexError and turn a run with
# zero sessions into a traceback.
if saved_files:
    print("\n🔬 Ready for Julia analysis!")
    print(f"Load in Julia with: session = from_hdf5(\"{saved_files[0]}\")")
else:
    print("\n⚠️ No sessions were saved; nothing to load in Julia.")