# EEG Data Loader for Neural Processing of Nonsense Study

This notebook loads and processes EEG data from the Neural Processing of Nonsense experiment: it reads each participant's `*_main.csv` file, reshapes it into long format, and saves the combined table to `data/combined_eeg_data.csv`. The data consists of complex frequency-domain coefficients from 32 electrodes across 4 experimental conditions.

## Experimental Conditions:
- **GN**: Grammatical Nonsense (trials 1-30)
- **GS**: Grammatical Sensible (trials 31-60)  
- **UN**: Ungrammatical Nonsense (trials 61-90)
- **US**: Ungrammatical Sensible (trials 91-120)

## Data Structure:
- **participant**: Subject ID (S1, S2, etc.)
- **frequency**: Frequency values in Hz (0.3-4.0 Hz range)
- **electrode**: Electrode names (32 channels)
- **trial**: Trial number (1-120)
- **coeff**: Complex frequency coefficients
- **condition**: Experimental condition (GN, GS, UN, US)

In [7]:
#!/usr/bin/env python3
"""EEG Data Loader for Neural Processing of Nonsense Study"""

import pandas as pd
from pathlib import Path
import re
import warnings
warnings.filterwarnings('ignore')


def load_freq_electrode_info(csv_dir):
    """Load electrode names and frequency values from ``freq&channels.csv``.

    The file is looked up inside ``csv_dir``. Its first line is split on
    commas to give the electrode names; its third line is split on commas and
    each cell converted to ``float`` to give the frequencies.

    Args:
        csv_dir: Directory (str or Path) expected to contain
            ``freq&channels.csv``.

    Returns:
        Tuple ``(electrodes, frequencies)``. When the file is missing,
        unreadable, has fewer than three lines, or contains a non-numeric
        frequency cell, placeholder defaults are returned instead:
        ``['Ch1', ..., 'Ch32']`` and the integers ``0..57``.
    """
    try:
        file_path = Path(csv_dir) / "freq&channels.csv"
        with open(file_path, 'r') as f:
            lines = f.readlines()

        # Parse electrodes from first line
        electrodes = lines[0].strip().split(',')

        # Parse frequencies from third line (index 2)
        frequencies = [float(x) for x in lines[2].strip().split(',')]

        return electrodes, frequencies
    except (OSError, IndexError, ValueError):
        # OSError: file missing/unreadable; IndexError: fewer than three
        # lines; ValueError: non-numeric frequency cell or undecodable bytes.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        # NOTE(review): the fallback frequencies are plain indices, not Hz
        # values, so downstream frequency summaries are only placeholders.
        return [f"Ch{i+1}" for i in range(32)], list(range(58))


def get_condition(trial):
    """Return the condition label for a trial number.

    Trials are grouped in consecutive blocks of 30: 1-30 -> 'GN',
    31-60 -> 'GS', 61-90 -> 'UN', 91-120 -> 'US'. Any trial number outside
    1-120 yields 'Unknown'.
    """
    if not 1 <= trial <= 120:
        return 'Unknown'

    labels = ['GN', 'GS', 'UN', 'US']
    block_index = (trial - 1) // 30
    return labels[block_index]


def load_csv_file(filepath, electrodes, frequencies):
    """Load one participant CSV file and reshape it into long format.

    The file is read without a header. In every row the first cell is the
    trial number, the second is a 1-based electrode number (an index into
    ``electrodes``), and the remaining cells are coefficients; the n-th
    coefficient column is paired with ``frequencies[n]``. Coefficient cells
    are parsed as complex numbers after replacing ``i`` with ``j``. Cells
    that are missing, empty, or not parseable as complex are skipped, as are
    coefficient columns beyond the end of ``frequencies``.

    Args:
        filepath: Path (or str) of the CSV file. The participant ID is the
            first ``S<digits>`` match in the file name.
        electrodes: Sequence of electrode names, indexed by
            ``electrode number - 1``.
        frequencies: Sequence of frequency values, one per coefficient column.

    Returns:
        DataFrame with columns ``participant``, ``frequency``, ``electrode``,
        ``trial``, ``coeff`` and ``condition`` (one row per parsed
        coefficient). An empty DataFrame is returned when the file has no
        rows, or when it cannot be loaded; in the latter case a warning
        naming the file and the reason is issued.
    """
    columns = ['participant', 'frequency', 'electrode', 'trial', 'coeff', 'condition']
    try:
        filepath = Path(filepath)  # accept plain strings as well as Path objects
        data = pd.read_csv(filepath, header=None)
        if data.empty:
            return pd.DataFrame()

        match = re.search(r'(S\d+)', filepath.name)
        if match is None:
            raise ValueError("no participant ID (S<digits>) in file name")
        participant = match.group(1)

        rows = []
        for _, row in data.iterrows():
            trial, electrode_idx = int(row.iloc[0]), int(row.iloc[1]) - 1
            # Reject 0/negative electrode numbers explicitly: a negative index
            # would otherwise wrap around and label the row with the wrong
            # electrode.
            if not 0 <= electrode_idx < len(electrodes):
                raise IndexError(
                    f"electrode number {electrode_idx + 1} is outside 1-{len(electrodes)}")
            electrode = electrodes[electrode_idx]
            condition = get_condition(trial)

            # zip stops at the shorter input, so coefficient columns with no
            # matching frequency are ignored.
            for frequency, val in zip(frequencies, row.iloc[2:]):
                if pd.notna(val) and val != '':
                    try:
                        coeff = complex(str(val).replace('i', 'j'))
                    except ValueError:
                        # Cell is not a valid complex literal; skip it.
                        continue
                    if pd.notna(coeff):
                        rows.append([participant, frequency,
                                     electrode, trial, coeff, condition])

        return pd.DataFrame(rows, columns=columns)
    except (OSError, ValueError, IndexError, TypeError) as exc:
        # Best-effort loader: a bad file is skipped rather than aborting the
        # whole run, but the reason is surfaced (unless warnings are filtered
        # out by the caller).
        warnings.warn(f"Skipping {filepath}: {exc}", stacklevel=2)
        return pd.DataFrame()


def load_all_eeg_data(csv_dir):
    """Load every ``*_main.csv`` file in ``csv_dir`` into one sorted DataFrame.

    Electrode and frequency labels are obtained from
    ``load_freq_electrode_info`` and passed to ``load_csv_file`` for each
    matching file. Files that yield an empty frame are dropped.

    Raises:
        FileNotFoundError: no ``*_main.csv`` files exist in ``csv_dir``.
        ValueError: every file produced an empty frame.

    Returns:
        The concatenated frames, sorted by participant, trial, electrode and
        frequency, with a fresh 0..n-1 index.
    """
    csv_dir = Path(csv_dir)
    electrodes, frequencies = load_freq_electrode_info(csv_dir)

    main_files = list(csv_dir.glob("*_main.csv"))
    if len(main_files) == 0:
        raise FileNotFoundError(f"No *_main.csv files found in {csv_dir}")

    # Load each file in turn, keeping only those that produced rows
    loaded = []
    for path in main_files:
        frame = load_csv_file(path, electrodes, frequencies)
        if not frame.empty:
            loaded.append(frame)

    if len(loaded) == 0:
        raise ValueError("No data loaded from any files")

    combined = pd.concat(loaded, ignore_index=True)
    ordered = combined.sort_values(['participant', 'trial', 'electrode', 'frequency'])
    return ordered.reset_index(drop=True)


# Load EEG data
# Runs the whole pipeline for this notebook: load every *_main.csv under
# data/csv_files, write the combined table to a single CSV, and print a short
# summary. Any exception raised along the way is reported as a one-line
# message and `df` is set to None.
try:
    df = load_all_eeg_data("data/csv_files")
    
    # Save data
    output_file = "data/combined_eeg_data.csv"
    df.to_csv(output_file, index=False)
    
    # Brief summary
    # Row count, number of distinct participants and electrodes
    print(f"Dataset: {len(df):,} rows | {df['participant'].nunique()} participants | {df['electrode'].nunique()} electrodes")
    # Distinct condition labels, alphabetically
    print(f"Conditions: {', '.join(sorted(df['condition'].unique()))}")
    # Min/max of the frequency column and how many distinct values it holds
    print(f"Frequency range: {df['frequency'].min():.1f}-{df['frequency'].max():.1f} Hz ({df['frequency'].nunique()} values)")
    # Number of rows per condition
    for condition, count in df['condition'].value_counts().items():
        print(f"  {condition}: {count:,}")
    print(f"✓ Saved to: {output_file}")
    
except Exception as e:
    # Only the exception message is shown (no traceback); `df` stays defined,
    # as None, after a failure.
    print(f"Error: {e}")
    df = None

Dataset: 3,118,080 rows | 14 participants | 32 electrodes
Conditions: GN, GS, UN, US
Frequency range: 0.3-4.0 Hz (58 values)
  GN: 779,520
  GS: 779,520
  UN: 779,520
  US: 779,520
✓ Saved to: data/combined_eeg_data.csv


In [9]:
# Debug: Check what files are available
from pathlib import Path

# Report where the loader is looking and whether that location exists
csv_dir = Path("data/csv_files")
print(f"Looking in directory: {csv_dir.absolute()}")
print(f"Directory exists: {csv_dir.exists()}")

if not csv_dir.exists():
    print("Directory does not exist!")
    print("Current working directory:", Path.cwd())

    # Fall back to inspecting the parent data folder
    data_dir = Path("data")
    print(f"Data directory exists: {data_dir.exists()}")
    if data_dir.exists():
        subdirs = [d for d in data_dir.iterdir() if d.is_dir()]
        print(f"Subdirectories in data: {[d.name for d in subdirs]}")
else:
    # Every entry in the directory; only the first 10 names are printed
    all_files = list(csv_dir.glob("*"))
    print(f"All files in directory: {len(all_files)}")
    for f in all_files[:10]:
        print(f"  {f.name}")

    # The files the loader will pick up; only the first 5 names are printed
    main_files = list(csv_dir.glob("*_main.csv"))
    print(f"*_main.csv files found: {len(main_files)}")
    for f in main_files[:5]:
        print(f"  {f.name}")

Looking in directory: C:\Users\Me\data\csv_files
Directory exists: True
All files in directory: 34
  .ipynb_checkpoints
  freq&channels.csv
  S10_17_07_2018_ft_coeff.csv
  S10_17_07_2018_main.csv
  S11_17_07_2018_ft_coeff.csv
  S11_17_07_2018_main.csv
  S12_18_07_2018_ft_coeff.csv
  S12_18_07_2018_main.csv
  S13_19_07_2018_ft_coeff.csv
  S13_19_07_2018_main.csv
*_main.csv files found: 16
  S10_17_07_2018_main.csv
  S11_17_07_2018_main.csv
  S12_18_07_2018_main.csv
  S13_19_07_2018_main.csv
  S14_19_07_2018_main.csv
