# Z-Score Normalization for EEG Datasets

This notebook applies z-score normalization to EEG datasets. It can handle:
- **Individual channel files** (.npy format)
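- **MNE epoch files** (.fif format; the normalized array is written out as a `_normalized.npy` file, not as a new .fif)
- **Mixed datasets** (file types are detected automatically, but only one type is processed per run: whichever of .npy/.fif has more files, with .fif winning a tie)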

## Z-Score Formula:
```
z = (x - μ) / σ
```
Where:
- `x` = original value
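- `μ` = mean of the values being normalized together (one file, one channel, or the global sample)
- `σ` = standard deviation of the same values (population standard deviation, the `np.std` default). If `σ` is 0, the output is all zeros.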

## Normalization Options:
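1. **Per-file normalization**: Each file is normalized independently using its own mean and standard deviation
2. **Global normalization**: All files are normalized with one shared mean and standard deviation, estimated from a random sample of up to 1,000 files
3. **Per-channel normalization**: Each channel is normalized independently (for multi-channel data); for 3-D epoch arrays, each channel within each epoch is normalized separately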

In [4]:
import numpy as np
import pandas as pd
import os
import mne
from tqdm import tqdm
import warnings
import glob
from pathlib import Path
import shutil
import sys

# Configure settings
# Limit MNE's logging to WARNING level.
mne.set_log_level('WARNING')
# NOTE(review): this ignores every RuntimeWarning for the rest of the session, which
# presumably includes NumPy's numeric warnings (invalid value, divide by zero, empty
# slice) — confirm that hiding those is intended for a normalization workflow.
warnings.filterwarnings('ignore', category=RuntimeWarning)

print("Libraries imported successfully!")

Libraries imported successfully!


In [5]:
# Configuration
# NOTE(review): both paths are absolute and machine-specific; edit them before running elsewhere.
INPUT_DIR = r"D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/by_epoch"
OUTPUT_DIR = r"D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data"

# Normalization options
NORMALIZATION_TYPE = "per_file"    # Options: "per_file", "global", "per_channel"
PRESERVE_STRUCTURE = True          # Keep original folder structure
# NOTE(review): BACKUP_ORIGINAL is only reported below; no backup step is visible in
# this notebook — confirm before relying on it.
BACKUP_ORIGINAL = False            # Create backup of original data

# Fail fast on a typo here: otherwise the mistake only surfaces as one error per file
# during the (long) normalization loop.
VALID_NORMALIZATION_TYPES = ("per_file", "global", "per_channel")
if NORMALIZATION_TYPE not in VALID_NORMALIZATION_TYPES:
    raise ValueError(
        f"Unknown normalization type: {NORMALIZATION_TYPE!r} "
        f"(expected one of {VALID_NORMALIZATION_TYPES})"
    )

print(f"Configuration:")
print(f"  Input directory: {INPUT_DIR}")
print(f"  Output directory: {OUTPUT_DIR}")
print(f"  Normalization type: {NORMALIZATION_TYPE}")
print(f"  Preserve structure: {PRESERVE_STRUCTURE}")
print(f"  Backup original: {BACKUP_ORIGINAL}")

# Check input directory (isdir, not exists: a plain file at this path cannot be walked)
if not os.path.isdir(INPUT_DIR):
    print(f"\n❌ ERROR: Input directory '{INPUT_DIR}' not found!")
    print("Please specify a valid dataset directory.")
else:
    print(f"\n✓ Input directory found")

# Create output directory; creating the metadata sub-folder also creates OUTPUT_DIR itself
os.makedirs(os.path.join(OUTPUT_DIR, "metadata"), exist_ok=True)
print(f"✓ Output directory created")

Configuration:
  Input directory: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\individual_channels/by_epoch
  Output directory: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data
  Normalization type: per_file
  Preserve structure: True
  Backup original: False

✓ Input directory found
✓ Output directory created


In [6]:
def detect_file_types(directory):
    """
    Recursively scan ``directory`` and sort every file into a bucket by suffix.

    Returns:
    - file_info: dict with 'npy_files', 'fif_files' and 'other_files' (lists of
      full paths) plus 'total_files' (count of all files seen)
    """
    buckets = {'npy_files': [], 'fif_files': [], 'other_files': []}
    suffix_to_bucket = (('.npy', 'npy_files'), ('.fif', 'fif_files'))
    seen = 0

    for folder, _subdirs, names in os.walk(directory):
        seen += len(names)
        for name in names:
            full_path = os.path.join(folder, name)
            for suffix, key in suffix_to_bucket:
                if name.endswith(suffix):
                    buckets[key].append(full_path)
                    break
            else:
                # No supported suffix matched
                buckets['other_files'].append(full_path)

    return {**buckets, 'total_files': seen}

# Analyze input directory
print("Analyzing input directory...")
file_info = detect_file_types(INPUT_DIR)

print(f"\n=== FILE ANALYSIS ===")
print(f"Total files: {file_info['total_files']}")
print(f"NumPy files (.npy): {len(file_info['npy_files'])}")
print(f"MNE files (.fif): {len(file_info['fif_files'])}")
print(f"Other files: {len(file_info['other_files'])}")

# Show up to three example paths for each supported file type
for extension, key in (('.npy', 'npy_files'), ('.fif', 'fif_files')):
    if len(file_info[key]) > 0:
        print(f"\nSample {extension} files:")
        for i, file in enumerate(file_info[key][:3]):
            print(f"  {i+1}. {os.path.relpath(file, INPUT_DIR)}")

# Determine primary file type: the supported type with more files (.fif wins a tie)
if len(file_info['npy_files']) > len(file_info['fif_files']):
    primary_type = 'npy'
    primary_files = file_info['npy_files']
elif len(file_info['fif_files']) > 0:
    primary_type = 'fif'
    primary_files = file_info['fif_files']
else:
    primary_type = None
    primary_files = []

# Only the primary type is processed downstream, so say so explicitly instead of
# silently dropping the other supported files of a mixed dataset.
ignored_files = len(file_info['npy_files']) + len(file_info['fif_files']) - len(primary_files)
if ignored_files > 0:
    print(f"\n⚠ Mixed dataset: {ignored_files} supported file(s) that are not '{primary_type}' will NOT be processed")

print(f"\nPrimary file type: {primary_type}")
print(f"Files to process: {len(primary_files)}")

Analyzing input directory...

=== FILE ANALYSIS ===
Total files: 696320
NumPy files (.npy): 696320
MNE files (.fif): 0
Other files: 0

Sample .npy files:
  1. sub-01_ses-01_trial_000\sub-01_ses-01_trial_000_Pronounced_Left_A1.npy
  2. sub-01_ses-01_trial_000\sub-01_ses-01_trial_000_Pronounced_Left_A10.npy
  3. sub-01_ses-01_trial_000\sub-01_ses-01_trial_000_Pronounced_Left_A11.npy

Primary file type: npy
Files to process: 696320


In [7]:
def load_data_file(file_path, file_type):
    """
    Read one data file ('npy' via NumPy, 'fif' via MNE epochs).

    Returns:
    - data: numpy array, or None on any failure
    - metadata: dict describing the file, or None on any failure

    Any error, including an unsupported ``file_type``, is printed and
    reported as ``(None, None)`` rather than raised.
    """
    def _basic_info(array, kind):
        # Fields shared by every supported file type
        return {
            'file_type': kind,
            'original_shape': array.shape,
            'original_dtype': str(array.dtype)
        }

    try:
        if file_type not in ('npy', 'fif'):
            raise ValueError(f"Unsupported file type: {file_type}")

        if file_type == 'npy':
            loaded = np.load(file_path)
            info = _basic_info(loaded, 'npy')
        else:
            epochs = mne.read_epochs(file_path, verbose=False)
            loaded = epochs.get_data()
            info = _basic_info(loaded, 'fif')
            info['sampling_freq'] = epochs.info['sfreq']
            info['n_channels'] = len(epochs.ch_names)
            info['channel_names'] = epochs.ch_names

        return loaded, info

    except Exception as err:
        print(f"Error loading {file_path}: {err}")
        return None, None

def save_normalized_data(data, output_path, file_type, original_metadata=None):
    """
    Save normalized data as a NumPy ``.npy`` file.

    For 'npy' the array is written to ``output_path``. For 'fif' the MNE object
    is not reconstructed; the array is written next to the requested location
    as ``<name>_normalized.npy`` instead.

    Parameters:
    - data: array to write
    - output_path: requested destination path
    - file_type: 'npy' or 'fif'
    - original_metadata: accepted for interface compatibility; not used

    Returns:
    - (True, written_path) on success, where written_path is the file that
      np.save actually created
    - (False, None) on any failure, including an unsupported ``file_type``;
      the error is printed, not raised
    """
    try:
        if file_type not in ('npy', 'fif'):
            # Previously this fell through and reported success without writing anything
            raise ValueError(f"Unsupported file type: {file_type}")

        output_path = os.fspath(output_path)

        if file_type == 'fif':
            # Swap only the trailing extension; str.replace would also rewrite any
            # '.fif' that appears earlier in the path (e.g. in a directory name).
            root, ext = os.path.splitext(output_path)
            if ext.lower() != '.fif':
                root = output_path
            output_path = root + '_normalized.npy'
        elif not output_path.endswith('.npy'):
            # np.save appends '.npy' itself; mirror that so the returned path is real
            output_path = output_path + '.npy'

        # Create output directory (a bare filename has no directory part to create)
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        np.save(output_path, data)
        return True, output_path

    except Exception as e:
        print(f"Error saving {output_path}: {e}")
        return False, None

# Smoke-test the loader on the first file before processing everything
if len(primary_files) == 0:
    print(f"\n❌ No processable files found!")
else:
    print("\n=== TESTING FILE LOADING ===")
    sample_file = primary_files[0]
    print("Testing: {}".format(os.path.relpath(sample_file, INPUT_DIR)))

    data, metadata = load_data_file(sample_file, primary_type)

    if data is None:
        print("✗ Failed to load sample file")
    else:
        print("✓ Successfully loaded")
        # Each value is rendered lazily, in order, right before its line is printed
        summary_rows = (
            ("Shape", lambda: f"{data.shape}"),
            ("Data type", lambda: f"{data.dtype}"),
            ("Data range", lambda: f"{data.min():.2e} to {data.max():.2e}"),
            ("Mean", lambda: f"{data.mean():.2e}"),
            ("Std", lambda: f"{data.std():.2e}"),
        )
        for label, render in summary_rows:
            print(f"  {label}: {render()}")

        if metadata:
            print(f"  Metadata: {metadata}")


=== TESTING FILE LOADING ===
Testing: sub-01_ses-01_trial_000\sub-01_ses-01_trial_000_Pronounced_Left_A1.npy
✓ Successfully loaded
  Shape: (641,)
  Data type: float64
  Data range: -2.17e-05 to 1.40e-05
  Mean: -1.08e-08
  Std: 5.64e-06
  Metadata: {'file_type': 'npy', 'original_shape': (641,), 'original_dtype': 'float64'}


In [8]:
def compute_global_statistics(file_list, file_type, max_files=1000,
                              max_samples=10_000_000, seed=None):
    """
    Estimate a global mean and std from a random subset of files.

    Parameters:
    - file_list: sequence of file paths to sample from
    - file_type: passed through to load_data_file ('npy' or 'fif')
    - max_files: maximum number of files to sample (default 1000)
    - max_samples: stop reading further files once more than this many values
      have been collected (default 10M), to bound memory use
    - seed: optional integer; when given, the file sample is reproducible.
      When None, NumPy's global random state is used.

    Returns:
    - (global_mean, global_std), or (None, None) if no data could be loaded
    """
    print("Computing global statistics...")

    if len(file_list) == 0:
        print("❌ Could not compute global statistics")
        return None, None

    # Sample indices rather than the paths themselves: np.random.choice on a list of
    # strings first copies every path into a fixed-width unicode array.
    rng = np.random if seed is None else np.random.RandomState(seed)
    sample_size = min(len(file_list), max_files)
    picked = rng.choice(len(file_list), sample_size, replace=False)
    sample_files = [file_list[i] for i in picked]

    # Keep NumPy chunks instead of extending a Python list element by element
    chunks = []
    n_samples = 0
    files_used = 0

    for file_path in tqdm(sample_files, desc="Computing global stats"):
        data, _ = load_data_file(file_path, file_type)
        if data is None:
            continue

        chunks.append(np.ravel(data))
        n_samples += data.size
        files_used += 1

        # Limit memory usage
        if n_samples > max_samples:
            break

    if n_samples > 0:
        all_values = np.concatenate(chunks)
        global_mean = np.mean(all_values)
        global_std = np.std(all_values)

        # Report the files that actually contributed, not the number sampled
        print(f"Global statistics computed from {files_used} files:")
        print(f"  Samples analyzed: {n_samples:,}")
        print(f"  Global mean: {global_mean:.6e}")
        print(f"  Global std: {global_std:.6e}")

        return global_mean, global_std
    else:
        print("❌ Could not compute global statistics")
        return None, None

def _zscore(values, out_dtype):
    """Z-score ``values`` with its own mean/std; returns (normalized, mean, std)."""
    mean = np.mean(values)
    std = np.std(values)
    if std == 0:
        # Constant signal: define the z-score as all zeros instead of dividing by zero
        return np.zeros(values.shape, dtype=out_dtype), mean, std
    return (values - mean) / std, mean, std


def normalize_data(data, normalization_type, global_mean=None, global_std=None):
    """
    Apply z-score normalization to data.

    Parameters:
    - data: numpy array (or array-like)
    - normalization_type: "per_file", "global", or "per_channel"
    - global_mean, global_std: for global normalization

    Returns:
    - normalized_data: z-score normalized data (always a floating-point array)
    - norm_stats: normalization statistics used. 'per_file' and 'global' carry
      'mean' and 'std'; 'per_channel' carries a 'channel_stats' list with one
      entry per normalized row.

    Raises:
    - ValueError: unknown normalization_type, missing global statistics, or
      per-channel normalization of data that is not 1-D, 2-D or 3-D
    """
    data = np.asanyarray(data)
    # Integer input must not dictate the output dtype: writing z-scores into an
    # integer buffer would silently truncate them.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64

    if normalization_type == "per_file":
        # Normalize entire file using file's mean and std
        normalized_data, mean, std = _zscore(data, out_dtype)
        norm_stats = {'mean': mean, 'std': std, 'type': 'per_file'}

    elif normalization_type == "global":
        # Use global statistics
        if global_mean is None or global_std is None:
            raise ValueError("Global statistics not provided")

        if global_std == 0:
            normalized_data = np.zeros(data.shape, dtype=out_dtype)
        else:
            normalized_data = (data - global_mean) / global_std

        norm_stats = {'mean': global_mean, 'std': global_std, 'type': 'global'}

    elif normalization_type == "per_channel":
        # Normalize each channel independently
        norm_stats = {'type': 'per_channel', 'channel_stats': []}

        if data.ndim == 1:
            # 1D data - treat as single channel
            normalized_data, mean, std = _zscore(data, out_dtype)
            norm_stats['channel_stats'].append({'mean': mean, 'std': std})

        elif data.ndim == 2:
            # 2D data - normalize each row (channel)
            normalized_data = np.zeros(data.shape, dtype=out_dtype)
            for ch in range(data.shape[0]):
                normalized_data[ch], mean, std = _zscore(data[ch], out_dtype)
                norm_stats['channel_stats'].append({'mean': mean, 'std': std})

        elif data.ndim == 3:
            # 3D data (epochs, channels, time) - normalize each channel across time
            normalized_data = np.zeros(data.shape, dtype=out_dtype)
            for ep in range(data.shape[0]):
                for ch in range(data.shape[1]):
                    normalized_data[ep, ch], mean, std = _zscore(data[ep, ch], out_dtype)
                    norm_stats['channel_stats'].append(
                        {'epoch': ep, 'channel': ch, 'mean': mean, 'std': std}
                    )

        else:
            # Previously this fell through and returned an all-zero array
            raise ValueError(
                f"per_channel normalization supports 1-D, 2-D or 3-D data, got {data.ndim}-D"
            )

    else:
        raise ValueError(f"Unknown normalization type: {normalization_type}")

    return normalized_data, norm_stats

# Global statistics are only needed (and only computed) for "global" normalization
global_mean = global_std = None

if len(primary_files) > 0 and NORMALIZATION_TYPE == "global":
    global_mean, global_std = compute_global_statistics(primary_files, primary_type)

    if global_mean is None:
        # Falls back by overwriting the configured NORMALIZATION_TYPE for the rest of the session
        print("❌ Failed to compute global statistics. Switching to per-file normalization.")
        NORMALIZATION_TYPE = "per_file"

print("\nReady to normalize {} files using {} normalization".format(len(primary_files), NORMALIZATION_TYPE))


Ready to normalize 696320 files using per_file normalization


In [9]:
# Process all files
# Defined up front so later cells can read them even when there is nothing to process
normalization_results = []
success_count = 0
fail_count = 0

if len(primary_files) > 0:
    print(f"\n=== NORMALIZING {len(primary_files)} FILES ===")

    # With a flat output folder, files that share a basename would overwrite each
    # other; remember what has been written so collisions are reported instead.
    written_outputs = set()

    for file_path in tqdm(primary_files, desc=f"Normalizing ({NORMALIZATION_TYPE})"):
        try:
            # Load original data
            data, metadata = load_data_file(file_path, primary_type)

            if data is None:
                fail_count += 1
                continue

            # Apply normalization
            normalized_data, norm_stats = normalize_data(
                data, NORMALIZATION_TYPE, global_mean, global_std
            )

            # Determine output path
            if PRESERVE_STRUCTURE:
                rel_path = os.path.relpath(file_path, INPUT_DIR)
                output_path = os.path.join(OUTPUT_DIR, rel_path)
            else:
                filename = os.path.basename(file_path)
                output_path = os.path.join(OUTPUT_DIR, filename)
                if output_path in written_outputs:
                    print(f"Error processing {file_path}: output name collides with an "
                          f"earlier file ({output_path}); skipped to avoid overwriting it")
                    fail_count += 1
                    continue

            # Save normalized data
            save_success, final_output_path = save_normalized_data(
                normalized_data, output_path, primary_type, metadata
            )

            if save_success:
                if not PRESERVE_STRUCTURE:
                    written_outputs.add(output_path)

                # Record results
                result = {
                    'original_file': file_path,
                    'normalized_file': final_output_path,
                    'original_shape': data.shape,
                    'original_mean': float(np.mean(data)),
                    'original_std': float(np.std(data)),
                    'original_min': float(np.min(data)),
                    'original_max': float(np.max(data)),
                    'normalized_mean': float(np.mean(normalized_data)),
                    'normalized_std': float(np.std(normalized_data)),
                    'normalized_min': float(np.min(normalized_data)),
                    'normalized_max': float(np.max(normalized_data)),
                    'normalization_type': NORMALIZATION_TYPE,
                    'file_type': primary_type
                }

                # Add normalization stats (per_channel statistics are not recorded per file)
                if NORMALIZATION_TYPE in ['per_file', 'global']:
                    result['norm_mean'] = float(norm_stats['mean'])
                    result['norm_std'] = float(norm_stats['std'])

                normalization_results.append(result)
                success_count += 1
            else:
                fail_count += 1

        except Exception as e:
            # Best effort: one bad file must not abort the whole run
            print(f"Error processing {file_path}: {e}")
            fail_count += 1

    print(f"\n=== NORMALIZATION COMPLETE ===")
    print(f"✓ Successfully normalized: {success_count} files")
    print(f"✗ Failed: {fail_count} files")
    print(f"Success rate: {success_count/(success_count+fail_count)*100:.1f}%")

else:
    print("❌ No files to process")


=== NORMALIZING 696320 FILES ===


Normalizing (per_file): 100%|██████████| 696320/696320 [34:42<00:00, 334.41it/s] 


=== NORMALIZATION COMPLETE ===
✓ Successfully normalized: 696320 files
✗ Failed: 0 files
Success rate: 100.0%





In [10]:
# Summarize the per-file statistics collected during normalization
def _print_column_summary(frame, prefix):
    """Print range/average lines for the ``<prefix>_mean/_std/_min/_max`` columns."""
    means = frame[prefix + '_mean']
    stds = frame[prefix + '_std']
    lows = frame[prefix + '_min']
    highs = frame[prefix + '_max']
    print(f"Mean range: {means.min():.2e} to {means.max():.2e}")
    print(f"Std range: {stds.min():.2e} to {stds.max():.2e}")
    print(f"Data range: {lows.min():.2e} to {highs.max():.2e}")
    print(f"Average mean: {means.mean():.2e}")
    print(f"Average std: {stds.mean():.2e}")


if len(normalization_results) == 0:
    print("❌ No normalization results to analyze")
else:
    results_df = pd.DataFrame(normalization_results)

    print("=== NORMALIZATION ANALYSIS ===")
    print(f"Files processed: {len(results_df)}")
    print(f"Normalization type: {NORMALIZATION_TYPE}")

    print("\n=== BEFORE NORMALIZATION ===")
    _print_column_summary(results_df, 'original')

    print("\n=== AFTER NORMALIZATION ===")
    _print_column_summary(results_df, 'normalized')

    # Per-file normalization should leave every file at mean 0 / std 1
    if NORMALIZATION_TYPE == "per_file":
        print("\n=== NORMALIZATION QUALITY CHECK ===")
        mean_close_to_zero = np.abs(results_df['normalized_mean']) < 1e-10
        std_close_to_one = np.abs(results_df['normalized_std'] - 1.0) < 1e-10

        print(f"Files with mean ≈ 0: {mean_close_to_zero.sum()}/{len(results_df)} ({mean_close_to_zero.mean()*100:.1f}%)")
        print(f"Files with std ≈ 1: {std_close_to_one.sum()}/{len(results_df)} ({std_close_to_one.mean()*100:.1f}%)")

        # Both fractions must clear a threshold, i.e. the smaller one decides
        worst_share = min(mean_close_to_zero.mean(), std_close_to_one.mean())
        if worst_share > 0.95:
            print("✓ Normalization quality: EXCELLENT")
        elif worst_share > 0.8:
            print("✓ Normalization quality: GOOD")
        else:
            print("⚠ Normalization quality: NEEDS REVIEW")

    # Show the first few rows as a spot check
    print("\n=== SAMPLE RESULTS ===")
    sample_results = results_df.head(3)
    for i, (_, row) in enumerate(sample_results.iterrows(), start=1):
        print(f"\nFile {i}: {os.path.basename(row['original_file'])}")
        print(f"  Original: mean={row['original_mean']:.2e}, std={row['original_std']:.2e}")
        print(f"  Normalized: mean={row['normalized_mean']:.2e}, std={row['normalized_std']:.2e}")
        print(f"  Output: {os.path.relpath(row['normalized_file'], OUTPUT_DIR)}")

=== NORMALIZATION ANALYSIS ===
Files processed: 696320
Normalization type: per_file

=== BEFORE NORMALIZATION ===
Mean range: -8.60e-07 to 3.08e-07
Std range: 9.63e-07 to 6.05e-05
Data range: -6.03e-04 to 5.37e-04
Average mean: -4.81e-10
Average std: 4.66e-06

=== AFTER NORMALIZATION ===
Mean range: -6.03e-17 to 5.68e-17
Std range: 1.00e+00 to 1.00e+00
Data range: -1.43e+01 to 1.41e+01
Average mean: 3.56e-20
Average std: 1.00e+00

=== NORMALIZATION QUALITY CHECK ===
Files with mean ≈ 0: 696320/696320 (100.0%)
Files with std ≈ 1: 696320/696320 (100.0%)
✓ Normalization quality: EXCELLENT

=== SAMPLE RESULTS ===

File 1: sub-01_ses-01_trial_000_Pronounced_Left_A1.npy
  Original: mean=-1.08e-08, std=5.64e-06
  Normalized: mean=-1.66e-17, std=1.00e+00
  Output: sub-01_ses-01_trial_000\sub-01_ses-01_trial_000_Pronounced_Left_A1.npy

File 2: sub-01_ses-01_trial_000_Pronounced_Left_A10.npy
  Original: mean=-1.42e-08, std=6.97e-06
  Normalized: mean=1.32e-17, std=1.00e+00
  Output: sub-01_ses-0

In [11]:
# Reload a few written files from disk and confirm they are normalized
if len(normalization_results) == 0:
    print("No normalized files to test")
else:
    print("=== TESTING NORMALIZED FILES ===")

    # Spot-check the first 3 normalized files
    test_files = results_df.head(3)

    for i, (_, row) in enumerate(test_files.iterrows(), start=1):
        print(f"\nTest {i}: {os.path.basename(row['normalized_file'])}")

        try:
            if not row['normalized_file'].endswith('.npy'):
                print("  ⚠ Unsupported file format for testing")
            else:
                # Load normalized data
                norm_data = np.load(row['normalized_file'])
                print("  ✓ Loaded successfully")
                print(f"  Shape: {norm_data.shape}")
                print(f"  Mean: {norm_data.mean():.2e}")
                print(f"  Std: {norm_data.std():.2e}")
                print(f"  Range: {norm_data.min():.2e} to {norm_data.max():.2e}")

                # Only per-file normalization guarantees mean 0 / std 1 for each file
                if NORMALIZATION_TYPE == "per_file":
                    mean_ok = abs(norm_data.mean()) < 1e-10
                    if mean_ok and abs(norm_data.std() - 1.0) < 1e-10:
                        print("  ✓ Normalization verified (mean≈0, std≈1)")
                    else:
                        print("  ⚠ Normalization may have issues")

        except Exception as e:
            print(f"  ✗ Error loading: {e}")

=== TESTING NORMALIZED FILES ===

Test 1: sub-01_ses-01_trial_000_Pronounced_Left_A1.npy
  ✓ Loaded successfully
  Shape: (641,)
  Mean: -1.66e-17
  Std: 1.00e+00
  Range: -3.84e+00 to 2.48e+00
  ✓ Normalization verified (mean≈0, std≈1)

Test 2: sub-01_ses-01_trial_000_Pronounced_Left_A10.npy
  ✓ Loaded successfully
  Shape: (641,)
  Mean: 1.32e-17
  Std: 1.00e+00
  Range: -3.06e+00 to 3.07e+00
  ✓ Normalization verified (mean≈0, std≈1)

Test 3: sub-01_ses-01_trial_000_Pronounced_Left_A11.npy
  ✓ Loaded successfully
  Shape: (641,)
  Mean: 2.88e-17
  Std: 1.00e+00
  Range: -3.36e+00 to 2.81e+00
  ✓ Normalization verified (mean≈0, std≈1)


In [12]:
# Force UTF-8 console output where the stream supports it (summary text uses ≈, ✓, •)
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')
# Save results and create summary
if len(normalization_results) > 0:
    # Build paths with os.path.join so separators are consistent, and make sure the
    # metadata folder exists even if the configuration cell was not re-run.
    metadata_dir = os.path.join(OUTPUT_DIR, "metadata")
    os.makedirs(metadata_dir, exist_ok=True)
    results_csv = os.path.join(metadata_dir, "normalization_results.csv")
    results_pkl = os.path.join(metadata_dir, "normalization_results.pkl")
    summary_file = os.path.join(metadata_dir, "normalization_summary.txt")

    results_df.to_csv(results_csv, index=False)
    # NOTE: pickle files can execute code when loaded; only read this one back from a trusted location.
    results_df.to_pickle(results_pkl)

    # Guard the division in case the counters were reset since the processing cell ran
    total_processed = success_count + fail_count
    success_rate = success_count / total_processed * 100 if total_processed else 0.0

    # Create summary file in UTF-8
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("Z-SCORE NORMALIZATION SUMMARY\n")
        f.write("=" * 50 + "\n\n")

        f.write(f"Input Directory: {INPUT_DIR}\n")
        f.write(f"Output Directory: {OUTPUT_DIR}\n")
        f.write(f"Normalization Type: {NORMALIZATION_TYPE}\n")
        f.write(f"Preserve Structure: {PRESERVE_STRUCTURE}\n\n")

        f.write(f"Processing Results:\n")
        f.write(f"  Files processed: {len(results_df)}\n")
        f.write(f"  Success rate: {success_rate:.1f}%\n")
        f.write(f"  Primary file type: {primary_type}\n\n")

        if NORMALIZATION_TYPE == "global":
            f.write(f"Global Statistics Used:\n")
            f.write(f"  Global mean: {global_mean:.6e}\n")
            f.write(f"  Global std: {global_std:.6e}\n\n")

        f.write(f"Before Normalization:\n")
        f.write(f"  Mean range: {results_df['original_mean'].min():.2e} to {results_df['original_mean'].max():.2e}\n")
        f.write(f"  Std range: {results_df['original_std'].min():.2e} to {results_df['original_std'].max():.2e}\n")
        f.write(f"  Data range: {results_df['original_min'].min():.2e} to {results_df['original_max'].max():.2e}\n")
        f.write(f"  Average mean: {results_df['original_mean'].mean():.2e}\n")
        f.write(f"  Average std: {results_df['original_std'].mean():.2e}\n\n")

        f.write(f"After Normalization:\n")
        f.write(f"  Mean range: {results_df['normalized_mean'].min():.2e} to {results_df['normalized_mean'].max():.2e}\n")
        f.write(f"  Std range: {results_df['normalized_std'].min():.2e} to {results_df['normalized_std'].max():.2e}\n")
        f.write(f"  Data range: {results_df['normalized_min'].min():.2e} to {results_df['normalized_max'].max():.2e}\n")
        f.write(f"  Average mean: {results_df['normalized_mean'].mean():.2e}\n")
        f.write(f"  Average std: {results_df['normalized_std'].mean():.2e}\n\n")

        if NORMALIZATION_TYPE == "per_file":
            mean_close_to_zero = np.abs(results_df['normalized_mean']) < 1e-10
            std_close_to_one = np.abs(results_df['normalized_std'] - 1.0) < 1e-10
            f.write(f"Quality Check:\n")
            f.write(f"  Files with mean ≈ 0: {mean_close_to_zero.sum()}/{len(results_df)} ({mean_close_to_zero.mean()*100:.1f}%)\n")
            f.write(f"  Files with std ≈ 1: {std_close_to_one.sum()}/{len(results_df)} ({std_close_to_one.mean()*100:.1f}%)\n")

    # Console output (UTF-8 safe)
    print(f"\n=== FILES SAVED ===")
    print(f"✓ {len(results_df)} normalized data files")
    print(f"✓ Results: {results_csv}")
    print(f"✓ Results: {results_pkl}")
    print(f"✓ Summary: {summary_file}")

    print(f"\n=== USAGE ===")
    print(f"Load normalized data:")
    print(f"  data = np.load('normalized_data/path/to/file.npy')")
    print(f"\nLoad results metadata:")
    # !r prints a valid Python string literal; a raw Windows path inside plain quotes
    # would turn sequences such as "\n" in "\normalized_data" into escapes when pasted.
    print(f"  results = pd.read_csv({results_csv!r})")
    print(f"\nFind specific files:")
    print(f"  # Original file path to find normalized version")
    print(f"  norm_file = results[results['original_file'] == 'path']['normalized_file'].iloc[0]")

    print(f"\n Z-SCORE NORMALIZATION COMPLETE!")
    print(f"Your data is now:")
    if NORMALIZATION_TYPE == "per_file":
        print(f"  • Each file: mean ≈ 0, std ≈ 1")
    elif NORMALIZATION_TYPE == "global":
        print(f"  • All files normalized using global statistics")
    elif NORMALIZATION_TYPE == "per_channel":
        print(f"  • Each channel normalized independently")
    print(f"  • Ready for machine learning and analysis")
    print(f"  • Saved in: {OUTPUT_DIR}")

else:
    print("❌ No normalization results to save")



=== FILES SAVED ===
✓ 696320 normalized data files
✓ Results: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data/metadata/normalization_results.csv
✓ Results: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data/metadata/normalization_results.pkl
✓ Summary: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data/metadata/normalization_summary.txt

=== USAGE ===
Load normalized data:
  data = np.load('normalized_data/path/to/file.npy')

Load results metadata:
  results = pd.read_csv('D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\Dataset\normalized_data/metadata/normalization_results.csv')

Find specific files:
  # Original file path to find normalized version
  norm_file = results[results['original_file'] == 'path']['normalized_file'].iloc[0]

 Z-SCORE NORMALIZATION COMPLETE!
Your data is now:
  • Each file: mean ≈ 0, std ≈ 1
  • Ready for machine learning a