# Organize EEG Scalogram Files by Channel and Direction

This notebook organizes EEG scalogram files into a structured directory hierarchy based on:
- Channel codes (A1, B22, etc.)
- Movement directions (Up, Down, Left, Right)

Example input file: `data/Down/sub-01_ses-01_trial_040_Inner_Down_A1.png` (source files are read from one sub-folder per direction).
It will be copied to: `dataset/A1/Down/sub-01_ses-01_trial_040_Inner_Down_A1.png`

In [6]:
import os
import shutil
from pathlib import Path
from tqdm.notebook import tqdm
import re

print("Libraries imported successfully!")

Libraries imported successfully!


In [7]:
# Define paths.
# The defaults below are machine-specific Windows folders. To run the notebook
# elsewhere without editing this cell, set the EEG_SOURCE_DIR / EEG_DEST_DIR
# environment variables before starting the kernel.
# SOURCE_DIR is read as one sub-folder per direction (SOURCE_DIR/Up, ...);
# DEST_DIR receives the <channel>/<direction>/ hierarchy.
SOURCE_DIR = os.environ.get(
    'EEG_SOURCE_DIR',
    r'D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\data',
)
DEST_DIR = os.environ.get(
    'EEG_DEST_DIR',
    r'D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\dataset',
)

# Define valid channel codes. A file is assigned to a channel when its name
# ends with "_<code>.png"; anything else is reported as skipped.
CHANNEL_CODES = [
    'D19', 'D28', 'D23', 'B22', 'D21', 'D3', 'D7', 'D20',
    'C12', 'B31', 'B27', 'A3', 'D15', 'C17', 'C21', 'D31',
    'D5', 'C7', 'A1', 'C24'
]

# Define valid directions. These double as the names of the source
# sub-folders and of the innermost destination folders.
DIRECTIONS = ['Up', 'Down', 'Left', 'Right']

print(f"Source directory: {SOURCE_DIR}")
print(f"Destination directory: {DEST_DIR}")
print(f"Number of channels: {len(CHANNEL_CODES)}")
print(f"Directions: {', '.join(DIRECTIONS)}")

Source directory: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\data
Destination directory: D:\VIT\IV-Year\PJT-I\Speech Imagery Decoding\Inner_Speech_Dataset\dataset
Number of channels: 20
Directions: Up, Down, Left, Right


In [8]:
# Build the destination tree up front: one folder per (channel, direction)
# pair, e.g. dataset/A1/Up/. Folders that already exist are left untouched.
channel_direction_pairs = (
    (code, heading) for code in CHANNEL_CODES for heading in DIRECTIONS
)
for channel, direction in channel_direction_pairs:
    Path(DEST_DIR, channel, direction).mkdir(parents=True, exist_ok=True)

print("✅ Directory structure created successfully")

✅ Directory structure created successfully


In [None]:
def extract_info_from_filename(filename, channel_codes=None):
    """Return the channel code encoded at the end of a scalogram filename.

    A filename matches a code when it ends with ``_<code>.png`` (the
    comparison is case-sensitive), e.g.
    ``sub-01_ses-01_trial_040_Inner_Down_A1.png`` -> ``'A1'``.

    Args:
        filename: File name (not a full path is required, only the suffix
            is inspected).
        channel_codes: Iterable of candidate channel codes. Defaults to the
            notebook-level ``CHANNEL_CODES`` when omitted.

    Returns:
        The first code in ``channel_codes`` that matches, or ``None`` when
        no code matches.
    """
    if channel_codes is None:
        channel_codes = CHANNEL_CODES

    # The leading underscore is part of the suffix, so a short code such as
    # 'D3' cannot be confused with a longer one such as 'D23'.
    return next(
        (code for code in channel_codes if filename.endswith(f"_{code}.png")),
        None,
    )

# Process files: copy every recognised scalogram from SOURCE_DIR/<direction>/
# into DEST_DIR/<channel>/<direction>/ and tally what was copied.
processed_count = 0  # files copied successfully
skipped_count = 0    # files with no recognised channel suffix, or whose copy failed
# Per-channel, per-direction count of copied files (read by the statistics cell)
file_distribution = {channel: {direction: 0 for direction in DIRECTIONS} for channel in CHANNEL_CODES}

# Process each direction folder
for direction in DIRECTIONS:
    direction_path = os.path.join(SOURCE_DIR, direction)
    if not os.path.exists(direction_path):
        print(f"Warning: Direction folder {direction} not found in {SOURCE_DIR}")
        continue

    # Ensure the destination folders for this direction exist once, up front,
    # rather than calling os.makedirs again for every single file.
    for channel in CHANNEL_CODES:
        os.makedirs(os.path.join(DEST_DIR, channel, direction), exist_ok=True)

    # Get all PNG files in this direction folder
    png_files = [f for f in os.listdir(direction_path) if f.endswith('.png')]

    print(f"\nProcessing {direction} folder: {len(png_files)} files found")

    for filename in tqdm(png_files, desc=f"Processing {direction} files"):
        # The channel comes from the filename suffix; the direction comes
        # from the folder the file was found in.
        channel = extract_info_from_filename(filename)

        if not channel:
            print(f"Skipping {filename} - Could not extract channel")
            skipped_count += 1
            continue

        # Source and destination paths
        src_path = os.path.join(direction_path, filename)
        dst_path = os.path.join(DEST_DIR, channel, direction, filename)

        try:
            # copy2 copies (does not move) the file and keeps its metadata;
            # an existing destination file is overwritten.
            shutil.copy2(src_path, dst_path)
        except OSError as e:
            # Best effort: report the failed copy and carry on with the rest.
            print(f"Error copying {filename}: {e}")
            skipped_count += 1
        else:
            # Update statistics only for files that were actually copied
            processed_count += 1
            file_distribution[channel][direction] += 1

print("\n=== Processing Complete ===")
print(f"✓ Processed: {processed_count} files")
print(f"✗ Skipped: {skipped_count} files")


Processing Up folder: 10780 files found


Processing Up files:   0%|          | 0/10780 [00:00<?, ?it/s]


Processing Down folder: 10780 files found


Processing Down files:   0%|          | 0/10780 [00:00<?, ?it/s]

In [None]:
# Print distribution statistics for the counts gathered in file_distribution.
print("=== File Distribution ===\n")

# Totals are computed first so that every column can be made wide enough for
# its largest value (per-direction totals can exceed four digits).
direction_totals = {
    direction: sum(file_distribution[channel][direction] for channel in CHANNEL_CODES)
    for direction in DIRECTIONS
}
grand_total = sum(direction_totals.values())

# Column widths: wide enough for both the heading and the largest number.
label_width = max(len('Channel'), len('Total'), *(len(code) for code in CHANNEL_CODES))
column_widths = {
    direction: max(len(direction), len(str(direction_totals[direction])))
    for direction in DIRECTIONS
}
total_width = max(len('Total'), len(str(grand_total)))

# Print header (labels left-aligned, numeric columns right-aligned)
header = " | ".join(
    [f"{'Channel':<{label_width}}"]
    + [f"{direction:>{column_widths[direction]}}" for direction in DIRECTIONS]
    + [f"{'Total':>{total_width}}"]
)
separator = "-" * len(header)
print(header)
print(separator)

# Print statistics for each channel
for channel in CHANNEL_CODES:
    counts = file_distribution[channel]
    total = sum(counts.values())
    if total > 0:  # Only show channels with files
        print(" | ".join(
            [f"{channel:<{label_width}}"]
            + [f"{counts[direction]:>{column_widths[direction]}}" for direction in DIRECTIONS]
            + [f"{total:>{total_width}}"]
        ))

# Print totals
print(separator)
print(" | ".join(
    [f"{'Total':<{label_width}}"]
    + [f"{direction_totals[direction]:>{column_widths[direction]}}" for direction in DIRECTIONS]
    + [f"{grand_total:>{total_width}}"]
))

if grand_total == 0:
    # An all-zero table means no copies were recorded in file_distribution.
    print("\nNo files counted - run the file-processing cell to completion first.")

=== File Distribution ===

Channel | Up   | Down | Left | Right | Total
---------------------------------------------
---------------------------------------------
Total  |    0 |    0 |    0 |    0 |     0
