* List all subject folders in the Parkinson dataset directory

In [1]:
from pathlib import Path

# Dataset root: the code below lists its immediate sub-directories.
base_path = Path(r"D:\Dermerzel\SomnasNest\Parkinson\Data")

# Keep only directory entries and remember their bare names (no parent path).
folders = [entry.name for entry in filter(Path.is_dir, base_path.iterdir())]

# Print one folder name per line, in the order the filesystem returned them.
for folder in folders:
    print(folder)


sub-PD1001
sub-PD1011
sub-PD1021
sub-PD1031
sub-PD1041
sub-PD1051
sub-PD1061
sub-PD1071
sub-PD1081
sub-PD1091
sub-PD1101
sub-PD1111
sub-PD1121
sub-PD1131
sub-PD1141
sub-PD1151
sub-PD1161
sub-PD1171
sub-PD1181
sub-PD1191
sub-PD1201


* Load the EEG recording of the first subject (sub-PD1001)

In [2]:
pip install mne

Note: you may need to restart the kernel to use updated packages.


In [3]:
import mne
from pathlib import Path

# Folder of the subject whose recording is inspected in this cell.
sub_path = Path(r"D:\Dermerzel\SomnasNest\Parkinson\Data\sub-PD1001")

# Locate the BrainVision header (.vhdr). Sorting makes the choice deterministic
# when several headers exist; an empty result is reported explicitly instead of
# surfacing as an opaque IndexError.
vhdr_candidates = sorted(sub_path.glob("*.vhdr"))
if not vhdr_candidates:
    raise FileNotFoundError(f"No .vhdr file found in {sub_path}")
vhdr_file = vhdr_candidates[0]

# Load the recording fully into memory (preload=True).
raw = mne.io.read_raw_brainvision(vhdr_file, preload=True)

# Print the one-line summary of the loaded recording.
print(raw)


Extracting parameters from D:\Dermerzel\SomnasNest\Parkinson\Data\sub-PD1001\sub-PD1001_ses-01_task-rest_eeg.vhdr...
Setting channel info structure...


  raw = mne.io.read_raw_brainvision(vhdr_file, preload=True)


Reading 0 ... 140829  =      0.000 ...   281.658 secs...
<RawBrainVision | sub-PD1001_ses-01_task-rest_eeg.eeg, 63 x 140830 (281.7 s), ~67.8 MiB, data loaded>


In [4]:
# Print the channel labels of the loaded recording as a single list.
print(list(raw.ch_names))

['Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9', 'CP5', 'CP1', 'P3', 'P7', 'O1', 'Oz', 'O2', 'P4', 'P8', 'TP10', 'CP6', 'CP2', 'Cz', 'C4', 'T8', 'FT10', 'FC6', 'FC2', 'F4', 'F8', 'Fp2', 'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', 'C1', 'C5', 'TP7', 'CP3', 'P1', 'P5', 'PO7', 'PO3', 'POz', 'PO4', 'PO8', 'P6', 'P2', 'CPz', 'CP4', 'TP8', 'C6', 'C2', 'FC4', 'FT8', 'F6', 'AF8', 'AF4', 'F2', 'FCz']


* Save the positive-class (Parkinson) subjects as a NumPy array

In [12]:
# If needed: pip install mne numpy

from pathlib import Path  # for handling folder paths
import mne  # for loading BrainVision EEG data
import numpy as np  # for numerical operations

# ----------------------- CONFIG -----------------------
root_dir = Path(r"D:\Dermerzel\SomnasNest\Parkinson\Data")  # root dataset folder
exclude_channel = "FCz"  # channel to exclude
# ------------------------------------------------------

# Purpose: walk every sub-folder of `root_dir`, open its BrainVision recording
# and report the smallest sample count found after removing `exclude_channel`.

print(f"[INFO] Root directory: {root_dir}")  # print root directory

# Sorted so the log (and the "new minimum" messages) appear in a reproducible order.
subfolders = sorted(p for p in root_dir.iterdir() if p.is_dir())
print(f"[INFO] Found {len(subfolders)} folders\n")  # print number of folders

min_length = None  # smallest number of samples seen so far (None until one file loads)

for folder in subfolders:  # loop through each folder
    print("=" * 70)
    print(f"[INFO] Processing folder: {folder.name}")  # print folder name

    # Sorted so the same header is chosen on every run if several are present.
    vhdr_files = sorted(folder.glob("*.vhdr"))

    if not vhdr_files:  # folders without a recording are skipped
        print("[WARN] No .vhdr file found. Skipping.")
        continue  # move to next folder

    vhdr_path = vhdr_files[0]  # usually one vhdr per folder
    print(f"[INFO] Loading file: {vhdr_path.name}")  # print file name

    try:
        # Only the dimensions are needed here, so the signal itself is not
        # read into memory (preload=False); the header already provides them.
        raw = mne.io.read_raw_brainvision(
            str(vhdr_path),
            preload=False,
            verbose="ERROR"
        )

        # Lower-case lookup so the excluded channel is matched case-insensitively.
        ch_map = {ch.lower(): ch for ch in raw.ch_names}
        exclude_key = exclude_channel.lower()

        # Remove the excluded channel if present
        if exclude_key in ch_map:
            raw.drop_channels([ch_map[exclude_key]])
            print(f"[INFO] Dropped channel: {exclude_channel}")
        else:
            print(f"[INFO] Channel '{exclude_channel}' not found")

        n_channels = len(raw.ch_names)  # number of channels after the drop
        n_samples = int(raw.n_times)  # number of samples in the recording

        print(f"[INFO] EEG shape (channels x samples): {(n_channels, n_samples)}")
        print(f"[INFO] Number of channels: {n_channels}")
        print(f"[INFO] Number of samples: {n_samples}")

        # Update minimum length
        if min_length is None or n_samples < min_length:
            min_length = n_samples
            print(f"[INFO] New minimum length found: {min_length}")

    except Exception as e:
        # Best effort: report the failure and continue with the next folder.
        print("[ERROR] Failed to load EEG")
        print(f"[ERROR] {repr(e)}")

print("\n" + "=" * 70)
if min_length is None:
    # Make the "nothing was readable" case explicit instead of only printing None.
    print("[WARN] No recording could be read; minimum length is undefined.")
print(f"[RESULT] Minimum EEG array length (samples): {min_length}")
print("[INFO] Done.")


[INFO] Root directory: D:\Dermerzel\SomnasNest\Parkinson\Data
[INFO] Found 22 folders

[INFO] Processing folder: numpy_arrays
[WARN] No .vhdr file found. Skipping.
[INFO] Processing folder: sub-PD1001
[INFO] Loading file: sub-PD1001_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] EEG shape (channels x samples): (62, 140830)
[INFO] Number of channels: 62
[INFO] Number of samples: 140830
[INFO] New minimum length found: 140830
[INFO] Processing folder: sub-PD1011
[INFO] Loading file: sub-PD1011_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] EEG shape (channels x samples): (62, 163020)
[INFO] Number of channels: 62
[INFO] Number of samples: 163020
[INFO] Processing folder: sub-PD1021
[INFO] Loading file: sub-PD1021_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] EEG shape (channels x samples): (62, 126180)
[INFO] Number of channels: 62
[INFO] Number of samples: 126180
[INFO] New minimum length found: 126180
[INFO] Processing folder: sub-PD1031
[INFO

In [13]:
# If needed: pip install mne numpy

from pathlib import Path  # for file and folder handling
import numpy as np  # for numpy arrays
import mne  # for loading BrainVision EEG files

# ----------------------- CONFIG -----------------------
root_dir = Path(r"D:\Dermerzel\SomnasNest\Parkinson\Data")  # dataset root
exclude_channel = "FCz"  # channel to remove
timepoints = 60000  # number of timepoints to keep
output_file = root_dir / "parkinson_positive.npy"  # output file
# ------------------------------------------------------

# Purpose: load every BrainVision recording below `root_dir`, drop
# `exclude_channel`, keep the last `timepoints` samples of each recording and
# save all of them as one array of shape (subjects, channels, timepoints).

print(f"[INFO] Root directory: {root_dir}")

subfolders = sorted([p for p in root_dir.iterdir() if p.is_dir()])  # list folders
print(f"[INFO] Found {len(subfolders)} folders")

all_samples = []  # one (channels, timepoints) float32 array per accepted subject
reference_channels = None  # channel order of the first successfully loaded subject

for folder in subfolders:
    print("\n" + "=" * 70)
    print(f"[INFO] Processing folder: {folder.name}")

    # Sorted so the same header is chosen on every run if several are present.
    vhdr_files = sorted(folder.glob("*.vhdr"))

    if not vhdr_files:
        print("[WARN] No .vhdr file found. Skipping.")
        continue

    vhdr_path = vhdr_files[0]
    print(f"[INFO] Loading EEG: {vhdr_path.name}")

    try:
        raw = mne.io.read_raw_brainvision(
            str(vhdr_path),
            preload=True,
            verbose="ERROR"
        )  # load EEG data

        # Map channel names to lowercase for safe comparison
        ch_map = {ch.lower(): ch for ch in raw.ch_names}
        exclude_key = exclude_channel.lower()

        # Drop the excluded channel if present
        if exclude_key in ch_map:
            raw.drop_channels([ch_map[exclude_key]])
            print(f"[INFO] Dropped channel: {exclude_channel}")
        else:
            print(f"[INFO] Channel '{exclude_channel}' not found")

        # Row i must mean the same electrode for every subject, otherwise the
        # stacked array is silently misaligned. The first loaded subject
        # defines the layout; later subjects are reordered to it or skipped.
        if reference_channels is None:
            reference_channels = list(raw.ch_names)
        elif raw.ch_names != reference_channels:
            if set(raw.ch_names) != set(reference_channels):
                print("[WARN] Channel set differs from the first subject. Skipping.")
                continue
            raw.reorder_channels(reference_channels)
            print("[INFO] Reordered channels to match the first subject")

        data = raw.get_data()  # shape: (channels, samples)
        print(f"[INFO] Raw data shape (channels x samples): {data.shape}")

        # Check if enough timepoints exist
        if data.shape[1] < timepoints:
            print(f"[WARN] Not enough timepoints ({data.shape[1]}). Skipping.")
            continue

        # Keep the LAST `timepoints` samples of the recording.
        # NOTE(review): the non-Parkinson cell of this notebook keeps the FIRST
        # samples of each recording - confirm that this asymmetry is intended.
        data = data[:, -timepoints:]  # (channels, timepoints)
        print(f"[INFO] After slicing last {timepoints} timepoints: {data.shape}")

        # No transpose takes place: the layout stays (channels, timepoints).
        # Only the dtype is changed, so all samples share one compact type.
        data = data.astype(np.float32)

        all_samples.append(data)  # add to list
        print(f"[INFO] Sample added. Current count: {len(all_samples)}")

    except Exception as e:
        # Best effort: report the failure and continue with the next folder.
        print("[ERROR] Failed to process EEG")
        print(f"[ERROR] {repr(e)}")

print("\n" + "=" * 70)

if not all_samples:
    # np.stack raises ValueError on an empty list; report the cause instead.
    print("[ERROR] No usable recordings found. Nothing was saved.")
else:
    # Stack into final numpy array
    # shape: (num_samples, num_channels, timepoints)
    final_array = np.stack(all_samples, axis=0)

    print(f"[RESULT] Final array shape: {final_array.shape}")

    # Save final array
    np.save(output_file, final_array)
    print(f"[RESULT] Saved array to: {output_file}")

print("[INFO] Done.")


[INFO] Root directory: D:\Dermerzel\SomnasNest\Parkinson\Data
[INFO] Found 22 folders

[INFO] Processing folder: numpy_arrays
[WARN] No .vhdr file found. Skipping.

[INFO] Processing folder: sub-PD1001
[INFO] Loading EEG: sub-PD1001_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] Raw data shape (channels x samples): (62, 140830)
[INFO] After slicing last 60000 timepoints: (62, 60000)
[INFO] Sample added. Current count: 1

[INFO] Processing folder: sub-PD1011
[INFO] Loading EEG: sub-PD1011_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] Raw data shape (channels x samples): (62, 163020)
[INFO] After slicing last 60000 timepoints: (62, 60000)
[INFO] Sample added. Current count: 2

[INFO] Processing folder: sub-PD1021
[INFO] Loading EEG: sub-PD1021_ses-01_task-rest_eeg.vhdr
[INFO] Dropped channel: FCz
[INFO] Raw data shape (channels x samples): (62, 126180)
[INFO] After slicing last 60000 timepoints: (62, 60000)
[INFO] Sample added. Current count: 3

[INFO] Proces

* Build the non-Parkinson (negative-class) array from the recordings in the Alzheimer dataset folder

In [14]:
import mne
import numpy as np
from pathlib import Path

# Purpose: load the recordings of subjects sub-01 .. sub-31 from the Alzheimer
# dataset folder, keep the 62 channels listed in SELECTED_CHANNELS (in that
# order), crop each recording to its first TARGET_LENGTH samples and save all
# of them as one array of shape (subjects, channels, TARGET_LENGTH).

print("=== EEG Loading Started ===")

# --------------------------------------------------
# PATHS
# --------------------------------------------------
input_base_path = Path(r"D:\Dermerzel\SomnasNest\Alzheimer\Data")
output_path = Path(r"D:\Dermerzel\SomnasNest\Parkinson\Data\parkinson_negative.npy")

print(f"Input base path: {input_base_path}")
print(f"Output file: {output_path}")

TARGET_LENGTH = 60_000  # fixed number of time points
FIRST_SUBJECT = 1  # subject folders are named sub-01 ... sub-31
LAST_SUBJECT = 31

# --------------------------------------------------
# CHANNEL SELECTION (ORDER MATTERS)
# --------------------------------------------------
SELECTED_CHANNELS = [
    'Fp1', 'Fz', 'F3', 'F7', 'FT9', 'FC5', 'FC1', 'C3', 'T7', 'TP9',
    'CP5', 'CP1', 'P3', 'P7', 'O1', 'Oz', 'O2', 'P4', 'P8', 'TP10',
    'CP6', 'CP2', 'Cz', 'C4', 'T8', 'FT10', 'FC6', 'FC2', 'F4', 'F8',
    'Fp2', 'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', 'C1', 'C5',
    'TP7', 'CP3', 'P1', 'P5', 'PO7', 'PO3', 'POz', 'PO4', 'PO8',
    'P6', 'P2', 'CPz', 'CP4', 'TP8', 'C6', 'C2', 'FC4', 'FT8',
    'F6', 'AF8', 'AF4', 'F2'
]

eeg_data_list = []  # one (channels, TARGET_LENGTH) array per accepted subject
loaded_subjects = []  # ids of the subjects stored in eeg_data_list, same order

# --------------------------------------------------
# STEP 1: LOAD + SELECT CHANNELS + CROP
# --------------------------------------------------
# NOTE(review): recordings are used at their native sampling rate; confirm it
# matches the Parkinson recordings before mixing both arrays in one model.
for i in range(FIRST_SUBJECT, LAST_SUBJECT + 1):
    sub_id = f"sub-{i:02d}"
    sub_path = input_base_path / sub_id

    print("\n----------------------------------")
    print(f"Processing {sub_id}")
    print(f"Looking in: {sub_path}")

    if not sub_path.exists():
        print(f"❌ Folder not found: {sub_path}")
        continue

    # Sorted so the same header is chosen on every run if several are present.
    vhdr_files = sorted(sub_path.glob("*.vhdr"))
    print(f"Found {len(vhdr_files)} .vhdr file(s)")

    if not vhdr_files:
        print(f"⚠️ No .vhdr file found in {sub_id}, skipping.")
        continue

    vhdr_file = vhdr_files[0]
    print(f"Using file: {vhdr_file.name}")

    try:
        print("→ Loading EEG data...")
        raw = mne.io.read_raw_brainvision(vhdr_file, preload=True, verbose=False)

        print("→ Picking selected EEG channels...")
        # A subject lacking any requested channel cannot be aligned with the
        # others, so it is skipped with an explicit message.
        missing = [ch for ch in SELECTED_CHANNELS if ch not in raw.ch_names]
        if missing:
            print(f"⚠️ Skipping {sub_id}: missing channels {missing}")
            continue
        # reorder_channels keeps exactly the listed channels in the listed
        # order (unlisted ones are dropped); it replaces the legacy
        # pick_channels(..., ordered=True) call.
        raw.reorder_channels(SELECTED_CHANNELS)

        print("→ Extracting NumPy array...")
        data = raw.get_data()  # shape: (channels, time)

        n_channels, n_times = data.shape
        print(f"✔ Shape after channel selection: {data.shape}")

        if n_times < TARGET_LENGTH:
            print(f"⚠️ Skipping {sub_id}: only {n_times} samples (< {TARGET_LENGTH})")
            continue

        # Keep FIRST 60,000 samples.
        # float32 matches the dtype of the Parkinson-positive array built in
        # this notebook. float16 (used previously) keeps only ~3 significant
        # digits, and MNE returns EEG in volts, where microvolt-scale values
        # sit in float16's subnormal range - the coarser quantisation would
        # also make the two classes distinguishable by dtype artefacts alone.
        # Cost: 4 bytes instead of 2 per value.
        cropped = data[:, :TARGET_LENGTH].astype(np.float32)
        print(f"✔ Cropped shape: {cropped.shape}")

        eeg_data_list.append(cropped)
        loaded_subjects.append(sub_id)

        print(f"✔ {sub_id} successfully processed.")

    except Exception as e:
        # Best effort: report the failure and move on to the next subject.
        print(f"❌ Error loading {sub_id}: {e}")

# --------------------------------------------------
# STEP 2: STACK + SAVE
# --------------------------------------------------
print("\n==================================")
print("EEG Loading Finished")
print(f"Total subjects loaded: {len(eeg_data_list)}")
print(f"Subjects: {loaded_subjects}")

if not eeg_data_list:
    # np.stack raises ValueError on an empty list; report the cause instead.
    print("❌ No subjects were loaded. Nothing to stack or save.")
else:
    print("\n→ Stacking all subjects into one NumPy array...")

    try:
        # Result shape: (subjects, channels, TARGET_LENGTH)
        parkinson_negative = np.stack(eeg_data_list, axis=0)
        print("✔ Stacking successful.")
        print("Final shape:", parkinson_negative.shape)
        print("Data type:", parkinson_negative.dtype)

        print(f"→ Saving to {output_path}")
        np.save(output_path, parkinson_negative)
        print("✔ File saved successfully.")

    except MemoryError as e:
        print("❌ Memory error during stacking!")
        print(e)

print("\n=== PROCESS COMPLETED ===")


=== EEG Loading Started ===
Input base path: D:\Dermerzel\SomnasNest\Alzheimer\Data
Output file: D:\Dermerzel\SomnasNest\Parkinson\Data\parkinson_negative.npy

----------------------------------
Processing sub-01
Looking in: D:\Dermerzel\SomnasNest\Alzheimer\Data\sub-01
Found 1 .vhdr file(s)
Using file: sub-01_task-rest_eeg.vhdr
→ Loading EEG data...
→ Picking selected EEG channels...
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
→ Extracting NumPy array...
✔ Shape after channel selection: (62, 661520)
✔ Cropped shape: (62, 60000)
✔ sub-01 successfully processed.

----------------------------------
Processing sub-02
Looking in: D:\Dermerzel\SomnasNest\Alzheimer\Data\sub-02
Found 1 .vhdr file(s)
Using file: sub-02_task-rest_eeg.vhdr
→ Loading EEG data...
→ Picking selected EEG channels...
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
→ Extracting NumPy array...
✔ Shape after channel selection: (62, 637720)
✔ Cropped sha