# Edge Case Analysis for Preprocessing Errors

This notebook focuses on debugging two problematic subjects (e.g., Sub12 and Sub41) whose files produce errors during preprocessing.

The errors include:
- "Unable to synchronously open object (object 'cnt' doesn't exist)"
- "Accessing a group is done with bytes or str, not <class 'tuple'>"

These errors and a few more, prevented us take full advantage of data, because it truncated majority of good data while considering it bad or by simply not accessing it

This notebook is for analyzing these edge cases and the update the preprocessor module

I'll analyze two subjects: Sub12 and Sub41.

In [1]:
# imports
import os
import glob
import numpy as np
import h5py
import mne

In [2]:
def safe_get(dataset, key):
    """
    Safely retrieve an object from an h5py group using a key.
    If the key is a tuple, use its first element.
    If the key is bytes, decode it to a string.
    Returns the dataset if successful, or None.
    """
    if isinstance(key, tuple):
        key = key[0]
    if isinstance(key, bytes):
        key = key.decode('utf-8')
    try:
        obj = dataset[key]
        if isinstance(obj, tuple):
            obj = obj[0]
        return obj[()]
    except Exception as e:
        print(f"Error retrieving key '{key}': {e}")
        return None

In [3]:
def analyze_group(group, label=""):
    """
    Print detailed information for all keys in an h5py group.
    For each key, print its type, dtype (if available), shape (if available), and a data preview.
    """
    print(f"\nDetailed analysis for group {label}:")
    for key in group.keys():
        try:
            obj = group[key]
            dtype_info = obj.dtype if hasattr(obj, 'dtype') else 'N/A'
            shape_info = obj.shape if hasattr(obj, 'shape') else 'N/A'
            print(f"  Key: {key} | Type: {type(obj)} | Dtype: {dtype_info} | Shape: {shape_info}")
            try:
                data = obj[()]
                preview = np.array(data).flatten()[:10]
                print(f"    Data preview: {preview}")
            except Exception as e:
                print(f"    Error retrieving data: {e}")
        except Exception as e:
            print(f"Error accessing key '{key}': {e}")

In [4]:
def analyze_marker_file(file_path):
    """
    Open an mrk.mat file with h5py, identify the marker group, and analyze its keys.
    Focus especially on the 'event' key and its subkeys.
    """
    print(f"\nAnalyzing marker file: {file_path}")
    try:
        with h5py.File(file_path, mode="r") as f:
            top_keys = list(f.keys())
            print("Top-level keys:", top_keys)

            # Identify a marker group key (ignore '#refs#')
            marker_key = None
            for key in top_keys:
                k_decoded = key.decode('utf-8') if isinstance(key, bytes) else key
                if k_decoded.lower().startswith("mrk") and k_decoded != "#refs#":
                    marker_key = k_decoded
                    break
            if marker_key is None:
                print("No valid marker group key found.")
                return
            print("Using marker group key:", marker_key)
            mrk_group = f[marker_key]
            print("Keys in marker group:", list(mrk_group.keys()))
            analyze_group(mrk_group, label="marker group")

            # Focus on 'event' key:
            if "event" in mrk_group:
                event_obj = mrk_group["event"]
                print("\nAnalysis of 'event' key:")
                print("Type of 'event' object:", type(event_obj))
                if isinstance(event_obj, h5py.Group):
                    analyze_group(event_obj, label="'event' group")
                else:
                    try:
                        data = event_obj[()]
                        print("Retrieved event data. Shape:", np.array(data).shape)
                    except Exception as e:
                        print("Error retrieving event data:", e)
            else:
                print("'event' key not found in marker group.")
    except Exception as e:
        print(f"Error loading marker file {file_path}: {e}")

In [5]:
def analyze_cnt_file(file_path):
    """
    Open a cnt.mat (or alternative) file with h5py, list its top-level keys, and analyze the 'cnt' group if available.
    """
    print(f"\nAnalyzing cnt file: {file_path}")
    try:
        with h5py.File(file_path, mode="r") as f:
            top_keys = list(f.keys())
            print("Top-level keys:", top_keys)
            # Look for a key starting with "cnt"
            cnt_key = None
            for key in top_keys:
                k_decoded = key.decode('utf-8') if isinstance(key, bytes) else key
                if k_decoded.lower().startswith("cnt"):
                    cnt_key = k_decoded
                    break
            if cnt_key is None:
                print("No valid cnt group key found.")
                return
            print("Using cnt group key:", cnt_key)
            cnt_group = f[cnt_key]
            print("Keys in cnt group:", list(cnt_group.keys()))
            analyze_group(cnt_group, label="cnt group")
    except Exception as e:
        print(f"Error loading cnt file {file_path}: {e}")

## 5. Analyze Edge cases

In [6]:
# Define raw directories for Sub12 and Sub41
raw_root = "../data/raw/"

# For Sub12:
sub12_eeg_folder = os.path.join(raw_root, "Sub12", "EEG")
sub12_cnt_files = sorted([os.path.join(sub12_eeg_folder, f) for f in os.listdir(sub12_eeg_folder) if f.startswith("cnt") and f.endswith(".mat")])
sub12_mrk_files = sorted([os.path.join(sub12_eeg_folder, f) for f in os.listdir(sub12_eeg_folder) if f.startswith("mrk") and f.endswith(".mat")])
print("Sub12 cnt files:", sub12_cnt_files)
print("Sub12 mrk files:", sub12_mrk_files)

print("\n--- Analyzing Sub12 cnt files ---")
for fpath in sub12_cnt_files:
    analyze_cnt_file(fpath)

print("\n--- Analyzing Sub12 mrk files ---")
for fpath in sub12_mrk_files:
    analyze_marker_file(fpath)

Sub12 cnt files: ['../data/raw/Sub12/EEG/cnt1.mat', '../data/raw/Sub12/EEG/cnt2.mat', '../data/raw/Sub12/EEG/cnt3.mat']
Sub12 mrk files: ['../data/raw/Sub12/EEG/mrk1.mat', '../data/raw/Sub12/EEG/mrk2.mat', '../data/raw/Sub12/EEG/mrk3.mat']

--- Analyzing Sub12 cnt files ---

Analyzing cnt file: ../data/raw/Sub12/EEG/cnt1.mat
Top-level keys: ['#refs#', 'cnt1']
Using cnt group key: cnt1
Keys in cnt group: ['EEG', 'Gender', 'HumanFactor', 'clab', 'fs']

Detailed analysis for group cnt group:
  Key: EEG | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: float64 | Shape: (116380, 63)
    Data preview: [-3621.62890625 -5873.48339844  2845.75048828  4293.06396484
  3119.28540039 -1651.90344238  -289.40414429 -5295.94433594
 10431.29394531 -8624.89746094]
  Key: Gender | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: uint16 | Shape: (3, 1)
    Data preview: [77 65 78]
  Key: HumanFactor | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: uint16 | Shape: (13, 1)
    Data preview: [ 69  97 1

In [7]:
# For Sub41:
sub41_eeg_folder = os.path.join(raw_root, "Sub41", "EEG")
sub41_cnt_file = os.path.join(sub41_eeg_folder, "cnt.mat")
sub41_mrk_file = os.path.join(sub41_eeg_folder, "mrk.mat")
print("\nSub41 cnt file exists:", os.path.exists(sub41_cnt_file))
print("Sub41 mrk file exists:", os.path.exists(sub41_mrk_file))

print("\n--- Analyzing Sub41 cnt.mat ---")
analyze_cnt_file(sub41_cnt_file)

print("\n--- Analyzing Sub41 mrk.mat ---")
analyze_marker_file(sub41_mrk_file)


Sub41 cnt file exists: True
Sub41 mrk file exists: True

--- Analyzing Sub41 cnt.mat ---

Analyzing cnt file: ../data/raw/Sub41/EEG/cnt.mat
Top-level keys: ['#refs#', 'cnt']
Using cnt group key: cnt
Keys in cnt group: ['EEG', 'Gender', 'HumanFactor', 'clab', 'fs']

Detailed analysis for group cnt group:
  Key: EEG | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: float64 | Shape: (2955500, 63)
    Data preview: [  2356.98120117  -3928.66015625   3987.7421875    4123.6796875
  -6991.93994141  12704.97363281  -1935.49707031   1041.21044922
 -12767.52246094 -20842.12890625]
  Key: Gender | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: uint16 | Shape: (5, 1)
    Data preview: [87 79 77 65 78]
  Key: HumanFactor | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: uint16 | Shape: (13, 1)
    Data preview: [ 69  97 114 108 121  32  65 100 111 112]
  Key: clab | Type: <class 'h5py._hl.dataset.Dataset'> | Dtype: object | Shape: (63, 1)
    Data preview: [<HDF5 object reference> <HDF5 obj

In [9]:
# for bad channel analysis of subject 37

raw_file_path = "../data/preprocessed/Sub37/preprocessed_epochs-epo.fif"  # or another Raw file if available
try:
    raw_sub37 = mne.io.read_raw_fif(raw_file_path, preload=True)
    print("Loaded Sub37 raw object with shape:", raw_sub37.get_data().shape)
except Exception as e:
    print("Error loading Sub37 raw object:", e)

# Now, try running find_bad_channels_maxwell and catch the error.
try:
    bads = mne.preprocessing.find_bad_channels_maxwell(raw_sub37)
    print("Bad channels found:", bads)
except Exception as e:
    print("Caught error in find_bad_channels_maxwell:", e)

Opening raw data file ../data/preprocessed/Sub37/preprocessed_epochs-epo.fif...
Isotrak not found
Error loading Sub37 raw object: No raw data in /Users/rahul/PycharmProjects/Semester 5/gnn-project/exploratory notebooks/../data/preprocessed/Sub37/preprocessed_epochs-epo.fif
Caught error in find_bad_channels_maxwell: name 'raw_sub37' is not defined


  raw_sub37 = mne.io.read_raw_fif(raw_file_path, preload=True)


In [10]:
import os
import shutil
import mne

# Define file paths
old_file = "../data/preprocessed/Sub37/preprocessed_epochs-epo.fif"
new_file = "../data/preprocessed/Sub37/Sub37_raw.fif"

# Check if the old file exists
if os.path.exists(old_file):
    print("Old file exists. Attempting to copy to a new conforming filename...")
    try:
        shutil.copy(old_file, new_file)
        print(f"File successfully copied to {new_file}")
    except Exception as e:
        print("Error copying file:", e)
else:
    print("Old file does not exist. Please verify your file paths.")

# Try to load the new file with MNE
try:
    raw_sub37 = mne.io.read_raw_fif(new_file, preload=True)
    print("Loaded Sub37 raw object successfully.")
    print("Raw data shape:", raw_sub37.get_data().shape)
except Exception as e:
    print("Error loading Sub37 raw object from new file:", e)

Old file exists. Attempting to copy to a new conforming filename...
File successfully copied to ../data/preprocessed/Sub37/Sub37_raw.fif
Opening raw data file ../data/preprocessed/Sub37/Sub37_raw.fif...
Isotrak not found
Error loading Sub37 raw object from new file: No raw data in /Users/rahul/PycharmProjects/Semester 5/gnn-project/exploratory notebooks/../data/preprocessed/Sub37/Sub37_raw.fif


In [11]:
epochs_sub37 = mne.read_epochs("../data/preprocessed/Sub37/preprocessed_epochs-epo.fif", preload=True)
print("Loaded epochs with shape:", epochs_sub37.get_data().shape)

Reading /Users/rahul/PycharmProjects/Semester 5/gnn-project/exploratory notebooks/../data/preprocessed/Sub37/preprocessed_epochs-epo.fif ...
Isotrak not found
    Found the data of interest:
        t =       0.00 ...    5000.00 ms
        0 CTF compensation matrices available
Not setting metadata
22 matching events found
No baseline correction applied
0 projection items activated
Loaded epochs with shape: (22, 63, 1001)
