In [1]:
filename = 'D:/Code/DRAFT/zuco/task2-TSR/preprocessed/YAC/bip_YAC_TSR4_EEG.mat'

In [3]:
import h5py

f = h5py.File(filename, 'r')
f['EEG'].keys()

<KeysViewHDF5 ['chaninfo', 'chanlocs', 'comments', 'condition', 'data', 'datfile', 'dipfit', 'epoch', 'epochdescription', 'etc', 'event', 'eventdescription', 'filename', 'filepath', 'group', 'history', 'icaact', 'icachansind', 'icasphere', 'icasplinefile', 'icaweights', 'icawinv', 'nbchan', 'pnts', 'ref', 'reject', 'saved', 'session', 'setname', 'specdata', 'specicaact', 'splinefile', 'srate', 'stats', 'subject', 'times', 'trials', 'urchanlocs', 'urevent', 'xmax', 'xmin']>

In [7]:
import numpy as np
import h5py

# Open the file
f = h5py.File(filename, 'r')

# Access the labels
labels = f['EEG']['chanlocs']['labels']

# Convert the labels to readable strings
channel_names = []
for i in range(labels.shape[0]):
    # Get the reference
    label_ref = labels[i, 0]
    
    # Dereference and get the data
    label_data = f[label_ref][()]
    
    # The data appears to be ASCII values in a nested array format
    # Convert ASCII values to characters
    if isinstance(label_data, np.ndarray):
        # Flatten the array and convert ASCII codes to characters
        ascii_values = label_data.flatten()
        # Filter out any zeros or newline characters
        ascii_values = ascii_values[ascii_values > 0]
        # Convert to string
        channel_name = ''.join([chr(int(val)) for val in ascii_values if val != 10])  # 10 is newline
    else:
        # If it's already a string, try to parse it
        label_str = str(label_data)
        # Extract numbers from the string format '[[69]\n [50]]'
        import re
        numbers = re.findall(r'\d+', label_str)
        channel_name = ''.join([chr(int(num)) for num in numbers])
    
    channel_names.append(channel_name)

print(f"Number of channels: {len(channel_names)}")
print(f"First 10 channels: {channel_names[:10]}")

# Create a mapping dictionary
channel_mapping = {i: name for i, name in enumerate(channel_names)}

# Verify the decoding - channels should have names like 'E1', 'E2', etc.
for i in range(min(10, len(channel_names))):
    print(f"Channel {i}: {channel_names[i]}")

Number of channels: 105
First 10 channels: ['E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E9', 'E10', 'E11', 'E12']
Channel 0: E2
Channel 1: E3
Channel 2: E4
Channel 3: E5
Channel 4: E6
Channel 5: E7
Channel 6: E9
Channel 7: E10
Channel 8: E11
Channel 9: E12


In [6]:
# Alternative approach - check the exact structure of labels
print(f"Labels shape: {labels.shape}")
print(f"Labels dtype: {labels.dtype}")

# If labels are stored as a 2D array of references
channel_names = []
for i in range(105):  # or labels.shape[0]
    try:
        # Try different ways to access the label
        if labels.shape == (105, 1):
            label_ref = labels[i, 0]
        else:
            label_ref = labels[i]
        
        # Dereference and decode
        label_str = f[label_ref][()].tobytes().decode('utf-8').strip('\x00')
        channel_names.append(label_str)
    except Exception as e:
        print(f"Error at index {i}: {e}")

# Create a mapping dictionary for easy lookup
channel_mapping = {i: name for i, name in enumerate(channel_names)}
print(f"Channel 0: {channel_mapping[0]}")
print(f"Channel 1: {channel_mapping[1]}")

Labels shape: (105, 1)
Labels dtype: object
Channel 0: E 2
Channel 1: E 3


In [8]:
import numpy as np
import h5py
import json
import re

# Open the file
f = h5py.File(filename, 'r')

# Access the labels
labels = f['EEG']['chanlocs']['labels']

# Convert the labels to readable strings
channel_names = []
for i in range(labels.shape[0]):
    # Get the reference
    label_ref = labels[i, 0]
    
    # Dereference and get the data
    label_data = f[label_ref][()]
    
    # Convert ASCII values to characters
    if isinstance(label_data, np.ndarray):
        # Flatten the array and convert ASCII codes to characters
        ascii_values = label_data.flatten()
        # Filter out any zeros or newline characters (10 is newline)
        ascii_values = ascii_values[(ascii_values > 0) & (ascii_values != 10)]
        # Convert to string
        channel_name = ''.join([chr(int(val)) for val in ascii_values])
    else:
        # If it's already a string, try to parse it
        label_str = str(label_data)
        # Extract numbers from the string format '[[69]\n [50]]'
        numbers = re.findall(r'\d+', label_str)
        # Convert ASCII codes to characters
        channel_name = ''.join([chr(int(num)) for num in numbers if int(num) != 10])
    
    channel_names.append(channel_name.strip())

# Create a dictionary with index as key and channel name as value
channel_dict = {i: name for i, name in enumerate(channel_names)}

# Also create a reverse mapping (channel name to index)
channel_to_index = {name: i for i, name in enumerate(channel_names)}

# Create a comprehensive dictionary with both mappings
channel_mapping = {
    "index_to_channel": channel_dict,
    "channel_to_index": channel_to_index,
    "channel_list": channel_names,
    "total_channels": len(channel_names)
}

# Save to JSON file
output_filename = "eeg_channel_mapping.json"
with open(output_filename, 'w') as json_file:
    json.dump(channel_mapping, json_file, indent=2)

print(f"Channel mapping saved to {output_filename}")
print(f"Total channels: {len(channel_names)}")
print(f"First 10 channels: {channel_names[:10]}")
print(f"Last 5 channels: {channel_names[-5:]}")

# Close the HDF5 file
f.close()

# Verify the JSON file was created correctly
with open(output_filename, 'r') as json_file:
    loaded_data = json.load(json_file)
    print(f"\nJSON file contains {loaded_data['total_channels']} channels")
    print("Sample mapping (first 5):")
    for i in range(min(5, loaded_data['total_channels'])):
        print(f"  Index {i} -> Channel '{loaded_data['index_to_channel'][str(i)]}'")

Channel mapping saved to eeg_channel_mapping.json
Total channels: 105
First 10 channels: ['E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E9', 'E10', 'E11', 'E12']
Last 5 channels: ['E121', 'E122', 'E123', 'E124', 'Cz']

JSON file contains 105 channels
Sample mapping (first 5):
  Index 0 -> Channel 'E2'
  Index 1 -> Channel 'E3'
  Index 2 -> Channel 'E4'
  Index 3 -> Channel 'E5'
  Index 4 -> Channel 'E6'
