# Compute PSD, SNR, and Z-Score of Pilot Data

## Goals:
1. **Data Import**
    - Import the preprocessed data from npz and json files

2. **Feature Extraction**
    - Calculate the Power Spectral Density (PSD) for each epoch.
    - Calculate the Signal-to-Noise Ratio (SNR) for each epoch (for use in stats)
    - Calculate the Z-score for each epoch (used to determine signal cutoff):
        * **Formula**:   PSD Z-score = (PSD(single trial) - Mean PSD of baseline trials) / Std PSD of baseline trials
        
3. **Data Formatting**
    - Save the data and export to an excel sheet
        * Get the average of all epochs for each unique stimulus

# Import Libraries

In [1]:
# Standard libraries
import json
from pathlib import Path

# Third-party libraries
import numpy as np
import pandas as pd
import scipy.signal as signal
import matplotlib.pyplot as plt

# Custom libraries
from Functions import processing

%matplotlib qt

# Import Epoched Data and Settings

In [2]:
# Load list of files to import (base names, without extension)
files = [
    "sub-P001_ses-S001_task-T1_run-001_eeg"
]

# Get unique subject IDs (the token before the first underscore of each name).
# dict.fromkeys removes duplicates while keeping first-appearance order, so the
# list is identical on every run (the iteration order of a set of strings is not).
subject_ids = [file.split('_')[0] for file in files]
unique_subject_ids = list(dict.fromkeys(subject_ids))

# Folder that holds the preprocessed data. Built with pathlib so the notebook
# also runs on systems whose path separator is not "\".
data_dir = Path("Data") / "Masters_testing"

# Preallocate variables to store EEG data and settings (one entry per file)
eeg_epochs = [None] * len(files)
settings = [None] * len(files)
rs_open_data = [None] * len(files)
#rs_closed_data = [None] * len(files)

# Import data
for f, file in enumerate(files):
    # The epoched EEG is stored in a compressed numpy archive (.npz).
    # The archive is opened in a `with` block so the underlying file handle is
    # closed as soon as every array has been copied into the dictionary.
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only load files from a trusted source.
    with np.load(data_dir / f"{file}.npz", allow_pickle=True) as loaded_data:
        # One array per stimulus label stored in the archive
        eeg_epochs[f] = {stim_label: loaded_data[stim_label] for stim_label in loaded_data.files}

    # Import settings (JSON file saved next to the EEG data)
    with open(data_dir / f"{file}.json", "r") as file_object:
        settings[f] = json.load(file_object)

    # Import resting-state (RS) eyes-open data
    rs_open_data[f] = np.load(data_dir / f"{file}_open.npy", allow_pickle=True)
    #rs_closed_data[f] = np.load(data_dir / f"{file}_closed.npy", allow_pickle=True)

# Compute PSD for all Epochs

Make sure to include the baseline epochs; they are needed later to calculate the Z-score.

Compute the PSD for every epoch (epochs with repeated labels are averaged afterwards).

In [3]:
# PSD settings
# Length of each Welch segment [sec]. With nperseg = window_size * srate the
# frequency bins are spaced 1 / window_size = 0.1 Hz apart (as long as an epoch
# is at least window_size seconds long).
window_size = 10

# One entry per file. Lists (not one big array) are used in case not all files
# have the same number of channels.
eeg_f = [None] * len(files)
eeg_pxx = [None] * len(files)

for file_idx in range(len(files)):
    srate = settings[file_idx]["eeg_srate"]

    # Per-file results, keyed by stimulus label
    freqs_by_stim = {}
    power_by_stim = {}

    for stim_label, epochs in eeg_epochs[file_idx].items():
        # Welch PSD of every epoch of this stimulus; each item is (freqs, pxx)
        welch_results = [
            signal.welch(x=epoch, fs=srate, nperseg=window_size * srate)
            for epoch in epochs
        ]

        # Stack the per-epoch results so the first axis indexes the epoch
        freqs_by_stim[stim_label] = np.array([freqs for freqs, _ in welch_results])
        power_by_stim[stim_label] = np.array([power for _, power in welch_results])

    eeg_f[file_idx] = freqs_by_stim
    eeg_pxx[file_idx] = power_by_stim

# Optional sanity check: print the shape of the PSD results for each stimulus
#for f, file in enumerate(files):
#    print(f"File: {file}")
#    for stim_label in eeg_f[f].keys():
#        print(f"  Stimulus: {stim_label}, PSD shape: {eeg_pxx[f][stim_label].shape}")

# Compute PSD for Resting State

In [4]:
# PSD settings: Welch segment length [sec]
window_size = 10

# One entry per file for the eyes-open resting-state (RS) recording
rs_open_f = [None] * len(files)
rs_open_pxx = [None] * len(files)
#rs_closed_f = [None] * len(files)
#rs_closed_pxx = [None] * len(files)

for file_idx in range(len(files)):
    srate = settings[file_idx]["eeg_srate"]

    # Welch PSD of every entry along the first axis of the eyes-open data
    open_freqs = []
    open_power = []
    for epoch in rs_open_data[file_idx]:
        freqs, power = signal.welch(x=epoch, fs=srate, nperseg=window_size * srate)
        open_freqs.append(freqs)
        open_power.append(power)

    # Store as arrays so the first axis indexes the entry
    rs_open_f[file_idx] = np.array(open_freqs)
    rs_open_pxx[file_idx] = np.array(open_power)

    # Eyes-closed recording is currently disabled; same procedure as above:
    #closed_freqs = []
    #closed_power = []
    #for epoch in rs_closed_data[file_idx]:
    #    freqs, power = signal.welch(x=epoch, fs=srate, nperseg=window_size * srate)
    #    closed_freqs.append(freqs)
    #    closed_power.append(power)
    #rs_closed_f[file_idx] = np.array(closed_freqs)
    #rs_closed_pxx[file_idx] = np.array(closed_power)

# Report the shape of the resting-state PSD of each file
for file_name, open_power in zip(files, rs_open_pxx):
    print(f"File: {file_name}")
    print(f"Eyes open data PSD shape: {open_power.shape}")

File: sub-P001_ses-S001_task-T1_run-001_eeg
Eyes open data PSD shape: (1, 1, 13, 1281)


# Visualize PSD

In [5]:
# Plot settings
plot_psd = True   # Enable to see plots
f_limits = [4, 35]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
stimulus_index = 2  # Select index of stimulus to be plotted

if plot_psd:

    def _style_psd_axis(axis, title, show_xlabel):
        """Apply the title, limits, labels and reference lines shared by every PSD panel."""
        axis.set_title(title)
        axis.set_xlim(f_limits)
        # Raw string: "\m" is not a valid escape sequence in a normal string
        # literal and triggers a SyntaxWarning on recent Python versions.
        axis.set_ylabel(r"PXX [$\mu$V$^2$/Hz]")
        if show_xlabel:
            axis.set_xlabel("Frequency [Hz]")
        # Vertical reference lines
        # NOTE(review): presumably the frequencies of interest for the stimulation - confirm
        for x in [5, 10, 20, 30]:
            axis.axvline(x=x, color='r', linestyle='--')
        axis.axvline(x=12, color='g', linestyle='--')
        axis.axvline(x=11, color='pink', linestyle='--')
        axis.axvline(x=11.5, color='orange', linestyle='--')

    # Get the stimulus label by index
    stim_label = list(eeg_f[file_to_plot].keys())[stimulus_index]
    stim_pxx = eeg_pxx[file_to_plot][stim_label]

    # Mask for frequency limits (frequency vector of the first epoch is used for all panels)
    stim_freqs = eeg_f[file_to_plot][stim_label][0]
    fmask = (stim_freqs >= f_limits[0]) & (stim_freqs <= f_limits[1])
    temp_freq = stim_freqs[fmask]

    # Number of epochs
    num_epochs = len(stim_pxx)

    # Create subplots dynamically based on the number of epochs plus one for the average
    fig, ax = plt.subplots(num_epochs + 1, 1, figsize=(10, 3 * (num_epochs + 1)))
    fig.suptitle(f'PSD for Stimulus: {stim_label} in File: {files[file_to_plot]}')

    # Plot PSD for each epoch: mean over the first axis of the epoch's PSD
    # (assumes each epoch PSD is (channels, freqs) - TODO confirm)
    for epoch_idx in range(num_epochs):
        temp_mean = np.mean(stim_pxx[epoch_idx], axis=0)[fmask]
        # Only needed for the optional +/- 1 SD band below
        temp_sd = np.std(stim_pxx[epoch_idx], axis=0)[fmask]

        ax[epoch_idx].plot(temp_freq, temp_mean, '-')
        #ax[epoch_idx].fill_between(temp_freq, temp_mean - temp_sd, temp_mean + temp_sd, alpha=0.3)
        _style_psd_axis(
            ax[epoch_idx],
            title=f'Epoch {epoch_idx + 1}',
            show_xlabel=(epoch_idx == num_epochs - 1),
        )

    # Plot average PSD across all epochs (first averaged over epochs, then over the first remaining axis)
    avg_pxx = np.mean(stim_pxx, axis=0)
    avg_mean = np.mean(avg_pxx, axis=0)[fmask]
    avg_sd = np.std(avg_pxx, axis=0)[fmask]

    ax[num_epochs].plot(temp_freq, avg_mean, '-')
    _style_psd_axis(ax[num_epochs], title='Average PSD across all epochs', show_xlabel=True)

    # Leave room at the top of the figure for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Visualize PSD for Resting

In [7]:
# Visualization settings
plot_psd = True   # Enable to see plots
f_limits = [4, 35]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted

if plot_psd:
    # Two panels: eyes open (top) and eyes closed (bottom).
    # The eyes-closed panel stays empty while that recording is disabled below.
    fig, ax = plt.subplots(2, 1, figsize=(10, 6))
    fig.suptitle(f'PSD for Resting State Data in File: {files[file_to_plot]}')

    # Plot Resting State Eyes Open Data PSD
    # Frequency mask built from the first frequency entry of the selected file
    fmask_open = (rs_open_f[file_to_plot][0] >= f_limits[0]) & (rs_open_f[file_to_plot][0] <= f_limits[1])
    temp_freq_open = rs_open_f[file_to_plot][0][fmask_open]

    # Mean / SD over the first axis of the [0][0] PSD entry
    # (assumes that entry is (channels, freqs) - TODO confirm)
    temp_mean_open = np.mean(rs_open_pxx[file_to_plot][0][0], axis=0)[fmask_open]
    temp_sd_open = np.std(rs_open_pxx[file_to_plot][0][0], axis=0)[fmask_open]

    ax[0].plot(temp_freq_open, temp_mean_open, '-')
    ax[0].set_title('Resting State Eyes Open PSD')
    ax[0].set_xlim(f_limits)
    # Raw string: "\m" is not a valid escape sequence in a normal string
    # literal and triggers a SyntaxWarning on recent Python versions.
    ax[0].set_ylabel(r"PXX [$\mu$V$^2$/Hz]")

    #fmask_closed = (rs_closed_f[file_to_plot][0] >= f_limits[0]) & (rs_closed_f[file_to_plot][0] <= f_limits[1])
    #temp_freq_closed = rs_closed_f[file_to_plot][0][fmask_closed]

    # Plot Resting State Eyes Closed Data PSD
    #temp_mean_closed = np.mean(rs_closed_pxx[file_to_plot][0][0], axis=0)[fmask_closed]
    #temp_sd_closed = np.std(rs_closed_pxx[file_to_plot][0][0], axis=0)[fmask_closed]

    #ax[1].plot(temp_freq_closed, temp_mean_closed, '-')
    #ax[1].set_title('Resting State Eyes Closed PSD')
    #ax[1].set_xlim(f_limits)
    #ax[1].set_ylabel(r"PXX [$\mu$V$^2$/Hz]")
    ax[0].set_xlabel("Frequency [Hz]")

    # Leave room at the top of the figure for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Compute SNR for all Epochs


In [7]:
# Settings
noise_band = 1    # Single-sided noise band [Hz]
nharms = 1        # Number of harmonics used
db_out = True     # Boolean to get output in dB
stim_freq = 10.0  # Example frequency, replace with your actual frequency

# Preallocate variables
snr = [None] * len(files)   # One (stimulus, channel) matrix per file
epoch_count_snr = {}        # Epoch count per stimulus label, taken from the first file
epochs_snr = []             # Flat log of every per-epoch SNR value (all files and stimuli)

# HARDCODED FOR EXAMPLE
new_ch = ["Fp1", "AF3", "Fz", "C3", "CP1", "P5", "P1", "PO7", "POz", "PO8", "O1", "Oz", "O2"]

# Record the number of epochs for each stimulus of the first file.
# Kept for reference only: the loop below reads the epoch count of each file
# directly, so files with different epoch counts are handled correctly.
for stim_idx, stim_label in settings[0]["stimuli"].items():  # Get labels from settings
    epoch_count_snr[stim_label] = eeg_pxx[0][stim_label].shape[0]

for f0, _ in enumerate(files):
    # Get the stimulus labels for this file (order defines the row order of the SNR matrix)
    stim_labels = list(settings[f0]["stimuli"].values())

    # Preallocate temp_snr as a NumPy array: one row per stimulus, one column per channel
    temp_snr = np.zeros([len(stim_labels), len(new_ch)])

    # Compute SNR per stimulus
    for s, stim_label in enumerate(stim_labels):
        # SNR values of THIS stimulus only. A fresh list is required here:
        # averaging the shared `epochs_snr` log would mix in the epochs of
        # every previously processed stimulus and file.
        stim_epoch_snr = []

        for epoch in range(eeg_pxx[f0][stim_label].shape[0]):
            # presumably returns one SNR value per channel - verify against processing.ssvep_snr
            snr_value = processing.ssvep_snr(
                f=eeg_f[f0][stim_label][epoch],
                pxx=eeg_pxx[f0][stim_label][epoch, :, :],
                stim_freq=stim_freq,
                noise_band=noise_band,
                nharms=nharms,
                db_out=db_out
            )

            stim_epoch_snr.append(snr_value)
            epochs_snr.append(snr_value)

        if stim_epoch_snr:
            # Average over the epochs of this stimulus
            temp_snr[s] = np.mean(np.array(stim_epoch_snr), axis=0)
        else:
            # No epochs recorded for this stimulus: mark the row as missing
            temp_snr[s] = np.nan

    # Save the SNR matrix of this file (inside the file loop so every file is stored)
    snr[f0] = temp_snr

# Export SNR

In [8]:
# Settings
save_snr = True     # Boolean to save SNRs to CSV

#HARDCODED FOR EXAMPLE
ch_subset =  ["Fp1", "AF3", "Fz", "C3", "CP1", "P5", "P1", "PO7", "POz", "PO8", "O1", "Oz", "O2"]

# Preallocate empty list to store all dataFrames
dfs = []

for f0, file in enumerate(files):
    # Preallocate variables
    col_names = []

    # Get the shape of the snr matrix for the current file (rows are stimuli)
    snr_shape = snr[f0].shape
    # Transposed layout for export: one row per channel, one column per stimulus
    temp_snr = np.zeros((len(ch_subset), len(settings[f0]["stimuli"])))

    col_idx = 0
    # Loop through stimuli to fill the SNR matrix.
    # Rows of snr[f0] are addressed by the POSITION of the stimulus in
    # settings[f0]["stimuli"], which is how the SNR cell fills them; the
    # dictionary key `s` is only used for the message.
    for position, (s, stimuli) in enumerate(settings[f0]["stimuli"].items()):
        if position < snr_shape[0]:
            temp_snr[:, col_idx] = snr[f0][position, :]
            col_names.append(f"{stimuli}")
            col_idx += 1
        else:
            print(f"Stimulus index {s} is out of bounds for snr[f0].shape[0]: {snr_shape[0]}")

    # Find indices of channel subset
    subject_id = file.split("_")[0]
    ch_subset_index = []
    row_names = []
    kept_rows = []  # Rows of temp_snr whose channel exists in the settings
    for row, channel in enumerate(ch_subset):
        try:
            ch_subset_index.append(settings[f0]["ch_names"].index(channel))
        except ValueError:
            print(f"Trial {file} has no channel {channel} in dataset")
            continue
        kept_rows.append(row)
        row_names.append(f"{subject_id} - {channel}")

    # Create DataFrame for the file.
    # Only the filled columns and the rows of known channels are exported, so
    # the data always matches `columns` and `index` in shape.
    # NOTE(review): assumes the channel axis of snr[f0] follows the order of ch_subset - confirm
    dfs.append(
        pd.DataFrame(
            data=temp_snr[kept_rows, :col_idx],
            columns=col_names,
            index=row_names
        )
    )

# Concatenate all DataFrames
snr_df = pd.concat(dfs)

# Save SNRs to CSV (path built with pathlib so it works with any path separator)
if save_snr:
    snr_df.to_csv(Path("Data") / "Masters_testing" / "snr_results_test.csv")


# Compute Z-score for all Epochs

In [9]:
# Channels (by index) that enter the z-score calculation.
# Hardcoded for O1, Oz, O2.
selected_channels = [10, 11, 12]

# One dictionary of per-stimulus z-scores per file
eeg_zscore_pxx = [None] * len(files)

for file_idx in range(len(files)):
    # Resting-state PSD restricted to the selected channels. The array is
    # indexed as (stim, epoch, channel, freq); the printed shape above shows a
    # single stim and a single epoch for the current recording.
    rs_selected = rs_open_pxx[file_idx][:, :, selected_channels, :]

    # Baseline statistics per frequency bin, taken ACROSS THE SELECTED CHANNELS
    # (axis=2); squeeze() then drops the singleton stim/epoch axes.
    # NOTE(review): the formula in the notebook header describes the mean/std
    # over baseline trials - confirm that a channel-wise baseline is intended.
    rs_mean = np.squeeze(np.mean(rs_selected, axis=2))
    rs_std = np.squeeze(np.std(rs_selected, axis=2))

    stim_zscores = {}
    for stim_label, stim_pxx in eeg_pxx[file_idx].items():
        # For every epoch: z-score each selected channel against the baseline,
        # then average over those channels -> one z-score vector per epoch
        epoch_zscores = np.array([
            np.mean((stim_pxx[epoch_idx, selected_channels, :] - rs_mean) / rs_std, axis=0)
            for epoch_idx in range(len(stim_pxx))
        ])

        # Average across epochs to get the final z-score vector of the stimulus
        stim_zscores[stim_label] = np.mean(epoch_zscores, axis=0)

    eeg_zscore_pxx[file_idx] = stim_zscores

# Print output shape to confirm
for file_name, stim_zscores in zip(files, eeg_zscore_pxx):
    print(f"File: {file_name}")
    for stim_label, zscore in stim_zscores.items():
        print(f"  Stimulus: {stim_label}, Final Z-score shape: {zscore.shape}")


File: sub-P001_ses-S001_task-T1_run-001_eeg
  Stimulus: Contrast1Size1, Final Z-score shape: (1281,)
  Stimulus: Contrast1Size2, Final Z-score shape: (1281,)
  Stimulus: Contrast1Size3, Final Z-score shape: (1281,)
  Stimulus: Contrast2Size1, Final Z-score shape: (1281,)
  Stimulus: Contrast2Size2, Final Z-score shape: (1281,)
  Stimulus: Contrast2Size3, Final Z-score shape: (1281,)
  Stimulus: Contrast3Size1, Final Z-score shape: (1281,)
  Stimulus: Contrast3Size2, Final Z-score shape: (1281,)
  Stimulus: Contrast3Size3, Final Z-score shape: (1281,)
  Stimulus: Contrast4Size1, Final Z-score shape: (1281,)
  Stimulus: Contrast4Size2, Final Z-score shape: (1281,)
  Stimulus: Contrast4Size3, Final Z-score shape: (1281,)


In [10]:
# Export to CSV: one table per file with a frequency column followed by one
# z-score column per stimulus
for f, file in enumerate(files):
    # Frequency axis taken from the first epoch of the first stimulus
    # (assuming all epochs have the same frequency values)
    first_stim_label = next(iter(eeg_f[f]))
    data = {'Frequency (Hz)': eeg_f[f][first_stim_label][0]}
    for stim_label, zscore in eeg_zscore_pxx[f].items():
        data[stim_label] = zscore

    df = pd.DataFrame(data)

    # Save to CSV. With a single file the original name is kept; with several
    # files each one gets its own CSV, otherwise every file would overwrite
    # the previous one and only the last would survive.
    csv_filename = "zscore.csv" if len(files) == 1 else f"{file}_zscore.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Saved Z-scores to {csv_filename}")

Saved Z-scores to zscore.csv


# Save Data