# Compute PSD, SNR, and Z-Score of Pilot Data

## Goals:
1. **Data Import**
    - Import the preprocessed data from npz and json files

2. **Feature Extraction**
    - Calculate the Power Spectral Density (PSD) for each epoch.
    - Calculate the Signal-to-Noise Ratio (SNR) for each epoch (for use in stats)
    - Calculate the Z-score for each epoch (used to determine signal cutoff):
        * **Formula**:   PSD Z-score = (PSD(single trial) - Mean PSD of baseline trials) / Std PSD of baseline trials
        * TODO: add a citation for the paper this formula is taken from.
        
3. **Data Formatting**
    - Save the data and export it to CSV files
        * Get the average of all epochs for each unique stimulus

**TODO (fixes suggested by Eli):**
- Average over the signals in the time domain first, and end by averaging over frequency (PSD).
- Don't average over the z-score at the end, because it is a normalization.
- Average over epochs (?) - still to be confirmed.

# Import Libraries

In [1]:
# Standard libraries
import json
import numpy as np
import pandas as pd
import scipy.signal as signal
import matplotlib.pyplot as plt

# Custom libraries
from Functions import processing

%matplotlib qt

# Import Epoched Data and Settings

In [2]:
# Load list of files to import (file names without extension)
files = [
    "sub-P001_ses-S001_task-T1_run-001_eeg"
]

# Folder that holds the preprocessed data. Forward slashes are used so the
# relative path resolves on Windows as well as on Linux/macOS (a path written
# with backslashes is only split into folders on Windows).
data_dir = "Data/Masters_testing"

# Get unique subject IDs (the first "_"-separated token of each file name)
subject_ids = [file.split('_')[0] for file in files]
unique_subject_ids = list(set(subject_ids))

# Preallocate variables to store EEG data and settings (one entry per file)
eeg_epochs = [None] * len(files)
settings = [None] * len(files)
rs_open_data = [None] * len(files)

# Import data
for f, file in enumerate(files):
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code. Only load files that come from a trusted source.

    # The epoched EEG data is stored in a compressed numpy archive (.npz).
    # The archive is opened in a `with` block so its file handle is released
    # as soon as every array has been read into memory.
    with np.load(f"{data_dir}/{file}.npz", allow_pickle=True) as loaded_data:
        # Map each array name stored in the archive (one per stimulus) to its data
        eeg_epochs[f] = {stim_label: loaded_data[stim_label] for stim_label in loaded_data.files}

    # Import settings
    with open(f"{data_dir}/{file}.json", "r") as file_object:
        settings[f] = json.load(file_object)

    # Import RS eyes open data
    rs_open_data[f] = np.load(f"{data_dir}/{file}_open.npy", allow_pickle=True)

# Compute PSD for all "On" Epochs
**Note:** this section computes the PSD of each individual epoch (the PSDs are not averaged over epochs here).

TODO: double-check the `signal.welch` settings.
- Read this: https://docs.scipy.org/doc/scipy/tutorial/signal.html#tutorial-spectralanalysis

In [None]:
# PSD settings
window_size = 10  # Welch segment length [s]: 10 = 0.1 Hz resolution, 5 = 0.2 Hz resolution, 2 = 0.5 Hz resolution
overlap_fraction = 0.5  # Fraction of a segment shared with the next one (0.5 = 50% overlap)

# Preallocate variables
eeg_f = [None] * len(files)
eeg_pxx = [None] * len(files)  # Preallocate to list in case not all files have the same number of channels

# Compute PSD for each file
for f, file in enumerate(files):
    # Welch parameters are constant within a file, so compute them once.
    # scipy documents nperseg/noverlap as integers (number of samples).
    srate = settings[f]["eeg_srate"]
    nperseg = int(window_size * srate)
    noverlap = int(nperseg * overlap_fraction)

    eeg_f[f] = {}
    eeg_pxx[f] = {}

    # Compute PSD for each stimulus
    for stim_label, epochs in eeg_epochs[f].items():
        f_per_epoch = []
        pxx_per_epoch = []

        # Compute PSD for each epoch (welch operates along the last axis)
        for epoch in epochs:
            f_values, pxx_values = signal.welch(
                x=epoch,
                fs=srate,
                nperseg=nperseg,
                noverlap=noverlap,
            )
            f_per_epoch.append(f_values)
            pxx_per_epoch.append(pxx_values)

        # Convert lists to arrays for consistency (first axis = epoch)
        eeg_f[f][stim_label] = np.array(f_per_epoch)
        eeg_pxx[f][stim_label] = np.array(pxx_per_epoch)

# Print the shape of the PSD results for each stimulus
#for f, file in enumerate(files):
#    print(f"File: {file}")
#    for stim_label in eeg_f[f].keys():
#        print(f"  Stimulus: {stim_label}, PSD shape: {eeg_pxx[f][stim_label].shape}")

plot_psd = True   # Enable to see plots
f_limits = [4, 35]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
num_stimuli = 12    # Maximum number of stimuli to plot
n_rows, n_cols = 3, 4  # Layout of the subplot grid

if plot_psd:
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10))
    fig.suptitle(f'PSD for All Stimuli in File: {files[file_to_plot]}')
    axes = axes.flatten()

    # Only plot stimuli that exist in the file and fit in the grid, so a file
    # with fewer stimuli than `num_stimuli` does not raise an IndexError.
    labels_to_plot = list(eeg_f[file_to_plot])[:min(num_stimuli, len(axes))]

    for stim_idx, stim_label in enumerate(labels_to_plot):
        ax = axes[stim_idx]

        # Frequency vector of the first epoch, restricted to the plotted band
        stim_freqs = eeg_f[file_to_plot][stim_label][0]
        fmask = (stim_freqs >= f_limits[0]) & (stim_freqs <= f_limits[1])
        temp_freq = stim_freqs[fmask]

        # Average across epochs (axis 0), then across channels
        avg_pxx = np.mean(eeg_pxx[file_to_plot][stim_label], axis=0)
        avg_mean = np.mean(avg_pxx, axis=0)[fmask]

        # Plot average PSD across all epochs for this stimulus
        ax.plot(temp_freq, avg_mean, '-')
        # The PSD of the "on" epochs is computed with half-overlapping windows
        ax.set_title(f'Stimulus: {stim_label}, 50% overlap')
        ax.set_xlim(f_limits)
        # Raw string: "\m" is not a valid escape sequence in a normal string
        ax.set_ylabel(r"PXX [$\mu$V$^2$/Hz]")
        # Label the x axis on the bottom row of the grid only
        if stim_idx >= (n_rows - 1) * n_cols:
            ax.set_xlabel("Frequency [Hz]")

        # Add vertical lines
        for x in [5, 10, 20, 30]:
            ax.axvline(x=x, color='r', linestyle='--')

    # Hide grid cells that have no stimulus to show
    for unused_ax in axes[len(labels_to_plot):]:
        unused_ax.set_visible(False)

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Compute PSD for Resting State Epochs

Get PSD for RS eyes open and closed data with the same frequency resolution (window size) that was used for "on" epochs

After the PSD is calculated, get the mean and std of the RS data for use in the Z-score calculation

In [None]:
# PSD settings
window_size = 10  # Welch segment length [s]

# Preallocate variables
rs_open_f = [None] * len(files)
rs_open_pxx = [None] * len(files)

for f, file in enumerate(files):
    srate = settings[f]["eeg_srate"]
    nperseg = int(window_size * srate)  # scipy documents nperseg as an integer

    f_per_epoch = []
    pxx_per_epoch = []

    # noverlap is left at its default, which scipy documents as nperseg // 2
    for epoch in rs_open_data[f]:
        f_values, pxx_values = signal.welch(
            x=epoch,
            fs=srate,
            nperseg=nperseg
        )
        f_per_epoch.append(f_values)
        pxx_per_epoch.append(pxx_values)

    rs_open_f[f] = np.array(f_per_epoch)
    rs_open_pxx[f] = np.array(pxx_per_epoch)

# Print the shape of the PSD results for each stimulus
#for f, file in enumerate(files):
#    print(f"File: {file}")
#    print(f"Eyes open data PSD shape: {rs_open_pxx[f].shape}")


# Get mean and std of PSD for each frequency for the whole RS open data
selected_channels = [10, 11, 12]

# Baseline statistics are computed for every file instead of relying on the
# loop variable `f` left over from the loop above (which only covers the last file).
rs_mean_by_file = [None] * len(files)
rs_std_by_file = [None] * len(files)

for f, file in enumerate(files):
    pxx = rs_open_pxx[f]
    # For the example file the shape is (1, 1, 6, 13, 1281). All leading axes are
    # folded into one epoch axis -> (6, 13, 1281), then channels are selected
    # -> (6, 3, 1281). Reshaping (instead of a bare squeeze) keeps the channel
    # axis even when only one channel is selected.
    # assumes the last two axes are (channels, frequencies) - TODO confirm
    rs_epochs = pxx.reshape(-1, pxx.shape[-2], pxx.shape[-1])[:, selected_channels, :]
    rs_mean_by_file[f] = np.mean(rs_epochs, axis=0)  # average across epochs -> (3, 1281)
    rs_std_by_file[f] = np.std(rs_epochs, axis=0)    # std across epochs -> (3, 1281)

# Single-baseline names kept for code that expects one baseline; as before,
# they hold the statistics of the last file in `files`.
rs_mean = rs_mean_by_file[-1]
rs_std = rs_std_by_file[-1]

File: sub-P001_ses-S001_task-T1_run-001_eeg
Eyes open data PSD shape: (1, 1, 6, 13, 1281)


# Compute SNR for all Epochs


In [6]:
# Settings
noise_band = 1    # Single-sided noise band [Hz]
nharms = 1        # Number of harmonics used
db_out = True     # Boolean to get output in dB
stim_freq = 10.0  # Example frequency, replace with your actual frequency

# Preallocate variables
snr = [None] * len(files)
epoch_count_snr = {}  # Number of epochs per stimulus (of the most recently processed file)
epochs_snr = []

# HARDCODED FOR EXAMPLE
new_ch = ["Fp1", "AF3", "Fz", "C3", "CP1", "P5", "P1", "PO7", "POz", "PO8", "O1", "Oz", "O2"]

for f0, _ in enumerate(files):
    stim_labels = list(settings[f0]["stimuli"].values())
    temp_snr = np.zeros([len(stim_labels), len(new_ch)])

    # Compute SNR per stimulus; row `s` follows the order of settings[f0]["stimuli"]
    for s, stim_label in enumerate(stim_labels):
        # Count the epochs of *this* file (not of file 0) for the stimulus
        epoch_count_snr[stim_label] = eeg_pxx[f0][stim_label].shape[0]

        # Start from an empty list for every stimulus, otherwise the mean below
        # would also include the epochs of all previously processed stimuli
        epochs_snr = [
            processing.ssvep_snr(
                f=f_epoch,
                pxx=pxx_epoch,
                stim_freq=stim_freq,
                noise_band=noise_band,
                nharms=nharms,
                db_out=db_out
            )
            for f_epoch, pxx_epoch in zip(eeg_f[f0][stim_label], eeg_pxx[f0][stim_label])
        ]

        # Average the SNR over the epochs of this stimulus
        temp_snr[s, :] = np.mean(np.array(epochs_snr), axis=0)

    # Save the SNR of this file (inside the file loop so every file is stored)
    snr[f0] = temp_snr

# Compute Z-score for all Epochs (Averaging)
- averages over channels and epochs for each stimulus

In [None]:
# Choose which EEG channels to include in the z-score calculation
selected_channels = [10, 11, 12]  # Using channel indices for O1, Oz, O2

# Preallocate storage for z-score results
eeg_zscore_pxx = [None] * len(files)

for f, file in enumerate(files):
    # Baseline (resting state, eyes open) statistics of *this* file, so that each
    # file is normalised by its own baseline rather than by a single shared one.
    # All leading axes of the RS PSD are folded into one epoch axis.
    # assumes the last two axes are (channels, frequencies) - TODO confirm
    rs_pxx = rs_open_pxx[f]
    rs_epochs = rs_pxx.reshape(-1, rs_pxx.shape[-2], rs_pxx.shape[-1])[:, selected_channels, :]
    baseline_mean = np.mean(rs_epochs, axis=0)  # (channels, freqs)
    baseline_std = np.std(rs_epochs, axis=0)    # (channels, freqs)

    eeg_zscore_pxx[f] = {}  # Dictionary to store per-stimulus z-scores

    # Compute Z-score for each stimulus
    for stim_label, stim_pxx in eeg_pxx[f].items():
        eeg_pxx_channels = stim_pxx[:, selected_channels, :]  # Select only the channels of interest

        # Z-score of every epoch and channel at once; the baseline arrays
        # broadcast over the leading epoch axis -> (epochs, channels, freqs)
        zscores = (eeg_pxx_channels - baseline_mean) / baseline_std

        # Average over selected channels to get one z-score vector per epoch -> (epochs, freqs)
        zscores_per_epoch = np.mean(zscores, axis=1)

        # Average across epochs to get final z-score per stimulus -> (freqs,)
        eeg_zscore_pxx[f][stim_label] = np.mean(zscores_per_epoch, axis=0)
        # TODO: could also compute np.std(zscores_per_epoch, axis=0) here to
        # make sure there aren't crazy outliers

# Print output shape to confirm
#for f, file in enumerate(files):
#    print(f"File: {file}")
#    for stim_label in eeg_zscore_pxx[f].keys():
#        print(f"  Stimulus: {stim_label}, Final Z-score shape: {eeg_zscore_pxx[f][stim_label].shape}")

# Plot settings
plot_zscore = True  # Set to False to skip the figure
f_limits = [4, 35]  # Frequency range shown on the x axis [min, max][Hz]
file_to_plot = 0    # Index (into `files`) of the file to plot
stimulus_index = 0  # Index of the stimulus to plot

if plot_zscore:
    # Look up the label of the requested stimulus
    zscores_for_file = eeg_zscore_pxx[file_to_plot]
    stim_labels = list(zscores_for_file.keys())
    stim_label_to_plot = stim_labels[stimulus_index]

    # x values: frequency vector of the first epoch (assumed identical for all epochs)
    freqs = eeg_f[file_to_plot][stim_label_to_plot][0]
    # y values: z-score of the selected stimulus
    zscore_values = zscores_for_file[stim_label_to_plot]

    # Draw the z-score as a single line on its own figure
    zscore_fig, zscore_ax = plt.subplots(figsize=(10, 6))
    zscore_ax.plot(freqs, zscore_values, label=f'Stimulus: {stim_label_to_plot}')
    zscore_ax.set_xlim(f_limits)
    zscore_ax.set_ylim(-5, 10)
    zscore_ax.set_xlabel('Frequency (Hz)')
    zscore_ax.set_ylabel('Z-score')
    zscore_ax.set_title(f'Z-score of PSD for {stim_label_to_plot}')
    zscore_ax.legend()
    zscore_ax.grid(True)
    plt.show()

Epoch 0 for stimulus Contrast1Size1 has shape (1281,)
Epoch 1 for stimulus Contrast1Size1 has shape (1281,)
Stimulus Contrast1Size1 has shape (2, 1281)
Final Z-score shape: (1281,)
Epoch 0 for stimulus Contrast1Size2 has shape (1281,)
Epoch 1 for stimulus Contrast1Size2 has shape (1281,)
Stimulus Contrast1Size2 has shape (2, 1281)
Final Z-score shape: (1281,)
Epoch 0 for stimulus Contrast1Size3 has shape (1281,)
Epoch 1 for stimulus Contrast1Size3 has shape (1281,)
Stimulus Contrast1Size3 has shape (2, 1281)
Final Z-score shape: (1281,)
Epoch 0 for stimulus Contrast2Size1 has shape (1281,)
Epoch 1 for stimulus Contrast2Size1 has shape (1281,)
Epoch 2 for stimulus Contrast2Size1 has shape (1281,)
Stimulus Contrast2Size1 has shape (3, 1281)
Final Z-score shape: (1281,)
Epoch 0 for stimulus Contrast2Size2 has shape (1281,)
Epoch 1 for stimulus Contrast2Size2 has shape (1281,)
Epoch 2 for stimulus Contrast2Size2 has shape (1281,)
Epoch 3 for stimulus Contrast2Size2 has shape (1281,)
Stimul

# Export Z-Score

In [8]:
save_zscore = False  # Boolean to save Z-scores to CSV

if save_zscore:
    # Export to CSV: one table per file, one column per stimulus
    for f, file in enumerate(files):
        # The frequency column is taken from the first epoch of the first stimulus
        first_label = next(iter(eeg_f[f]))
        data = {'Frequency (Hz)': eeg_f[f][first_label][0]}
        data.update(eeg_zscore_pxx[f])

        df = pd.DataFrame(data)

        # With a single file the original "zscore.csv" name is kept. With several
        # files each one gets its own CSV, otherwise every file would overwrite
        # the output of the previous one.
        csv_filename = "zscore.csv" if len(files) == 1 else f"{file}_zscore.csv"
        df.to_csv(csv_filename, index=False)

# Export SNR

In [10]:
# Settings
save_snr = False    # Boolean to save SNRs to CSV

#HARDCODED FOR EXAMPLE
ch_subset =  ["Fp1", "AF3", "Fz", "C3", "CP1", "P5", "P1", "PO7", "POz", "PO8", "O1", "Oz", "O2"]

# Preallocate empty list to store all dataFrames
dfs = []

for f0, file in enumerate(files):
    # Preallocate variables
    col_names = []

    # Get the shape of the snr matrix for the current file
    snr_shape = snr[f0].shape
    # Rows follow the order of ch_subset, columns are filled stimulus by stimulus
    temp_snr = np.zeros((len(ch_subset), len(settings[f0]["stimuli"])))

    col_idx = 0
    # Loop through stimuli to fill the SNR matrix
    for s, stimuli in settings[f0]["stimuli"].items():
        if int(s) < snr_shape[0]:
            temp_snr[:, col_idx] = snr[f0][int(s), :]
            col_names.append(f"{stimuli}")
            col_idx += 1
        else:
            print(f"Stimulus index {s} is out of bounds for snr[f0].shape[0]: {snr_shape[0]}")

    # Find indices of channel subset
    subject_id = file.split("_")[0]
    ch_subset_index = []
    row_names = []
    kept_rows = []  # Rows of temp_snr whose channel exists in the dataset
    for row, channel in enumerate(ch_subset):
        try:
            ch_subset_index.append(settings[f0]["ch_names"].index(channel))
        except ValueError:
            print(f"Trial {file} has no channel {channel} in dataset")
        else:
            kept_rows.append(row)
            row_names.append(f"{subject_id} - {channel}")

    # Create DataFrame for the file. Only the rows/columns that received a label
    # are kept, so the data always matches `row_names` x `col_names` even when a
    # channel or a stimulus was skipped above.
    dfs.append(
        pd.DataFrame(
            data=temp_snr[kept_rows, :col_idx],
            columns=col_names,
            index=row_names
        )
    )

# Concatenate all DataFrames (pd.concat raises on an empty list)
snr_df = pd.concat(dfs) if dfs else pd.DataFrame()

# Save SNRs to CSV (forward slashes so the path also resolves outside Windows)
if save_snr:
    snr_df.to_csv("Data/Masters_testing/snr_results_test2.csv")
