# Compute PSD and Z-Score of Pilot Data

## Goals:
1. **Data Import**
    - Import the preprocessed data from npz and json files

2. **Feature Extraction**
    - Average the time series data for each epoch over the occipital channels
    - Calculate the Power Spectral Density (PSD) for each epoch.
    - Calculate the Z-score for each epoch (used to determine signal cutoff):
        * **Formula**:   PSD Z-score = (PSD(single trial) - Mean PSD of baseline trials) / Std PSD of baseline trials
    - Find a way to combine the Z-scores for each stimulus (will end up with n_zscores = n_epochs per stimulus)
        
3. **Data Formatting**
    - Save the Z-score data and export it to a CSV file (which can be opened in Excel)

# Import Libraries

In [1]:
# Standard libraries
import json
import numpy as np
import pandas as pd
import scipy.signal as signal
import matplotlib.pyplot as plt

# Custom libraries
from Functions import processing

%matplotlib qt

# Import Epoched Data and Settings

In [2]:
# Recordings to import (base names, without extension). Un-comment entries to
# include more participants.
files = [
    "sub-P010_ses-S001_task-T1_run-001_eeg",
    #"sub-P002_ses-S001_task-T1_run-001_eeg",
    #"sub-P003_ses-S001_task-T1_run-001_eeg",
    #"sub-P004_ses-S001_task-T1_run-001_eeg",
    #"sub-P005_ses-S001_task-T1_run-001_eeg",
    #"sub-P006_ses-S001_task-T1_run-001_eeg",
    #"sub-P008-Redo_ses-S001_task-T2_run-001_eeg",
    #"sub-P009_ses-S001_task-T1_run-001_eeg",
    #"sub-P010_ses-S001_task-T1_run-001_eeg",
]

# Subject IDs are the first '_'-separated token of each name - currently not used
subject_ids = [file.split('_')[0] for file in files]
unique_subject_ids = list(set(subject_ids))

# One slot per file: epoched EEG, settings, and baseline data
eeg_epochs = [None] * len(files)
settings = [None] * len(files)
baseline = [None] * len(files)

# Import data
# NOTE: allow_pickle=True lets np.load unpickle object arrays, which can run
# arbitrary code; only use it on locally generated, trusted files.
for f, file in enumerate(files):
    # Epoched EEG is stored in a compressed numpy archive (.npz). The archive
    # is opened as a context manager so the file handle is closed once every
    # array has been read into memory.
    with np.load(f"Data\\Pilot-Data\\EEG\\All\\{file}.npz", allow_pickle=True) as loaded_data:
        # Map each array name in the archive (one per stimulus) to its data
        eeg_epochs[f] = {stim_label: loaded_data[stim_label] for stim_label in loaded_data.files}

    # Import settings
    with open(f"Data\\Pilot-Data\\EEG\\All\\{file}.json", "r") as file_object:
        settings[f] = json.load(file_object)

    # Import baseline data (same archive layout, "_baseline" suffix)
    with np.load(f"Data\\Pilot-Data\\EEG\\All\\{file}_baseline.npz", allow_pickle=True) as loaded_baseline:
        baseline[f] = {stim_label: loaded_baseline[stim_label] for stim_label in loaded_baseline.files}

# Average time series across occipital channels per epoch for "on" data and baseline data

In [3]:
# Channels to average over; any that are missing from a file are reported and ignored
selected_channels = ['O1', 'Oz', 'O2']
occipital_epochs = [{} for _ in range(len(files))]  # List of dictionaries for each file
occipital_baseline = [{} for _ in range(len(files))]  # List of dictionaries for each file

for f, file in enumerate(files):
    # Get channel names for this file
    available_channels = settings[f]['new_ch_names']

    # Get indices of channels that are actually available
    channel_indices = []
    for ch in selected_channels:
        if ch in available_channels:
            channel_indices.append(available_channels.index(ch))
        else:
            print(f"Channel {ch} is not available in file {file}")

    if not channel_indices:
        print(f"No selected occipital channels found for file {file}. Skipping averaging.")
        continue  # Leaves this file's dictionaries empty

    # "On" epochs: select the occipital channels (axis 1) and average over them,
    # which drops the channel axis
    for stim_label, stim_data in eeg_epochs[f].items():
        occipital_epochs[f][stim_label] = np.mean(stim_data[:, channel_indices, :], axis=1)

    # Baseline epochs: same selection and averaging
    for stim_label, baseline_data in baseline[f].items():
        occipital_baseline[f][stim_label] = np.mean(baseline_data[:, channel_indices, :], axis=1)

# Compute PSD for each "On" Epoch

In [4]:
# PSD settings
window_size = 10  # Welch segment length [s]: 10 = 0.1 Hz resolution, 5 = 0.2 Hz resolution, 2 = 0.5 Hz resolution

# Preallocate variables
eeg_f = [None] * len(files)
eeg_pxx = [None] * len(files)  # Preallocate to list in case not all files have the same number of epochs

# Compute PSD for each file
for f in range(len(files)):
    eeg_f[f] = {}
    eeg_pxx[f] = {}

    # Welch parameters depend only on the file, so compute them once.
    # scipy documents nperseg/noverlap as integers, so pass integers explicitly.
    srate = settings[f]["eeg_srate"]
    nperseg = int(window_size * srate)
    noverlap = nperseg // 2  # 50% overlap

    # Compute PSD for each stimulus
    for stim_label, epochs in occipital_epochs[f].items():
        # One (frequencies, power) pair per epoch; each epoch is a 1D array (num_samples,)
        psd_per_epoch = [
            signal.welch(x=epoch, fs=srate, nperseg=nperseg, noverlap=noverlap)
            for epoch in epochs
        ]

        # Stack into arrays of shape (num_epochs, num_frequencies)
        eeg_f[f][stim_label] = np.array([freq_axis for freq_axis, _ in psd_per_epoch])
        eeg_pxx[f][stim_label] = np.array([pxx for _, pxx in psd_per_epoch])

# Print the shape of the PSD results for each stimulus
for f, file in enumerate(files):
    print(f"File: {file}")
    for stim_label in eeg_f[f].keys():
        print(f"  Stimulus: {stim_label}, PSD shape: {eeg_pxx[f][stim_label].shape}")

File: sub-P010_ses-S001_task-T1_run-001_eeg
  Stimulus: Contrast1Size1, PSD shape: (3, 1281)
  Stimulus: Contrast1Size2, PSD shape: (3, 1281)
  Stimulus: Contrast1Size3, PSD shape: (2, 1281)
  Stimulus: Contrast2Size1, PSD shape: (2, 1281)
  Stimulus: Contrast2Size2, PSD shape: (4, 1281)
  Stimulus: Contrast2Size3, PSD shape: (2, 1281)
  Stimulus: Contrast3Size1, PSD shape: (3, 1281)
  Stimulus: Contrast3Size2, PSD shape: (3, 1281)
  Stimulus: Contrast3Size3, PSD shape: (5, 1281)
  Stimulus: Contrast4Size1, PSD shape: (5, 1281)
  Stimulus: Contrast4Size2, PSD shape: (3, 1281)
  Stimulus: Contrast4Size3, PSD shape: (5, 1281)


# Plot PSD of "On" Epochs

In [5]:
plot_psd = True  # Enable to see plots
f_limits = [5, 35]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
num_stimuli = 12    # Maximum number of stimuli to plot (one panel each)

if plot_psd:
    fig, axes = plt.subplots(3, 4, figsize=(15, 10))
    fig.suptitle(f'PSD for All Stimuli in File: {files[file_to_plot]}')
    axes = axes.flatten()

    # Resolve the labels once, capped so we never index past the available
    # stimuli or the available panels
    psd_labels = list(eeg_f[file_to_plot].keys())[:min(num_stimuli, len(axes))]

    for stim_idx, stim_label in enumerate(psd_labels):
        ax = axes[stim_idx]

        # Frequency axis of the first epoch, restricted to the plotting limits
        psd_freqs = eeg_f[file_to_plot][stim_label][0]
        fmask = (psd_freqs >= f_limits[0]) & (psd_freqs <= f_limits[1])
        temp_freq = psd_freqs[fmask]

        # Plot each epoch separately for this stimulus
        for epoch_idx, epoch_pxx in enumerate(eeg_pxx[file_to_plot][stim_label]):
            ax.plot(temp_freq, epoch_pxx[fmask], label=f'Epoch {epoch_idx+1}')  # Label each epoch

        # Set plot details
        ax.set_title(f'Stimulus: {stim_label}')
        ax.set_xlim(f_limits)
        # Raw string: "\m" is not a valid Python escape and triggers a SyntaxWarning otherwise
        ax.set_ylabel(r"PXX [$\mu$V$^2$/Hz]")

        # Only the bottom row of the 3x4 grid gets an x label
        if stim_idx >= 8:
            ax.set_xlabel("Frequency [Hz]")

        # Add legend to identify epochs
        ax.legend()

    # Hide panels that have no stimulus to show
    for unused_ax in axes[len(psd_labels):]:
        unused_ax.set_visible(False)

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

  axes[stim_idx].set_ylabel("PXX [$\mu$V$^2$/Hz]")


# Compute Mean and STD of the PSD for RS Eyes Open Epochs

In [6]:
# PSD settings
window_size = 10  # Welch segment length [s]: 10 = 0.1 Hz resolution, 5 = 0.2 Hz resolution, 2 = 0.5 Hz resolution

# Preallocate variables
baseline_f = [None] * len(files)
baseline_pxx = [None] * len(files)  # Preallocate to list in case not all files have the same number of epochs

# Per-file, per-label baseline statistics. These keep every result, whereas
# the plain `baseline_mean` / `baseline_std` names below are overwritten on
# each iteration.
baseline_mean_by_file = [None] * len(files)
baseline_std_by_file = [None] * len(files)

# Compute PSD for each file
for f in range(len(files)):
    baseline_f[f] = {}
    baseline_pxx[f] = {}
    baseline_mean_by_file[f] = {}
    baseline_std_by_file[f] = {}

    # Welch parameters depend only on the file, so compute them once.
    # scipy documents nperseg/noverlap as integers, so pass integers explicitly.
    srate = settings[f]["eeg_srate"]
    nperseg = int(window_size * srate)
    noverlap = nperseg // 2  # 50% overlap

    # Compute PSD for each baseline entry
    for stim_label, epochs in occipital_baseline[f].items():
        # One (frequencies, power) pair per epoch; each epoch is a 1D array (num_samples,)
        psd_per_epoch = [
            signal.welch(x=epoch, fs=srate, nperseg=nperseg, noverlap=noverlap)
            for epoch in epochs
        ]

        # Stack into arrays of shape (num_epochs, num_frequencies)
        baseline_f[f][stim_label] = np.array([freq_axis for freq_axis, _ in psd_per_epoch])
        baseline_pxx[f][stim_label] = np.array([pxx for _, pxx in psd_per_epoch])

        # Mean and std across baseline epochs, per frequency bin
        baseline_mean = np.mean(baseline_pxx[f][stim_label], axis=0)
        baseline_std = np.std(baseline_pxx[f][stim_label], axis=0)

        # Keep the statistics for this file/label. `baseline_mean` and
        # `baseline_std` stay bound for downstream cells, but after the loops
        # they only hold the values of the last label of the last file.
        baseline_mean_by_file[f][stim_label] = baseline_mean
        baseline_std_by_file[f][stim_label] = baseline_std

# Calculate Z Score for all epochs
- Calculate per stimulus (averaged across epochs) and save into eeg_zscore_mean_pxx

In [7]:
# Preallocate storage for z-score results
eeg_zscore_mean_pxx = [None] * len(files)

for f, file in enumerate(files):
    eeg_zscore_mean_pxx[f] = {}  # Dictionary to store per-stimulus z-scores averaged across epochs

    # Baseline statistics must come from THIS file's baseline PSD. Relying on
    # module-level mean/std variables would silently apply the last processed
    # file's baseline to every file.
    baseline_labels = list(baseline_pxx[f].keys())
    if not baseline_labels:
        print(f"No baseline PSD available for file {file}. Skipping z-score.")
        continue

    # NOTE(review): if a baseline archive holds more than one entry, only the
    # last one is used here (as before) - confirm whether they should be pooled.
    reference_pxx = baseline_pxx[f][baseline_labels[-1]]  # (num_baseline_epochs, freqs)
    file_baseline_mean = np.mean(reference_pxx, axis=0)   # (freqs,)
    file_baseline_std = np.std(reference_pxx, axis=0)     # (freqs,)

    # Compute Z-score for each stimulus
    for stim_label, stim_pxx in eeg_pxx[f].items():
        # stim_pxx is (num_epochs, freqs); broadcasting z-scores every epoch at once:
        # z = (PSD(single trial) - mean PSD of baseline) / std PSD of baseline
        zscores_per_epoch = (stim_pxx - file_baseline_mean) / file_baseline_std

        # Store z-scores per stimulus (averaged across epochs), shape (freqs,)
        eeg_zscore_mean_pxx[f][stim_label] = np.mean(zscores_per_epoch, axis=0)

# Plot Mean Z-Score

In [8]:
# Plot the mean z-score of every stimulus as a panel plot (4 panels per row)
plot_zscore_mean = True  # Enable to see plots
f_limits = [8, 24]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted

if plot_zscore_mean:
    # Get the stimulus labels
    stim_labels = list(eeg_zscore_mean_pxx[file_to_plot].keys())
    num_stimuli = len(stim_labels)  # Number of stimuli

    # Size the grid from the number of stimuli instead of assuming 12
    # (12 stimuli still give the 3 x 4 layout and a 15 x 10 figure)
    n_cols = 4
    n_rows = max(1, -(-num_stimuli // n_cols))  # Ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(15, 10 * n_rows / 3), squeeze=False
    )  # squeeze=False always returns a 2D array of axes
    fig.suptitle(f'Mean Z-scores for All Stimuli in File: {files[file_to_plot]}')
    axes = axes.flatten()

    for stim_idx, stim_label in enumerate(stim_labels):
        ax = axes[stim_idx]

        # Get the frequency values for the current stimulus
        freqs = eeg_f[file_to_plot][stim_label][0]  # Assuming all epochs have the same frequency values

        # Get the mean z-scores for the current stimulus
        zscore_mean_values = eeg_zscore_mean_pxx[file_to_plot][stim_label]  # Shape: (freqs,)

        # Plot the mean Z-score for the current stimulus
        ax.plot(freqs, zscore_mean_values, label=f'{stim_label}')
        ax.set_xlim(f_limits)
        ax.set_ylim(-5, 10)
        ax.set_title(f'Stimulus: {stim_label}')
        ax.set_xlabel('Frequency (Hz)')
        ax.set_ylabel('Z-score')
        ax.grid(True)
        ax.legend()

    # Hide panels that have no stimulus to show
    for unused_ax in axes[num_stimuli:]:
        unused_ax.set_visible(False)

    # Adjust layout
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Get Averaged Z Score at 10 and 20 Hz

In [9]:
# Frequencies of interest [Hz]; results are stored in this order
target_freqs = (10.0, 20.0)

ten_twenty_mean_zscores = [None] * len(files)

for f, file in enumerate(files):
    ten_twenty_mean_zscores[f] = {}

    # Pick the mean z-score at each target frequency for every stimulus
    for stim_label, zscores in eeg_zscore_mean_pxx[f].items():
        # Use this file's / stimulus' own frequency axis instead of a
        # variable left over from the plotting cell
        stim_freqs = eeg_f[f][stim_label][0]

        # Nearest-bin lookup: exact float equality would silently return
        # nothing if the frequency resolution does not land exactly on a target
        bin_indices = [int(np.argmin(np.abs(stim_freqs - target))) for target in target_freqs]

        # Store as a list: [z-score at 10 Hz, z-score at 20 Hz]
        ten_twenty_mean_zscores[f][stim_label] = zscores[bin_indices].tolist()


# Export Z scores
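Note: only the first file in `files` is exported; the output path is currently hard-coded to participant P010, so this does not export all participants.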

In [10]:
save_zscore = True  # Set to True to save the z-scores to a CSV file

if save_zscore:
    # Stimulus labels are taken from the first file
    stimuli = ten_twenty_mean_zscores[0].keys()

    # stimulus -> [values at 10 Hz, values at 20 Hz], one value per file
    mean_zscores = {
        stim: [
            [per_file[stim][0] for per_file in ten_twenty_mean_zscores],  # 10 Hz
            [per_file[stim][1] for per_file in ten_twenty_mean_zscores],  # 20 Hz
        ]
        for stim in stimuli
    }

    # Three rows (labels, 10 Hz, 20 Hz), one column per stimulus.
    # Only the first file's value (index 0) is exported.
    label_row = list(mean_zscores)
    ten_hz_row = [values[0][0] for values in mean_zscores.values()]
    twenty_hz_row = [values[1][0] for values in mean_zscores.values()]

    df = pd.DataFrame([label_row, ten_hz_row, twenty_hz_row])
    df.index = ['Stimulus', '10Hz', '20Hz']

    # Save to CSV (row labels and column header are not written)
    df.to_csv('Data\\Pilot-Data\\EEG\\sub-P010\\zscores_P010.csv', index=False, header=False)