# Compute PSD and Z-Score of Pilot Data

## Goals:
1. **Data Import**
    - Import the preprocessed data from npz and json files

2. **Feature Extraction**
    - Average the time series data for each epoch over the occipital channels
    - Calculate the Power Spectral Density (PSD) for each epoch.
    - Calculate the Z-score for each epoch (used to determine signal cutoff):
        * **Formula**:   PSD Z-score = (PSD(single trial) - Mean PSD of baseline trials) / Std PSD of baseline trials
    - Find a way to combine the Z-scores for each stimulus (will end up with n_zscores = n_epochs per stimulus)
        
3. **Data Formatting**
    - Save the Z-score data and export it to CSV files (which can be opened in Excel)

# Import Libraries

In [1]:
# Standard libraries
import json
import numpy as np
import pandas as pd
import scipy.signal as signal
import matplotlib.pyplot as plt

# Custom libraries
from Functions import processing

%matplotlib qt

# Import Epoched Data and Settings

In [2]:
# Load the epoched EEG data, the recording settings and the resting-state (eyes open)
# data for every file listed in `files`.
from pathlib import Path

# Base names (without extension) of the recordings to import
files = [
    "sub-P001_ses-S001_task-T1_run-001_eeg",
]

# Folder holding the data; built with pathlib so the separator matches the host OS
# (the previous hard-coded "\\" separators only resolved on Windows)
data_dir = Path("Data") / "Masters_testing"

# Get unique subject IDs (the text before the first "_" of each file name)
subject_ids = [file.split('_')[0] for file in files]
unique_subject_ids = list(set(subject_ids))

# Preallocate variables to store EEG data and settings
eeg_epochs = [None] * len(files)
settings = [None] * len(files)
rs_open_data = [None] * len(files)

# Import data
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code. Only load files from a trusted source.
for f, file in enumerate(files):
    # np.load on a .npz archive returns a lazy NpzFile that keeps the file open.
    # Read every array inside the `with` block so the handle is closed afterwards.
    with np.load(data_dir / f"{file}.npz", allow_pickle=True) as loaded_data:
        # One entry per array stored in the archive (keyed by stimulus label)
        eeg_epochs[f] = {stim_label: loaded_data[stim_label] for stim_label in loaded_data.files}

    # Import settings
    with open(data_dir / f"{file}.json", "r") as file_object:
        settings[f] = json.load(file_object)

    # Import RS eyes open data
    rs_open_data[f] = np.load(data_dir / f"{file}_open.npy", allow_pickle=True)

# Average time series across occipital channels per epoch

In [3]:
# Occipital channels of interest and their positions in the channel list.
# The positions are looked up in the settings of the first file only.
selected_channels = ['O1', 'Oz', 'O2']
channel_indices = [settings[0]['new_ch_names'].index(channel) for channel in selected_channels]

# One dictionary per file: stimulus label -> epochs averaged over the selected channels
occipital_epochs = [{} for _ in range(len(files))]
for f, averaged_by_stimulus in enumerate(occipital_epochs):
    for stim_label, stim_epochs in eeg_epochs[f].items():
        # Keep only the occipital channels (channels are indexed on axis 1)
        occipital_only = stim_epochs[:, channel_indices, :]

        # Collapse the channel axis so each epoch becomes a single time series
        averaged_by_stimulus[stim_label] = np.mean(occipital_only, axis=1)

# Compute PSD for each "On" Epoch

In [4]:
# Compute the Welch PSD of every channel-averaged epoch, then optionally plot the
# PSD of each epoch, one panel per stimulus.

# PSD settings
window_size = 10  # Welch segment length [s]: 10 = 0.1 Hz resolution, 5 = 0.2 Hz resolution, 2 = 0.5 Hz resolution

# Preallocate variables
eeg_f = [None] * len(files)
eeg_pxx = [None] * len(files)  # Preallocate to list in case not all files have the same number of epochs

# Compute PSD for each file
for f in range(len(files)):
    eeg_f[f] = {}
    eeg_pxx[f] = {}

    # Segment length and overlap in samples; hoisted because they are constant per file.
    # Sample counts are passed as whole numbers.
    srate = settings[f]["eeg_srate"]
    nperseg = int(window_size * srate)
    noverlap = nperseg // 2  # 50% overlap

    # Compute PSD for each stimulus
    for stim_label, epochs in occipital_epochs[f].items():
        freqs_per_epoch = []
        pxx_per_epoch = []

        # Compute PSD for each epoch (each epoch is one channel-averaged time series)
        for epoch in epochs:
            f_values, pxx_values = signal.welch(
                x=epoch,
                fs=srate,
                nperseg=nperseg,
                noverlap=noverlap,
            )
            freqs_per_epoch.append(f_values)
            pxx_per_epoch.append(pxx_values)

        # Convert lists to arrays for consistency
        eeg_f[f][stim_label] = np.array(freqs_per_epoch)  # Shape: (num_epochs, num_frequencies)
        eeg_pxx[f][stim_label] = np.array(pxx_per_epoch)  # Shape: (num_epochs, num_frequencies)

#Print the shape of the PSD results for each stimulus
#for f, file in enumerate(files):
#     print(f"File: {file}")
#     for stim_label in eeg_f[f].keys():
#         print(f"  Stimulus: {stim_label}, PSD shape: {eeg_pxx[f][stim_label].shape}")

plot_psd = True  # Enable to see plots
f_limits = [5, 35]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
num_stimuli = 12    # Maximum number of stimuli to plot (the figure has 3 x 4 = 12 panels)

if plot_psd:
    fig, axes = plt.subplots(3, 4, figsize=(15, 10))
    fig.suptitle(f'PSD for All Stimuli in File: {files[file_to_plot]}')
    axes = axes.flatten()

    # Plot at most `num_stimuli` stimuli and never more than there are panels or
    # stimuli, so a recording with fewer stimuli no longer raises an IndexError.
    psd_stim_labels = list(eeg_f[file_to_plot].keys())[:min(num_stimuli, len(axes))]

    for stim_idx, stim_label in enumerate(psd_stim_labels):
        # Frequency axis of the first epoch (assumed identical for all epochs of the stimulus)
        stim_freqs = eeg_f[file_to_plot][stim_label][0]

        # Extract frequency values within the plot limits
        fmask = (stim_freqs >= f_limits[0]) & (stim_freqs <= f_limits[1])
        temp_freq = stim_freqs[fmask]

        # Plot each epoch separately for this stimulus
        for epoch_idx, epoch in enumerate(eeg_pxx[file_to_plot][stim_label]):
            axes[stim_idx].plot(temp_freq, epoch[fmask], label=f'Epoch {epoch_idx+1}')  # Label each epoch

        # Set plot details
        axes[stim_idx].set_title(f'Stimulus: {stim_label}')
        axes[stim_idx].set_xlim(f_limits)
        # Raw string: "\m" is not a valid escape sequence in a normal string literal
        axes[stim_idx].set_ylabel(r"PXX [$\mu$V$^2$/Hz]")

        # Only the bottom row of the 3 x 4 grid gets an x-axis label
        if stim_idx >= 8:
            axes[stim_idx].set_xlabel("Frequency [Hz]")

        # Add vertical lines for specific frequencies
        #for x in [5, 10, 20, 30]:
        #    axes[stim_idx].axvline(x=x, color='r', linestyle='--')

        # Add legend to identify epochs
        axes[stim_idx].legend()

    # Hide panels that have no stimulus to show
    for unused_ax in axes[len(psd_stim_labels):]:
        unused_ax.set_visible(False)

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Compute Mean and STD of the PSD for RS Eyes Open Epochs

In [5]:
# Compute the Welch PSD of the resting-state (eyes open) data and derive the baseline
# mean and standard deviation per frequency that are used for the Z-score.

# PSD settings
window_size = 10  # Welch segment length [s]

# Preallocate variables
rs_open_f = [None] * len(files)
rs_open_pxx = [None] * len(files)

for f, file in enumerate(files):
    rs_open_f[f] = []
    rs_open_pxx[f] = []

    for epoch in rs_open_data[f]:
        f_values, pxx_values = signal.welch(
            x=epoch,
            fs=settings[f]["eeg_srate"],
            nperseg=window_size * settings[f]["eeg_srate"]
        )
        rs_open_f[f].append(f_values)
        rs_open_pxx[f].append(pxx_values)

    rs_open_f[f] = np.array(rs_open_f[f])
    rs_open_pxx[f] = np.array(rs_open_pxx[f])

# Print the shape of the PSD results for each stimulus
#for f, file in enumerate(files):
#    print(f"File: {file}")
#    print(f"Eyes open data PSD shape: {rs_open_pxx[f].shape}")

# Get mean and std of PSD for each frequency for the whole RS open data

# The baseline is taken from the last file in `files`. This was previously implicit,
# through the loop variable `f` left over from the loop above.
# NOTE(review): with several files every recording is scored against this one
# baseline - confirm that this is intended.
baseline_file = len(files) - 1

# Per the original note the PSD array is (1, 1, 6, 13, 1281) - TODO confirm.
# Channels are on axis 3 and frequencies on the last axis.
baseline_pxx = rs_open_pxx[baseline_file][:, :, :, channel_indices, :]

# Average over the selected channels, then flatten all remaining leading axes into a
# single epoch axis -> (num_epochs, num_frequencies). reshape is used instead of
# squeeze so that a recording with a single baseline epoch keeps its epoch axis.
rs_epoch_pxx = np.mean(baseline_pxx, axis=3)
rs_epoch_pxx = rs_epoch_pxx.reshape(-1, rs_epoch_pxx.shape[-1])

# Mean and std over the baseline epochs, these are the values to use in the z-score
# calculation. The std is the spread of the channel-averaged PSD across epochs
# ("Std PSD of baseline trials"), not the std of per-epoch across-channel stds.
rs_mean = np.mean(rs_epoch_pxx, axis=0)  # Shape: (num_frequencies,)
rs_std = np.std(rs_epoch_pxx, axis=0)    # Shape: (num_frequencies,)

# Calculate Z Score for all epochs

In [6]:
# Z-score every epoch's PSD against the resting-state baseline (rs_mean, rs_std),
# then optionally plot the Z-scores of one stimulus, one panel per epoch.

# One dictionary per file: stimulus label -> Z-scores with shape (num_epochs, freqs)
eeg_zscore_pxx = []

for f, file in enumerate(files):
    zscores_by_stimulus = {}

    for stim_label, eeg_pxx_values in eeg_pxx[f].items():
        # Z-score = (epoch PSD - baseline mean) / baseline std, computed epoch by epoch
        zscores_per_epoch = np.array([
            (eeg_pxx_values[row, :] - rs_mean) / rs_std
            for row in range(len(eeg_pxx_values))
        ])

        # Keep every epoch separately (no averaging across epochs)
        zscores_by_stimulus[stim_label] = zscores_per_epoch

    eeg_zscore_pxx.append(zscores_by_stimulus)

# Plot settings
plot_zscore = True  # Enable to see plots
f_limits = [8, 12]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
stimulus_index = 0  # Select index of stimulus to be plotted

if plot_zscore:
    # Resolve the label of the stimulus that was selected by index
    stim_labels = list(eeg_zscore_pxx[file_to_plot].keys())
    stim_label_to_plot = stim_labels[stimulus_index]

    # Frequency axis of the first epoch (assumed identical for every epoch)
    freqs = eeg_f[file_to_plot][stim_label_to_plot][0]

    # Z-scores of the selected stimulus, shape (num_epochs, freqs)
    zscore_values = eeg_zscore_pxx[file_to_plot][stim_label_to_plot]

    # One stacked panel per epoch
    num_epochs = zscore_values.shape[0]
    fig, axes = plt.subplots(num_epochs, 1, figsize=(10, 6 * num_epochs))

    # With a single epoch plt.subplots returns one Axes instead of an array
    if num_epochs == 1:
        axes = [axes]

    for epoch_index, ax in enumerate(axes):
        ax.plot(freqs, zscore_values[epoch_index, :], label=f'Epoch {epoch_index + 1}')
        ax.set_xlim(f_limits)
        ax.set_ylim(-5, 10)
        ax.set_xlabel('Frequency (Hz)')
        ax.set_ylabel('Z-score')
        ax.set_title(f'Z-score of PSD for {stim_label_to_plot} (Epoch {epoch_index + 1})')
        ax.grid(True)
        ax.legend()

    plt.tight_layout()  # Adjust subplots to fit
    plt.show()


# Average Z scores per stimulus

In [11]:
# Average the Z-scores over epochs for every stimulus, then optionally plot the mean
# Z-score of one stimulus.

# One dictionary per file: stimulus label -> mean Z-score with shape (freqs,)
eeg_zscore_mean = [
    {
        stim_label: np.mean(stim_zscores, axis=0)  # collapse the epoch axis
        for stim_label, stim_zscores in eeg_zscore_pxx[f].items()
    }
    for f, file in enumerate(files)
]

# Plot settings
plot_zscore_mean = True  # Enable to see plots
f_limits = [8, 12]  # Frequency limits for the plots [min, max][Hz]
file_to_plot = 0    # Select index of file to be plotted
stimulus_index = 2  # Select index of stimulus to be plotted

if plot_zscore_mean:
    # Resolve the label of the stimulus that was selected by index
    stim_labels = list(eeg_zscore_mean[file_to_plot].keys())
    stim_label_to_plot = stim_labels[stimulus_index]

    # Frequency axis of the first epoch (assumed identical for every epoch)
    freqs = eeg_f[file_to_plot][stim_label_to_plot][0]

    # Mean Z-scores of the selected stimulus, shape (freqs,)
    zscore_mean_values = eeg_zscore_mean[file_to_plot][stim_label_to_plot]

    # Single figure showing the mean Z-score against frequency
    plt.figure(figsize=(10, 6))
    plt.plot(freqs, zscore_mean_values, label=f'Mean Z-score of {stim_label_to_plot}')
    plt.title(f'Mean Z-score of PSD for {stim_label_to_plot}')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Z-score')
    plt.xlim(f_limits)
    plt.ylim(-5, 10)
    plt.grid(True)
    plt.legend()
    plt.show()

# Filter Z-score data to a band around stim freq
- The stimulus frequency is 10 Hz, so only the Z-scores from 8.0 Hz to 12.0 Hz are kept

In [8]:
# Keep only the Z-scores whose frequency lies inside the band [f_low, f_high].

# Filter Z scores between 8-12 Hz
f_low = 8.0
f_high = 12.0

# Preallocate storage for filtered Z scores
filtered_zscores = [None] * len(files)

for f, file in enumerate(files):
    filtered_zscores[f] = {}  # Dictionary to store per-stimulus filtered Z scores

    # Compute filtered Z scores for each stimulus
    for stim_label, zscores_per_epoch in eeg_zscore_pxx[f].items():  # Shape: (num_epochs, freqs)
        # Take the frequency axis from this stimulus' own PSD (first epoch) instead of
        # relying on a `freqs` variable left behind by a plotting cell. That variable
        # does not exist when the plots are disabled and belongs to the plotted file.
        freqs = eeg_f[f][stim_label][0]

        # Boolean mask of the frequencies inside the band (both limits included)
        fmask = (freqs >= f_low) & (freqs <= f_high)
        filtered_zscores[f][stim_label] = zscores_per_epoch[:, fmask]  # Shape: (num_epochs, num_freqs_in_band)

# Export Z score (all and filtered)

In [None]:
# Export the Z-scores (all frequencies, and the band-limited subset) to CSV files.
save_zscore = False


def zscore_csv_name(base_name, file_label, n_files):
    """Return the CSV file name to use for one recording.

    With a single recording the plain ``base_name`` is kept. With several
    recordings the name is prefixed with ``file_label`` so that each recording
    gets its own file instead of overwriting the previous one.
    """
    if n_files <= 1:
        return base_name
    return f"{file_label}_{base_name}"


if save_zscore:
    # Export to CSV
    for f, file in enumerate(files):
        # Frequency axis of this file, taken from the first epoch of the first stimulus
        # (assumes the same frequencies for all stimuli)
        first_stim_label = next(iter(eeg_f[f]))
        all_freqs = eeg_f[f][first_stim_label][0]

        # --- All Z-scores: one column per stimulus and epoch ---
        data = {'Frequency (Hz)': all_freqs}
        for stim_label, stim_zscores in eeg_zscore_pxx[f].items():
            for epoch_index, epoch_zscores in enumerate(stim_zscores):
                data[f'{stim_label}_Epoch_{epoch_index + 1}'] = epoch_zscores

        # Convert to DataFrame and save to CSV
        df = pd.DataFrame(data)
        csv_filename = zscore_csv_name("zscore_all.csv", file, len(files))
        df.to_csv(csv_filename, index=False)

        # --- Filtered Z-scores: same layout, restricted to [f_low, f_high] ---
        # The band mask is rebuilt from this file's own frequency axis, so the export
        # does not depend on variables left behind by the plotting cells.
        band_mask = (all_freqs >= f_low) & (all_freqs <= f_high)
        data = {'Frequency (Hz)': all_freqs[band_mask]}
        for stim_label, stim_zscores in filtered_zscores[f].items():
            for epoch_index, epoch_zscores in enumerate(stim_zscores):
                data[f'{stim_label}_Epoch_{epoch_index + 1}'] = epoch_zscores

        # Convert to DataFrame and save to CSV
        df = pd.DataFrame(data)
        csv_filename = zscore_csv_name("filtered_zscore.csv", file, len(files))
        df.to_csv(csv_filename, index=False)
