<a href="https://colab.research.google.com/github/sergiopdl/sPCI_sLZc/blob/main/analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# READ ME

1.   Create the following directories in the root of your Google Drive (replacing "Example" with any desired name of your project):
*  /Example/Data/Inputs/Activations/
*  /Example/Data/Inputs/Events/
*  /Example/Data/Outputs/Timepoints/
*  /Example/Data/Outputs/Measures/
*  /Example/Data/Outputs/Measures/Filtered/

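2.   Put your 2-D activation matrix .CSVs (channels x timepoints, 1 file per subject) in /Example/Data/Inputs/Activations/. File names must start with the numeric subject ID followed by an underscore (e.g. 1_subject.csv), because files are sorted by that ID.
3.   Put your corresponding EventList .TXTs (1 per subject, named with the same numeric subject ID prefix) in /Example/Data/Inputs/Events/.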
4.   Run each code chunk from top to bottom (PCI code, Python imports, and helper functions) until you get to Main.
5.  In the first code chunk in Main, configure the parameters accordingly.
6.   Run the second code chunk in Main, which outputs one file for each subject with PCI and LZc in /Example/Data/Outputs/Measures/Filtered/.
7.   Run the last code chunk to aggregate the outputs across all subjects into one file for analysis in R.







# Library installations

In [None]:
!pip install lempel_ziv_complexity
from lempel_ziv_complexity import lempel_ziv_complexity

In [None]:
!pip install git+https://github.com/renzocom/PCIst.git
from PCIst import pci_st

# Python imports

In [110]:
import numpy as np
from numpy import linalg
from scipy.signal import hilbert
import pandas as pd
import math
import os
import matplotlib.pyplot as plt
import sys
import statistics

In [None]:
# Mount Google Drive in the Colab runtime; the paths configured in Main all
# live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

# Helper functions


## Helper function to compute LZc

In [112]:
def compute_LZc(np_signal_windowed, response_start_absolute, baseline_end_absolute):
  """
  Computes LZc for a single trial based on a baseline and response window;
    returns the result.

  Each channel is Hilbert-transformed separately for the baseline and the
  response window. The response-window instantaneous amplitude is binarized
  against the mean instantaneous amplitude of that channel's baseline window
  (per https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0133532).
  The binarized channels are then interleaved timepoint by timepoint into one
  binary string, and the Lempel-Ziv complexity of that string is returned.

  Args:
    np_signal_windowed (2-d np array of floats): EEG activation values (channels x samples) windowed around a given event.
    response_start_absolute (int): The absolute timepoint when response periods start, eg 104 for 400ms baselines.
    baseline_end_absolute (int): The absolute timepoint when baseline periods end, eg 90 for 400ms baselines.

  Returns:
    LZc for a single trial, exactly as returned by lempel_ziv_complexity().
  """

  # Collect one binarized row per channel and build the matrix once at the end
  # (stacking inside the loop re-copies the whole matrix for every channel)
  binarized_channels = []

  for signal in np_signal_windowed:
    # Instantaneous amplitude = absolute value of the analytic (Hilbert-transformed) signal
    baseline_amplitude = np.abs(hilbert(signal[:baseline_end_absolute]))        # Only used to derive the threshold
    response_amplitude = np.abs(hilbert(signal[response_start_absolute:]))      # The data LZc is computed on

    # The threshold is the mean baseline amplitude of this channel
    threshold = np.mean(baseline_amplitude)

    # 1 where the response amplitude exceeds the baseline mean, else 0
    binarized_channels.append((response_amplitude > threshold).astype(int))

  # Rows are channels, columns are response timepoints
  binarized_matrix = np.array(binarized_channels)

  # Flatten timepoint by timepoint ('F' for column-major) so that all channels of
  # one timepoint are adjacent in the string passed to the LZc calculation
  binary_string = "".join(binarized_matrix.flatten('F').astype(str))

  # Calculate LZc
  return lempel_ziv_complexity(binary_string)

## Helper function to compute PCI

In [113]:
def compute_PCI(row, np_signal, baseline_start_factor, response_end_factor, snr_param, k_param, baseline_buffer):
  """
  Computes PCI for a single trial based on a baseline and response window;
    returns the result together with the window that was used.

  Args:
    row (dictionary): EventList and Timepoint data for a single trial.
        Must provide 'baseline_start_timepoint' and 'response_end_timepoint'.
    np_signal (2-d np array of floats): All EEG activation values (channels x samples) per subject.
    baseline_start_factor (int): The relative timepoint when baseline periods start, eg -103 for 400ms baselines.
    response_end_factor (int): The relative timepoint when response periods end, eg 90 for 400ms responses.
    snr_param (float): Selects principal components with a signal-to-noise ratio (SNR) > min_snr.
    k_param (float > 1): Noise control parameter.
    baseline_buffer: Number of samples for a 50ms buffer between baseline_end and response_start

  Returns:
    (tuple): (pci, np_signal_windowed, par, response_start_absolute,
              baseline_end_absolute, baseline_buffer), where
      pci is the value returned by pci_st.calc_PCIst(), or 0 when the trial
        window does not fit inside the recording (PCIst is not called then);
      np_signal_windowed is the (channels x samples) slice for this trial
        (empty or shorter than expected when the window does not fit);
      par is the parameter dictionary passed to pci_st.calc_PCIst();
      the remaining items are the window edges and the buffer, unchanged.
  """

  # To offset the response window from the baseline window by 1 timepoint
  alignment_buffer = 1

  # Compute "absolute" edges (sample indices inside the trial window) of the
  # baseline and response windows for the PCI par values
  baseline_end_absolute = int(-baseline_start_factor - baseline_buffer)
  response_start_absolute = int(baseline_end_absolute + alignment_buffer + baseline_buffer)
  response_end_absolute = int(response_start_absolute + response_end_factor)
  par = {'baseline_window':(0, baseline_end_absolute), 'response_window':(response_start_absolute, response_end_absolute),
         'k': k_param, 'min_snr': snr_param, 'max_var': 99, 'embed': False, 'n_steps': 100}

  # Pull out the actual window start and end timepoints from the eventlist data
  baseline_start_timepoint = math.floor(row['baseline_start_timepoint'])
  response_end_timepoint = math.floor(row['response_end_timepoint'])

  # Number of samples a complete trial window must contain
  expected_samples = response_end_factor - baseline_start_factor

  if baseline_start_timepoint < 0:
    # A negative start would be read as "count from the end" by numpy slicing
    # and could silently select unrelated data, so treat it as an empty window
    np_signal_windowed = np_signal[:, 0:0]
  else:
    # Activation data from all channels for just the corresponding window
    np_signal_windowed = np_signal[:, baseline_start_timepoint:response_end_timepoint]

  if (np_signal_windowed.size == 0) or (np_signal_windowed.shape[1] < expected_samples):
    # The window runs past the recording: report 0 instead of running PCIst on
    # data that the caller will discard anyway
    pci = 0
  else:
    # Timepoints are simply the sample indices inside the window
    np_timepoints = np.arange(np_signal_windowed.shape[1])
    pci = pci_st.calc_PCIst(np_signal_windowed, np_timepoints, **par)

  return (pci, np_signal_windowed, par, response_start_absolute, baseline_end_absolute, baseline_buffer)

## Helper function to compute both measures (PCI and LZc)

In [114]:
def compute_measures(df_onset_times_binned_timepoints, np_signal, file, baseline_start_factor,
                     response_end_factor, output_measures, input_output_measures_filtered,
                     subject_string, snr_param, k_param, n_channels, dataset, baseline_buffer):
  """
  Computes PCI and LZc for all trials (per subject);
    saves the results (CSVs) to Google Drive.

  Trials whose window does not fit inside the recording get PCI = 0 and
  LZc = 0; neither measure is computed for them. The input df is not modified.

  Args:
    df_onset_times_binned_timepoints (df): EventList and timepoint data for all trials (per subject).
        Must contain 'baseline_start_timepoint', 'response_end_timepoint', 'ecode', 'a_flags' and 'bin'.
    np_signal (2-d np array of floats): All EEG activation values (channels x samples) per subject (for windowing).
    file (string): File name of the current file (its last 4 characters are dropped for the output name).
    baseline_start_factor (int): The relative timepoint when baseline periods start, eg -103 for 400ms baselines.
    response_end_factor (int): The relative timepoint when response periods end, eg 90 for 400ms responses.
    output_measures (string): Google Drive path for outputting PCI/LZc results.
    input_output_measures_filtered (string): Google Drive path for outputting results filtered to core columns.
    subject_string (string): ID of current subject, used as the prefix of the filtered output filename.
    snr_param (float): Selects principal components with a signal-to-noise ratio (SNR) > min_snr.
        Passed to compute_PCI().
    k_param (float > 1): Noise control parameter.
        Passed to compute_PCI().
    n_channels (int): Number of EEG channels specified (for output filename).
    dataset: Name of dataset (for output filename).
    baseline_buffer: Number of samples for a 50ms buffer between baseline_end and response_start
        Passed to compute_PCI().
  """

  # Work on a copy so the caller's df is neither modified nor changed while it is iterated
  df_measures = df_onset_times_binned_timepoints.copy()

  n_samples_available = np_signal.shape[1]
  expected_samples = -baseline_start_factor + response_end_factor

  # Loop through all events in the eventlist data
  for event, row in df_onset_times_binned_timepoints.iterrows():

    # Same window edges that compute_PCI() derives from the row
    window_start = math.floor(row['baseline_start_timepoint'])
    window_end = math.floor(row['response_end_timepoint'])

    # Validate the window BEFORE computing anything: a negative start (first
    # trials), an end beyond the available data (last trials), or a window
    # shorter than expected cannot be analysed
    window_is_invalid = ((np_signal.size == 0) or
                         (window_start < 0) or
                         (window_end > n_samples_available) or
                         (window_end - window_start < expected_samples))

    if window_is_invalid:
      # Number of requested samples that actually lie inside the recording
      usable_samples = max(0, min(window_end, n_samples_available) - max(window_start, 0))
      print("ERROR: PCI/LZc WINDOW LARGER THAN SIGNAL", expected_samples, usable_samples)
      print("SETTING PCI AND LZC TO 0 AND CONTINUING TO NEXT ROW/TRIAL/SUBJECT")
      df_measures.loc[event, "pci"] = 0
      df_measures.loc[event, "lzc"] = 0
      continue

    # Compute PCI (the returned par dict and buffer are not needed here)
    pci, np_signal_windowed, _, response_start_absolute, baseline_end_absolute, _ = compute_PCI(
        row, np_signal, baseline_start_factor, response_end_factor,
        snr_param, k_param, baseline_buffer)

    # Add the PCI values to the results df
    df_measures.loc[event, "pci"] = pci

    # Compute LZc on the same window and add it to the results df
    lzc = compute_LZc(np_signal_windowed, response_start_absolute, baseline_end_absolute)
    df_measures.loc[event, "lzc"] = lzc

  # Parameter part shared by both output filenames
  parameter_suffix = (f"_chann{n_channels}"
                      f"_base{baseline_start_factor}_-{baseline_buffer}"
                      f"_resp{response_end_factor}"
                      f"_snr{snr_param}"
                      f"_k{k_param}")

  # Save new dfs with PCI results data to Google Drive
  df_measures.to_csv(output_measures + file[0:-4] + parameter_suffix + "_timepoints_pci_lzc.csv")

  # Save the core columns only, named by subject and dataset
  df_measures_filtered = df_measures[["ecode", "a_flags", "bin", "pci", "lzc"]]
  df_measures_filtered.to_csv(input_output_measures_filtered + subject_string + "_" + dataset +
                              parameter_suffix + "_ecode_aflags_bin_pci_lzc.csv")

## Compute timepoints/windows

In [115]:
def compute_timepoints(df_onset_times_binned, input_output_timepoints, sample_rate,
                       baseline_start_factor, response_end_factor, file, n_channels, baseline_buffer):
  """
  Converts trial onset times to timepoints (per subject);
    computes baseline start and response end timepoints;
    saves the new df to Google Drive and returns it.

  Args:
    df_onset_times_binned (df): EventList data for all trials (per subject); must contain an 'onset' column.
    input_output_timepoints (string): Google Drive path for outputting timepoints.
    sample_rate (int): EEG sample rate (for timepoint conversion).
    baseline_start_factor (int): The relative timepoint when baseline periods start, eg -103 for 400ms baselines.
    response_end_factor (int): The relative timepoint when response periods end, eg 90 for 300ms responses.
    file (string): File name of the current file (its last 4 characters are dropped for the output name).
    n_channels (int): Number of EEG channels specified (for output filename).
    baseline_buffer: Number of samples for a 50ms buffer between baseline_end and response_start
        (only used in the output filename here).

  Returns:
    df_onset_times_binned_timepoints (df): EventList and timepoint data for all trials (per subject).
  """

  # Onset of every trial expressed in samples instead of seconds
  onset_in_samples = df_onset_times_binned['onset'] * sample_rate

  # New df (the input is left untouched) with the onset and both window edges
  df_onset_times_binned_timepoints = df_onset_times_binned.assign(
      timepoint=onset_in_samples,
      baseline_start_timepoint=onset_in_samples + baseline_start_factor,
      response_end_timepoint=onset_in_samples + response_end_factor)

  # Add sanity check that response_end_timepoint ends before next baseline_start_timepoint?

  # Save new df with timepoint/window data to Google Drive
  output_name = (f"{file[0:-4]}"
                 f"_chann{n_channels}"
                 f"_base{baseline_start_factor}_-{baseline_buffer}"
                 f"_resp{response_end_factor}"
                 "_timepoints.csv")
  df_onset_times_binned_timepoints.to_csv(input_output_timepoints + output_name)

  return df_onset_times_binned_timepoints

## Convert eventlist data

In [116]:
def convert_eventlist_data(dataset, input_events, eventlist_files_sorted, subject_int, ignore_rows):
  """
  Reads one subject's EventList .txt file into a df and adds a 'bin' column.

  Args:
    dataset: Name of dataset (not used by this function).
    input_events (string): Google Drive path for inputting EventList data (for all subjects).
    eventlist_files_sorted (list of strings): List of EventList filenames, sorted increasing to match subject IDs.
    subject_int (int): Position of the current subject in eventlist_files_sorted.
    ignore_rows (int): Number of leading rows of the EventList file to skip before the trial rows.

  Returns:
    df_onset_times_binned (df): EventList data with bins for all trials (per subject).
  """

  def enable_to_bin(value):
    # Bins 1-4 are identified by the first character of the entry; anything else is bin 0.
    # str() keeps this working when pandas parsed the column as numbers.
    text = str(value)
    return int(text[0]) if text.startswith(('1', '2', '3', '4')) else 0

  # Columns are whitespace-separated; sep=r"\s+" replaces the deprecated delim_whitespace=True
  df_onset_times = pd.read_csv(input_events + eventlist_files_sorted[subject_int],
                               skiprows=ignore_rows, sep=r"\s+",
                               header=None)                                     # Ignore header/column names due to formatting inconsistencies

  # Remove the erroneous columns (positions 9 and 10) and add correct column names back to the df
  df_onset_times_binned = df_onset_times.drop(columns=df_onset_times.columns[[9, 10]])
  df_onset_times_binned.columns = ['item', 'bepoch', 'ecode', 'label', 'onset', 'diff', 'dura',
                                   'b_flags', 'a_flags', 'enable']

  # Convert "enable" column to bins
  df_onset_times_binned['bin'] = df_onset_times_binned['enable'].apply(enable_to_bin)

  return df_onset_times_binned

## Run subjects

In [119]:
def run_subjects(activation_files_sorted, input_activations, dataset, input_events,
                 eventlist_files_sorted, input_output_timepoints, output_measures,
                 input_output_measures_filtered, baseline_start_factor, response_end_factor,
                 snr_param, k_param, timepoint_files_sorted, n_subjects, sample_rate,
                 n_channels, ignore_rows, baseline_buffer, output_metadata=None):
  """
  Loops through the first n_subjects subjects;
    Reads in activation data;
    Converts corresponding EventList data;
    Computes corresponding timepoints (for PCI/LZc);
    Computes PCI/LZC;
    Outputs metadata.

  Args:
    activation_files_sorted (list of strings): List of EEG activation filenames, sorted increasing to match subject IDs.
    input_activations (string): Google Drive path for inputting Activations data (for all subjects)
    dataset: Name of dataset (for output filename). Passed to convert_eventlist_data().
    input_events (string): Google Drive path for inputting EventList data (for all subjects).
        Passed to convert_eventlist_data().
    eventlist_files_sorted (list of strings): List of EventList filenames, sorted increasing to match subject IDs.
        Passed to convert_eventlist_data().
    input_output_timepoints (string): Google Drive path for outputting timepoints.
        Passed to compute_timepoints().
    output_measures (string): Google Drive path for outputting PCI/LZc results.
        Passed to compute_measures().
    input_output_measures_filtered (string): Google Drive path for outputting results filtered to core columns.
        Passed to compute_measures().
    baseline_start_factor (int): The relative timepoint when baseline periods start, eg -103 for 400ms baselines.
        Passed to compute_timepoints().
    response_end_factor (int): The relative timepoint when response periods end, eg 90 for 400ms responses.
        Passed to compute_timepoints().
    snr_param (float): Selects principal components with a signal-to-noise ratio (SNR) > min_snr.
        Passed to compute_measures().
    k_param (float > 1): Noise control parameter. Passed to compute_measures().
    timepoint_files_sorted (list of strings): List of existing timepoint filenames.
        A subject whose ID prefix matches one of them re-uses that file instead of
        re-running convert_eventlist_data() and compute_timepoints().
    n_subjects (int): Maximum number of subjects to process.
    sample_rate (int): EEG sample rate (for timepoint conversion).
        Passed to compute_timepoints().
    n_channels (int): Number of EEG channels specified (for output filename).
        Passed to compute_timepoints().
    ignore_rows (int): Number of leading EventList rows to skip.
        Passed to convert_eventlist_data().
    baseline_buffer: Number of samples for a 50ms buffer between baseline_end and response_start
        Passed to compute_measures().
    output_metadata (string, optional): Path for outputting the subject metadata CSV.
        Defaults to the module-level output_general set in the Main configuration.

  Returns:
    (no value): Script completes.
  """

  # For activation-data sanity check and onset-time conversion
  seconds_per_minute = 60

  # Look up existing timepoint files by their subject-ID prefix rather than by
  # list position, so a missing or extra file cannot shift subjects
  # NOTE(review): this does not verify that a cached file was produced with the
  #   current window parameters - confirm or clear the Timepoints folder when they change
  timepoint_file_by_subject = {name.partition("_")[0]: name for name in timepoint_files_sorted}

  # Create lists for metadata
  n_subjects_list = []
  n_samples = []
  n_seconds = []
  n_minutes = []

  # Loop through all activation/eventlist files (one per subject)
  for subject_int, file in enumerate(activation_files_sorted):

    # Stop once n_subjects subjects have been processed (for debugging on a subset)
    if subject_int >= n_subjects:
      break

    print()
    print(file)

    # Save subject ID for outputting to filenames
    subject_string = file.partition("_")[0]
    n_subjects_list.append(subject_string)

    # Read in activation data for one subject
    df_activations = pd.read_csv(input_activations + file, header=None)

    # Convert activation data to numpy array for PCI/LZc calculation
    np_signal = df_activations.iloc[:n_channels, :].to_numpy()
    print("Shape (channels x samples):", np_signal.shape) # debug
    n_samples.append(np_signal.shape[1])

    # Sanity check that there are approximately 10-12 minutes worth of data
    #   per: https://www.sciencedirect.com/science/article/pii/S1053811920309502?via%3Dihub
    n_seconds.append(np_signal.shape[1] / sample_rate)
    n_minutes.append(np_signal.shape[1] / sample_rate / seconds_per_minute)

    # Check for existing timepoint data for this subject
    cached_timepoint_file = timepoint_file_by_subject.get(subject_string)
    if cached_timepoint_file is not None:
      df_onset_times_binned_timepoints = pd.read_csv(input_output_timepoints + cached_timepoint_file)

    else:
      # Convert eventlist data
      df_onset_times_binned = convert_eventlist_data(dataset, input_events, eventlist_files_sorted, subject_int, ignore_rows)

      # Compute timepoints/windows
      df_onset_times_binned_timepoints = compute_timepoints(df_onset_times_binned,
                                                          input_output_timepoints,
                                                          sample_rate, baseline_start_factor,
                                                          response_end_factor, file, n_channels, baseline_buffer)

    # Compute PCI and LZc
    compute_measures(df_onset_times_binned_timepoints, np_signal, file, baseline_start_factor,
                     response_end_factor, output_measures, input_output_measures_filtered,
                     subject_string, snr_param, k_param, n_channels, dataset, baseline_buffer)

  # Create metadata df (one row per processed subject; the subject count is repeated on every row)
  df_metadata = pd.DataFrame({'n_subjects': len(n_subjects_list),
                              'n_samples': n_samples,
                              'n_seconds': n_seconds,
                              'n_minutes': n_minutes})

  # Fall back to the notebook-level output_general when no explicit path is given
  metadata_dir = output_general if output_metadata is None else output_metadata
  df_metadata.to_csv(metadata_dir + dataset + "_subject_metadata.csv")

  return

## Load data/files

In [118]:
def load_data(input_activations, input_events, input_output_timepoints):
  """
  Lists the EEG activation, EventList, and timepoint files on Google Drive,
    each sorted by the numeric subject ID at the start of the filename.

  Args:
    input_activations (string): Google Drive path for inputting Activations data (for all subjects)
    input_events (string): Google Drive path for inputting EventList data (for all subjects).
    input_output_timepoints (string): Google Drive path for outputting timepoints.

  Returns:
    activation_files_sorted (list of strings): List of EEG activation filenames, sorted increasing to match subject IDs.
    eventlist_files_sorted (list of strings): List of EventList filenames, sorted increasing to match subject IDs.
    timepoint_files_sorted (list of strings): List of timepoint filenames, sorted increasing to match subject IDs.

  Raises:
    ValueError: If the number of activation files differs from the number of
      EventList files, or if a filename does not start with an integer
      followed by "_".
  """

  def subject_id(filename):
    # Numeric subject ID = everything before the first underscore
    return int(filename.partition("_")[0])

  # Activation files, ordered by subject ID
  activation_files_sorted = sorted(os.listdir(input_activations), key=subject_id)

  # EventList files, ordered the same way so positions match the activation files
  eventlist_files_sorted = sorted(os.listdir(input_events), key=subject_id)

  # Previously written timepoint files, ordered the same way
  timepoint_files_sorted = sorted(os.listdir(input_output_timepoints), key=subject_id)

  # Every activation file needs an EventList file; stop running otherwise
  if len(activation_files_sorted) != len(eventlist_files_sorted):
    raise ValueError('Mismatching file numbers!')

  return (activation_files_sorted, eventlist_files_sorted, timepoint_files_sorted)

# Main

### Configure

In [None]:
# Set parameters for dataset
#   dataset:     project folder name in Google Drive; also used in output filenames
#   n_subjects:  passed to run_subjects() to limit how many subjects are processed
#   n_channels:  number of leading rows (channels) of each activation CSV to use
#   sample_rate: EEG sample rate, used to convert seconds/milliseconds to samples
dataset = "Example"
n_subjects = 1
n_channels = 19
sample_rate = 512

# Rows to ignore in Eventlist.txt files (30 for N170 ; 28 for P3 and MMN)
ignore_rows = 30

# Set PCI window parameter values (in ms, relative to event onset) for conversion to timepoints
baseline_start_ms = -400 # PCIst default
#baseline_end_ms = -50
#response_start_ms = 0
response_end_ms = 300 + 100 # PCIst default + 100 ms for visual sensory processing (eg P100)

# Subtract samples for a 50ms buffer between baseline end and response start (per default PCI par values)
# (rounded up to a whole number of samples)
baseline_buffer = math.ceil((50 / 1000) * sample_rate)

# Convert PCI window parameter values to 0-relative timepoint factors
# (rounded away from zero so the windows are never shorter than requested)
baseline_start_factor = math.floor(baseline_start_ms / 1000 * sample_rate)
response_end_factor = math.ceil(response_end_ms / 1000 * sample_rate)

"""
The following parameters may be suboptimal for evoked signals with lower signal-to-noise ratio,
  such as those produced by peripheral stimulation, where it may be necessary to increase k
  and/or the minimum SNR (1.8 per Alessandra Dallavecchia) to control for stationary baseline-like activations.
"""
snr_param = 1.1
k_param = 1.2

# Set Google Drive locations for input and output data files
# (every path ends with "/" because filenames are appended by string concatenation)
input_activations = "/content/drive/MyDrive/" + dataset + "/Data/Inputs/Activations/"
input_events = "/content/drive/MyDrive/" + dataset + "/Data/Inputs/Events/"
input_output_timepoints = "/content/drive/MyDrive/" + dataset + "/Data/Outputs/Timepoints/"
output_measures = "/content/drive/MyDrive/" + dataset + "/Data/Outputs/Measures/"
input_output_measures_filtered = "/content/drive/MyDrive/" + dataset + "/Data/Outputs/Measures/Filtered/"
# output_general is read as a global inside run_subjects() for the subject metadata CSV
output_general = "/content/drive/MyDrive/" + dataset + "/Data/"

# Initialize empty lists for sorted files
activation_files_sorted = []
eventlist_files_sorted = []
timepoint_files_sorted = []

# Load data (file lists sorted by the numeric subject ID prefix of each filename)
(activation_files_sorted, eventlist_files_sorted, timepoint_files_sorted) = load_data(input_activations,
                                                                                      input_events,
                                                                                      input_output_timepoints)

### Run

In [None]:
# Main function call: processes the subjects and writes the per-subject
# timepoint, PCI/LZc and metadata CSVs using the parameters configured above
run_subjects(activation_files_sorted, input_activations, dataset, input_events,
             eventlist_files_sorted, input_output_timepoints, output_measures,
             input_output_measures_filtered, baseline_start_factor, response_end_factor,
             snr_param, k_param, timepoint_files_sorted, n_subjects, sample_rate,
             n_channels, ignore_rows, baseline_buffer)

# Output subject-aggregated df for analysis in R

In [None]:
# Read back in trial-by-trial PCI/LZc results and sort by subject id
input_output_measures_filtered = "/content/drive/MyDrive/" + dataset + "/Data/Outputs/Measures/Filtered/"

# Keep only per-subject files, i.e. those whose name starts with a numeric subject id.
# The aggregated CSV written at the end of this cell lands in the same folder and
# starts with the dataset name, so without this filter a re-run would fail on int().
results_files = [x for x in os.listdir(input_output_measures_filtered)
                 if x.partition("_")[0].isdecimal()]
results_files_sorted = sorted(results_files, key=lambda x: int(x.partition("_")[0]))

# Collect one df per subject and concatenate once at the end
subject_dfs = []

# Loop through all subjects
for subject, file in enumerate(results_files_sorted):

  # Read in results, rename trial and condition columns, and add subject_id and session columns
  df_pci_filtered = pd.read_csv(input_output_measures_filtered + file)
  df_pci_filtered = df_pci_filtered.rename(columns={'Unnamed: 0': 'trial', 'bin': 'condition'})

  # subject_id is the 0-based position of the file in the sorted list
  df_pci_filtered['subject_id'] = subject

  # NOTE(review): session is taken from the 6th character of the filename -
  #   confirm this matches the naming scheme of your files
  session = file[5]
  df_pci_filtered['session'] = session
  subject_dfs.append(df_pci_filtered)

# Aggregate all subjects with a new sequential index (empty df when there are no result files)
aggregated_df = pd.concat(subject_dfs, ignore_index=True) if subject_dfs else pd.DataFrame()

# Save final aggregated csv for analysis in R
aggregated_df.to_csv(input_output_measures_filtered + dataset + "_chann" + str(n_channels) +
                     "_base" + str(baseline_start_factor) + "_-" + str(baseline_buffer) +
                     "_resp" + str(response_end_factor) +
                     "_snr" + str(snr_param) +
                     "_k" + str(k_param) +
                     "_trial_ecode_aflags_condition_pci_lzc_subjectID_session_aggregated.csv")