# Build subset of SEEG recording containing relevant HFOs and some background activity


### Check WD (change if necessary) and file loading

In [84]:
# Report the directory the kernel is currently running from
import os
curr_dir = os.getcwd()
print(curr_dir)

# The data files are addressed relative to this notebook's folder. When the kernel
# was started somewhere else, switch to that folder (the relative path below is
# resolved against the current directory, so os.chdir raises if it does not exist there).
if curr_dir.find("/src/seeg_data/synthetic") == -1:
    # Folder of this notebook, relative to the current directory
    file_location = f"{curr_dir}/thesis-lava/src/seeg_data/synthetic"
    print("File Location: ", file_location)

    # Make the notebook's folder the working directory
    os.chdir(file_location)

    # Confirm where we ended up
    print("New Working Directory: ", os.getcwd())

# Prefix prepended to data file names; only needed when the working directory
# is not the notebook's folder (e.g. 'src/hfo/')
PATH_TO_FILE = ''

/home/monkin/Desktop/feup/thesis/thesis-lava/src/seeg_data/synthetic


In [85]:
import numpy as np
import math
from utils.io import preview_np_array

# Filtered recording to build the subset from (channels 90-119, going by the file name)
seeg_file_name = "filtered_seeg_ch90-119_ripple_band.npy"
recorded_data = np.load(PATH_TO_FILE + seeg_file_name)

# Sanity check: array dimensions and the first 10 rows (rows are time steps)
first_time_steps = recorded_data[:10]
print("Data shape: ", recorded_data.shape)
print("First time steps: ", first_time_steps)

Data shape:  (245760, 30)
First time steps:  [[ 1.84829940e-04  4.37174409e-06  2.71047998e-04  9.56092800e-05
   3.69719302e-04 ... -5.77643495e-04 -1.35636408e-03 -5.28126862e-04
  -6.84353879e-04 -5.91987150e-04]
 [ 1.37975809e-03 -2.33040380e-04  1.61508759e-03  6.13719991e-04
   2.90657316e-03 ... -5.14801662e-03 -1.23772766e-02 -4.77473965e-03
  -5.44621734e-03 -5.34139889e-03]
 [ 3.92964380e-03 -3.06118826e-03  2.97532191e-03  8.55872179e-04
   1.10917261e-02 ... -2.23060233e-02 -5.32362499e-02 -2.06351304e-02
  -2.05310798e-02 -2.27491617e-02]
 [ 3.17023234e-03 -1.71566387e-02 -3.48900098e-03 -3.68384663e-03
   2.86181389e-02 ... -6.22837269e-02 -1.42782212e-01 -5.67303129e-02
  -4.97123145e-02 -6.10809133e-02]
 [-1.28319667e-02 -5.79471122e-02 -3.12810671e-02 -1.89557456e-02
   5.96898282e-02 ... -1.22912538e-01 -2.63603038e-01 -1.11133673e-01
  -8.80023720e-02 -1.15612746e-01]
 [-5.38320479e-02 -1.32887610e-01 -8.74300199e-02 -4.17567969e-02
   1.11892550e-01 ... -1.72840862e

## Define Global Parameters of the Experiment

In [64]:
# Acquisition parameters of the recording
sampling_rate = 2048             # Samples per second (Hz)
input_duration = 120 * 1000      # Length of the recording in ms (120 seconds)

# Dimensions are read from the loaded array, which is laid out as (samples, channels)
num_samples, num_channels = recorded_data.shape    # 245760 samples (2048 * 120) x 30 channels

# Time elapsed between two consecutive samples, in ms
x_step = (1 / sampling_rate) * 1000    # 0.48828125 ms

## Define the Channels of Interest that will be used to extract the signal

In [65]:
# Column window of recorded_data to work on. Both indices are used as slice bounds
# further down, so the end index is exclusive and the window holds 30 channels.
ch_start_idx = 0    # First channel of the region of interest (the loaded file presumably already starts at channel 90 - confirm)
ch_end_idx = ch_start_idx + 30 # One past the last channel of the region of interest

### Extract the SEEG channels in the defined range from the SEEG data

In [66]:
# Extract the seeg channels in the half-open range [ch_start_idx, ch_end_idx),
# keeping every time step (rows) of those channels (columns)
relevant_seeg = recorded_data[:, ch_start_idx:ch_end_idx]

# Print the shape and an excerpt of the extracted channels
preview_np_array(relevant_seeg, "SEEG Channels")

SEEG Channels Shape: (245760, 30).
Preview: [[ 9.55236180e-04  2.25940024e-05  1.40082745e-03  4.94126889e-04
   1.91077946e-03 ... -2.98537112e-03 -7.00994679e-03 -2.72945977e-03
  -3.53687062e-03 -3.05950185e-03]
 [ 3.10524546e-03 -1.29961370e-03  2.44363903e-03  1.08944605e-03
   6.96921504e-03 ... -1.40248198e-02 -3.44264299e-02 -1.31741286e-02
  -1.32418198e-02 -1.47118505e-02]
 [-4.78473281e-03 -1.06279148e-02 -1.25298819e-02 -6.37920586e-03
   3.93507441e-03 ... -1.86506700e-02 -4.19363601e-02 -1.68170844e-02
  -5.84467919e-03 -1.71137287e-02]
 [-3.07128341e-02 -2.82111952e-02 -3.73397480e-02 -2.04514460e-02
  -1.27484110e-02 ...  2.12188831e-02  7.87003814e-02  2.39371187e-02
   3.86770578e-02  3.17849448e-02]
 [-2.51471142e-02 -9.93260958e-03 -2.08226125e-03  6.91402108e-03
  -6.50262002e-03 ...  1.01256123e-01  2.66136463e-01  8.73138641e-02
   5.66964419e-02  9.98328584e-02]
 ...
 [-1.37146542e-01 -2.79082134e-01 -4.74749222e-01  1.20965138e-01
  -4.73948365e-01 ... -3.33114

## Import the Markers (Annotated Events) 
The markers are stored in a numpy array of shape (num_channels, events):
- Each row represents the events of a channel
- Each event is composed of the following 3 fields: (Label, Position, Duration)

In [67]:
# Annotation file that accompanies the recording (one row of events per channel)
markers_seeg_file_name = "filtered_seeg_ch90-119_markers.npy"
markers = np.load(PATH_TO_FILE + markers_seeg_file_name)

# Sanity check of what was loaded.
# NOTE(review): rows of `markers` are channels, so the slice below shows the events of
# the first 10 channels even though the printed label talks about time steps.
first_marker_rows = markers[:10]
print("Markers shape: ", markers.shape)
print("First time steps: ", first_marker_rows)

Markers shape:  (30, 42)
First time steps:  [[('Spike',   1000.  , 0.) ('Spike+Fast-Ripple',   4218.75, 0.)
  ('Ripple+Fast-Ripple',   6966.8 , 0.)
  ('Ripple+Fast-Ripple',   9793.95, 0.)
  ('Spike+Ripple+Fast-Ripple',  13189.  , 0.) ...
  ('Ripple+Fast-Ripple', 107859.  , 0.) ('Spike+Ripple', 111439.  , 0.)
  ('Ripple', 114551.  , 0.) ('Spike+Ripple', 116517.  , 0.)
  ('Spike+Ripple', 119000.  , 0.)]
 [('Spike+Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3917.97, 0.)
  ('Spike+Fast-Ripple',   6794.92, 0.) ('Fast-Ripple',   9094.73, 0.)
  ('Fast-Ripple',  12348.1 , 0.) ... ('Spike+Fast-Ripple', 107697.  , 0.)
  ('Fast-Ripple', 111079.  , 0.) ('Spike', 113382.  , 0.)
  ('Ripple+Fast-Ripple', 116656.  , 0.) ('Spike+Ripple', 119000.  , 0.)]
 [('Fast-Ripple',   1000.  , 0.) ('Spike',   3655.76, 0.)
  ('Spike',   7188.96, 0.) ('Spike',   9880.37, 0.)
  ('Spike+Ripple',  13890.6 , 0.) ...
  ('Spike+Fast-Ripple', 107924.  , 0.)
  ('Spike+Ripple+Fast-Ripple', 110238.  , 0.)
  ('Spi

### Get the markers with a certain SNR in a specific region
Let's use the markers with a SNR=15dB in the first region

In [68]:
# Keep only the marker rows of the selected channels; rows of `markers` are channels,
# so this is the same [ch_start_idx, ch_end_idx) window applied to the signal columns
relevant_markers = markers[ch_start_idx:ch_end_idx]
# Print the shape and an excerpt of the selected markers
preview_np_array(relevant_markers, "relevant_markers")

relevant_markers Shape: (30, 42).
Preview: [[('Spike',   1000.  , 0.) ('Spike+Fast-Ripple',   4218.75, 0.)
  ('Ripple+Fast-Ripple',   6966.8 , 0.)
  ('Ripple+Fast-Ripple',   9793.95, 0.)
  ('Spike+Ripple+Fast-Ripple',  13189.  , 0.) ...
  ('Ripple+Fast-Ripple', 107859.  , 0.) ('Spike+Ripple', 111439.  , 0.)
  ('Ripple', 114551.  , 0.) ('Spike+Ripple', 116517.  , 0.)
  ('Spike+Ripple', 119000.  , 0.)]
 [('Spike+Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3917.97, 0.)
  ('Spike+Fast-Ripple',   6794.92, 0.) ('Fast-Ripple',   9094.73, 0.)
  ('Fast-Ripple',  12348.1 , 0.) ... ('Spike+Fast-Ripple', 107697.  , 0.)
  ('Fast-Ripple', 111079.  , 0.) ('Spike', 113382.  , 0.)
  ('Ripple+Fast-Ripple', 116656.  , 0.) ('Spike+Ripple', 119000.  , 0.)]
 [('Fast-Ripple',   1000.  , 0.) ('Spike',   3655.76, 0.)
  ('Spike',   7188.96, 0.) ('Spike',   9880.37, 0.)
  ('Spike+Ripple',  13890.6 , 0.) ...
  ('Spike+Fast-Ripple', 107924.  , 0.)
  ('Spike+Ripple+Fast-Ripple', 110238.  , 0.)
  ('Spik

### Build a subset of the SEEG recording joining data from the channels selected above that contain relevant HFOs

In [69]:
# Build the time_step values of the signal (in ms): one value per sample, the first
# one at x_step and the last one at input_duration.
# The number of samples is computed first and an integer range is then scaled by
# x_step. Stepping np.arange directly by a float is avoided on purpose: with a
# non-integer step the length of the result depends on rounding error and can end up
# one element off, in which case the time axis no longer lines up with the signal.
num_time_steps = int(round(input_duration / x_step))
time_vals = (np.arange(1, num_time_steps + 1) * x_step).tolist()
print("Time values: ", time_vals[:10])

Time values:  [0.48828125, 0.9765625, 1.46484375, 1.953125, 2.44140625, 2.9296875, 3.41796875, 3.90625, 4.39453125, 4.8828125]


In [70]:
# Output buffers that the segment loop further down fills in.
# The signal is zero-initialised instead of using np.empty: np.empty returns
# uninitialised memory, so any sample the loop does not overwrite (e.g. when the
# recording is longer than input_duration) would hold arbitrary values that change
# from run to run and would be written to the saved file.
final_signal = np.zeros(shape=(num_samples))    # One value per time step of the joined signal
final_markers = []                  # Marker record of the event selected for each segment

# Per-channel cursor into relevant_markers: index of the first marker of that channel
# that still has to be examined. All channels start at their first marker.
channels_curr_marker_idx = np.zeros(shape=(relevant_markers.shape[0]), dtype=int)
preview_np_array(channels_curr_marker_idx, "channels_curr_marker_idx")

channels_curr_marker_idx Shape: (30,).
Preview: [0 0 0 0 0 ... 0 0 0 0 0]


## Set the length of each segment to be extracted from a channel
Since we want to join the data from several channels in a single subset, we need to set the length of the segments to be extracted from each channel.

Furthermore, we also need to define a padding at the end of each segment: an HFO that starts too close to the end of a segment is not selected, since it could be cut short where the next segment begins.

In [71]:
segment_length = 500    # Length (ms) of each segment copied from a single channel. The segment loop tries to place one relevant event (Ripple, Fast Ripple, or both) in every segment

segment_end_padding = 150    # Minimum distance (ms) required between an event's position and the end of its segment, so the event is not cut short (Longest Ripples seem to last about 120ms according to Sofia?)

In [72]:
from utils.input import label_has_hfo_event

curr_segment_ch_start_idx = 0    # NOTE(review): assigned here but never read in this cell - looks like a leftover

# Walk over the recording in consecutive windows of segment_length ms. For every window,
# look for a channel that has a relevant (HFO) marker inside it and copy that channel's
# samples for the window into final_signal; the marker is recorded in final_markers.
# Marker positions are compared directly against curr_segment, i.e. they are treated as
# times in ms on the same axis as the segments.
for curr_segment in range(0, input_duration, segment_length):
    time_start_idx = int(curr_segment / x_step)   # Sample index where this segment starts
    time_end_idx = int((curr_segment + segment_length) / x_step)   # Sample index where this segment ends (exclusive)
    # print("time start idx: ", time_start_idx, "time end idx: ", time_end_idx)

    # Channel where the search starts: the segment number modulo the number of channels,
    # so consecutive segments start their search on consecutive channels.
    # (Despite the "marker_idx" names, these two variables index channels, i.e. rows of relevant_markers.)
    starting_marker_idx = int(curr_segment / segment_length) % relevant_markers.shape[0]
    # print("starting_marker_idx: ", starting_marker_idx)
    curr_marker_idx = starting_marker_idx

    segment_marker = None   # Marker selected for this segment (None until one is found)
    # Visit the channels one by one until a relevant event is found or all were tried
    while True:
        # Scan this channel's markers, starting at the channel's cursor (markers before it were already discarded)
        for inner_marker_idx in range(channels_curr_marker_idx[curr_marker_idx], relevant_markers.shape[1], 1):
            curr_marker = relevant_markers[curr_marker_idx][inner_marker_idx]

            # Only markers whose label passes label_has_hfo_event qualify
            # (presumably labels containing Ripple and/or Fast-Ripple - confirm against utils.input)
            if label_has_hfo_event(curr_marker['label']):
                
                # print(curr_marker['position'] + segment_end_padding, curr_segment + segment_length)

                # The marker must start inside the segment and leave at least segment_end_padding ms before the segment ends
                if curr_marker['position'] >= curr_segment and (curr_marker['position'] + segment_end_padding <= curr_segment + segment_length):
                    # print("curr_marker position: ", curr_marker['position'], "curr_segment: ", curr_segment, "segment_length: ", segment_length)
                    # Found a relevant event
                    segment_marker = curr_marker

                    # Move this channel's cursor past the selected marker
                    channels_curr_marker_idx[curr_marker_idx] = inner_marker_idx + 1

                    break   # Stop searching for relevant events in this channel
            
            # A marker positioned before the end of this segment cannot match any later segment
            # (later segments start at or after that point), so move the channel's cursor past it.
            # Markers at or beyond the segment end leave the cursor untouched; the scan still
            # continues over the remaining markers of the channel.
            if curr_marker['position'] < curr_segment + segment_length:
                # Update the current marker index for this channel to the next marker of this channel
                channels_curr_marker_idx[curr_marker_idx] = inner_marker_idx + 1

        # If a relevant event is found, add the relevant event to the final signal
        if segment_marker is not None:
            # Copy this channel's samples for the segment; the samples keep their time
            # indices, so the marker position stays valid in the joined signal
            final_signal[time_start_idx:time_end_idx] = relevant_seeg[time_start_idx:time_end_idx, curr_marker_idx]

            # Add the relevant event's marker to the final markers
            final_markers.append(segment_marker)  # TODO: CHange this to the actual marker after being found
            break


        # Nothing found in this channel: try the next one, wrapping around after the last channel
        curr_marker_idx = (curr_marker_idx + 1) % relevant_markers.shape[0]

        # Check if we are back at the channel where the search started
        if curr_marker_idx == starting_marker_idx:
            # Every channel was tried and none has a qualifying event in this segment
            # -> fill the segment with the starting channel's samples and record no marker

            # NOTE(review): the samples are copied without checking what the starting channel
            # contains in this window; it may hold an event that was rejected above (e.g. an
            # HFO closer than segment_end_padding to the segment end), which would then be
            # present in the signal but absent from final_markers - confirm this is acceptable.
            final_signal[time_start_idx:time_end_idx] = relevant_seeg[time_start_idx:time_end_idx, curr_marker_idx]
            break

### Preview the final signal and markers

In [73]:
# Print the shape and an excerpt of the joined single-channel signal
preview_np_array(final_signal, "Final Signal")

Final Signal Shape: (245760,).
Preview: [ 0.00095524  0.00310525 -0.00478473 -0.03071283 -0.02514711 ...
  0.79872815  0.21016244 -0.49596476 -0.2753618   0.28661303]


In [74]:
# Turn the list of selected marker records into a NumPy array (one entry per selected event)
final_markers = np.array(final_markers)
# Print the shape and an excerpt of the selected markers
preview_np_array(final_markers, "Final Markers")

Final Markers Shape: (226,).
Preview: [('Fast-Ripple',   1000.  , 0.)
 ('Spike+Ripple+Fast-Ripple',   3206.54, 0.)
 ('Spike+Ripple',   3521.  , 0.) ('Ripple+Fast-Ripple',   4134.77, 0.)
 ('Fast-Ripple',   4774.41, 0.) ...
 ('Spike+Ripple+Fast-Ripple', 115019.  , 0.)
 ('Spike+Fast-Ripple', 115777.  , 0.) ('Spike+Ripple', 116216.  , 0.)
 ('Ripple+Fast-Ripple', 116769.  , 0.) ('Ripple', 119000.  , 0.)]


---

## Visualize the Subset of the SEEG Data

In [75]:
# Interactive Plot for the HFO detection
# bokeh docs: https://docs.bokeh.org/en/2.4.1/docs/first_steps/first_steps_1.html

from utils.line_plot import create_fig  # Import the function to create the figure
from bokeh.models import Range1d

# Define the x and y values
# x axis: time stamp (ms) of every sample; the first sample sits at x_step and the
# last one at input_duration.
# The axis is built from an integer sample count scaled by x_step rather than by
# stepping np.arange with a float: with a non-integer step the number of generated
# values depends on rounding error and may be one off, which would leave x and the
# plotted signal with different lengths.
num_time_steps = int(round(input_duration / x_step))
x = list(np.arange(1, num_time_steps + 1) * x_step)

# Create the y arrays for the voltage plot representing the voltage of each electrode
v_yarrays = [final_signal]

## Create the Plot

In [76]:
# Lines to draw: one (y values, legend label) pair per line
hfo_y_arrays = [(final_signal, "Joined subset of SEEG Channels")]

# Options of the SEEG voltage figure, gathered up front and handed to create_fig as keyword arguments
hfo_plot_options = dict(
    title="SEEG Voltage dynamics of Filtered Ripple and Fast Ripple Bands",
    x_axis_label='time (ms)',
    y_axis_label='Voltage (μV)',
    x=x,
    y_arrays=hfo_y_arrays,
    sizing_mode="stretch_both",
    tools="pan, box_zoom, wheel_zoom, hover, undo, redo, zoom_in, zoom_out, reset, save",
    tooltips="Data point @x: @y",
    legend_location="top_right",
    legend_bg_fill_color="navy",
    legend_bg_fill_alpha=0.1,
    # y_range=Range1d(-0.05, 1.05)
)
hfo_plot = create_fig(**hfo_plot_options)

# A legend with more than this many entries is hidden (it would clutter the figure)
max_legend_entries = 30
if len(hfo_y_arrays) > max_legend_entries:
    hfo_plot.legend.visible = False

## Add Box Annotations to the plot to identify the marked HFOs (ground truth)

In [77]:
from bokeh.models import BoxAnnotation
# from utils.line_plot import color_map

show_markers = True    # Master switch: draw the ground-truth annotations or not

# Fill colour of the box drawn for each marker label
color_map = {
    'Spike': 'red',
    'Fast-Ripple': 'blue',
    'Ripple': 'green',
    'Spike+Ripple': 'yellow',
    'Spike+Fast-Ripple': 'pink',
    'Ripple+Fast-Ripple': 'cyan',
    'Spike+Ripple+Fast-Ripple': 'black'
}

confidence_range = 100          # TODO: Check this value. When the duration is missing (0), we consider the 200ms window around the marked position
visited_markers = {}    # position -> labels already drawn at that position (used only when use_visited is set)
use_visited = False     # When set, a (position, label) pair is drawn at most once
plot_instant = True     # When set, markers are drawn as fixed-width "instant" boxes starting at the marker position
instant_width = 100 # 20       # Width added to the right edge of every box, for visualization purposes

# Nothing is iterated when the annotations are switched off
markers_to_draw = final_markers if show_markers else []

for marker in markers_to_draw:
    if use_visited:
        # Labels that already have a box at this position (an empty list is registered on first sight)
        seen_labels = visited_markers.setdefault(marker['position'], [])
        if marker['label'] in seen_labels:
            continue    # This (position, label) pair was drawn before
        seen_labels.append(marker['label'])

    # The box is only widened by confidence_range on both sides when the marker has no
    # duration and instant plotting is off.
    # NOTE(review): the duration value itself never enters the box width - only whether it is > 0.
    has_duration = marker['duration'] > 0
    half_window = 0 if plot_instant or has_duration else confidence_range

    # Box from (position - half_window) to (position + half_window + instant_width),
    # coloured according to the marker label
    annotation = BoxAnnotation(
        left=marker['position'] - half_window,
        right=marker['position'] + half_window + instant_width,
        fill_color=color_map[marker['label']],
        fill_alpha=0.35,
    )
    hfo_plot.add_layout(annotation)

## Show the Plot

In [78]:
import bokeh.plotting as bplt

showPlot = True    # Set to False to skip rendering the figure
if showPlot:
    # Render the figure built in the previous cells
    bplt.show(hfo_plot)

## Export the plot to a file

In [80]:
import os

export = True    # Set to False to skip writing the HTML file

if export:
    file_path = f"{PATH_TO_FILE}results/seeg_filtered_subset_90-119_ripple_band.html"

    # Make sure the target folder exists so that saving does not fail on a fresh checkout
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Customize the output file settings
    bplt.output_file(filename=file_path, title="SEEG Data - Filtered Voltage dynamics across time")

    # Save the plot
    bplt.save(hfo_plot)

    # Clear the current document and reset bokeh's output settings once the file is written
    bplt.curdoc().clear()
    bplt.reset_output()

## Write the final markers into a .npy file
Add an extra dimension (n_channels) to the markers array to match the shape of the other dataset

In [81]:
# To keep the same format as the original data (num_channels, events), give the final
# markers a leading channel dimension of size 1.
# reshape(1, -1) is used instead of wrapping the array in another list so that this cell
# can be re-run safely: wrapping would add one more dimension on every execution
# ((1, 226) -> (1, 1, 226) -> ...), whereas the reshape always yields (1, events).
final_markers = np.asarray(final_markers).reshape(1, -1)
preview_np_array(final_markers, "Final Markers")

Final Markers Shape: (1, 226).
Preview: [[('Fast-Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3206.54, 0.)
  ('Spike+Ripple',   3521.  , 0.) ('Ripple+Fast-Ripple',   4134.77, 0.)
  ('Fast-Ripple',   4774.41, 0.) ...
  ('Spike+Ripple+Fast-Ripple', 115019.  , 0.)
  ('Spike+Fast-Ripple', 115777.  , 0.) ('Spike+Ripple', 116216.  , 0.)
  ('Ripple+Fast-Ripple', 116769.  , 0.) ('Ripple', 119000.  , 0.)]]


In [43]:
import os

# Same PATH_TO_FILE prefix as every other file access in this notebook, so the file
# lands next to the input data even when the working directory differs
file_name = f"{PATH_TO_FILE}processed/seeg_filtered_subset_90-119_markers.npy"

# np.save does not create missing folders; the output folder may be absent on a fresh
# checkout because its contents are not stored in git
os.makedirs(os.path.dirname(file_name), exist_ok=True)

np.save(file_name, final_markers)   # Save the data to a numpy file (not stored in git due to size)

## Write the final subset of the SEEG data into a .npy file
Add an extra dimension (n_channels) to the signal array to match the shape of the other dataset

In [82]:
# To keep the same format as the original data, we will add an extra inner dimension to the final signal
final_signal = final_signal.reshape((num_samples, 1))
preview_np_array(final_signal, "Final Signal")

Final Signal Shape: (245760, 1).
Preview: [[ 0.00095524]
 [ 0.00310525]
 [-0.00478473]
 [-0.03071283]
 [-0.02514711]
 ...
 [ 0.79872815]
 [ 0.21016244]
 [-0.49596476]
 [-0.2753618 ]
 [ 0.28661303]]


In [83]:
import os

# The subset is built from the ripple-band recording loaded at the top of the notebook
# (and the plot is exported under a "ripple_band" name), so the saved signal carries the
# same band tag. The previous "fr_band" name would have stored ripple-band data under the
# fast-ripple file name, overwriting or masquerading as the fast-ripple subset.
# The PATH_TO_FILE prefix matches every other file access in this notebook.
file_name = f"{PATH_TO_FILE}processed/seeg_filtered_subset_90-119_ripple_band.npy"

# np.save does not create missing folders; the output folder may be absent on a fresh
# checkout because its contents are not stored in git
os.makedirs(os.path.dirname(file_name), exist_ok=True)

np.save(file_name, final_signal)   # Save the data to a numpy file (not stored in git due to size)