# Build subset of SEEG recording containing relevant HFOs and some background activity


### Check WD (change if necessary) and file loading

In [56]:
# Show current directory
import os
curr_dir = os.getcwd()
print(curr_dir)

# Check if the current WD is the file location
if "/src/seeg_data/synthetic" not in os.getcwd():
    # Set working directory to this file location
    file_location = f"{os.getcwd()}/thesis-lava/src/seeg_data/synthetic"
    print("File Location: ", file_location)

    # Change the current working Directory
    os.chdir(file_location)

    # New Working Directory
    print("New Working Directory: ", os.getcwd())

PATH_TO_FILE = '' # 'src/hfo/'  # This is needed if the WD is not the same as the file location

/home/monkin/Desktop/feup/thesis/thesis-lava/src/seeg_data/synthetic


In [57]:
import numpy as np
import math
from utils.io import preview_np_array

seeg_file_name = "seeg_synthetic_humans.npy"
recorded_data = np.load(f"{PATH_TO_FILE}{seeg_file_name}")

print("Data shape: ", recorded_data.shape)
print("First time steps: ", recorded_data[:10])

Data shape:  (245760, 960)
First time steps:  [[ 3.23520243e-01 -1.32353902e+00 -5.96688092e-01 -8.91667426e-01
   1.75283265e+00 ...  1.24049798e-01  1.43669665e+00 -1.96089995e+00
  -1.97698221e-01 -1.20784545e+00]
 [-6.97590993e-04 -3.51223612e+00 -4.87669557e-01 -1.26132798e+00
   2.45160866e+00 ... -1.40494496e-01  2.09809756e+00 -5.87578297e+00
  -7.44009852e-01 -5.10960639e-01]
 [ 1.90266395e+00 -5.67260170e+00  9.82748926e-01 -1.32037580e+00
   2.60521674e+00 ... -8.93780053e-01  2.16126227e+00 -6.61829710e+00
  -8.30532670e-01 -8.15966547e-01]
 [ 3.42821074e+00 -5.91751766e+00  2.36291528e+00 -2.17143798e+00
   3.95203590e+00 ... -6.92936718e-01  1.80983675e+00 -6.31197834e+00
  -1.50293064e+00 -2.36355257e+00]
 [ 4.44311810e+00 -5.09231329e+00  3.05757546e+00 -2.64964199e+00
   5.75733852e+00 ...  2.84276128e-01  1.76603031e+00 -6.97854471e+00
  -1.33813846e+00 -1.11216927e+00]
 [ 3.50692177e+00 -5.66193056e+00  1.01127577e+00 -2.33646107e+00
   7.53711605e+00 ...  1.17622674

## Define Global Parameters of the Experiment

In [58]:
sampling_rate = 2048    # 2048 Hz
input_duration = 120 * (10**3)    # 120000 ms or 120 seconds
num_samples = recorded_data.shape[0]    # 2048 * 120 = 245760
num_channels = recorded_data.shape[1]   # 960

x_step = 1/sampling_rate * (10**3)  # 0.48828125 ms

## Define the Channels of Interest that will be used to extract the signal

In [59]:
ch_start_idx = 90  # Start index of the region of interest
ch_end_idx = ch_start_idx + 30 # End index of the region of interest 

### Extract the SEEG channels in the defined range from the SEEG data

In [60]:
# Extract the seeg_channels in the range [ch_start_idx, ch_end_idx]
relevant_seeg = recorded_data[:, ch_start_idx:ch_end_idx]

preview_np_array(relevant_seeg, "SEEG Channels")

SEEG Channels Shape: (245760, 30).
Preview: [[ 3.2148191e-01  7.6039447e-03  4.7144434e-01  1.6629694e-01
   6.4306718e-01 ... -1.0047178e+00 -2.3591769e+00 -9.1859162e-01
  -1.1903234e+00 -1.0296663e+00]
 [-2.0508800e-02 -4.6258485e-01 -7.4022943e-01 -1.8455078e-01
   2.1398619e-01 ... -1.3898176e+00 -3.7664881e+00 -1.3889902e+00
  -5.1110125e-01 -1.5383426e+00]
 [-1.2178916e+00 -2.0358562e+00 -1.2876116e+00 -1.3673829e+00
   1.2640097e+00 ... -2.6840944e+00 -4.0100474e+00 -1.9828362e+00
  -1.4741321e+00 -1.6997232e+00]
 [-1.0280560e+00 -3.0880692e+00 -1.2848712e+00  2.0071094e-01
   2.3219368e+00 ... -1.9042799e+00 -2.8593712e+00 -1.8951950e+00
  -2.2079365e+00 -2.1716366e+00]
 [-1.6502820e+00 -2.6590590e+00 -2.6356335e+00 -1.8437275e-01
   4.8591003e+00 ... -7.3706615e-01 -3.4916322e+00 -3.4061065e+00
  -1.9112504e+00 -2.1673732e+00]
 ...
 [ 5.8455471e+01  5.3846050e+01 -2.0737498e+00 -3.2716221e-01
   7.4935384e+00 ...  5.5184326e+01  6.2657150e+01 -5.0081539e+01
  -1.8071165e+01 -

## Import the Markers (Annotated Events) 
The markers are stored in a numpy array of shape (num_channels, events):
- Each row represents the events of a channel
- Each event is composed of the following 3 fields (Label, Position, Shape)

In [62]:
markers_seeg_file_name = "seeg_synthetic_humans_markers.npy"
markers = np.load(f"{PATH_TO_FILE}{markers_seeg_file_name}")

print("Markers shape: ", markers.shape)
print("First time steps: ", markers[:10])

Markers shape:  (960, 42)
First time steps:  [[('Spike+Ripple+Fast-Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   4537.6 , 0.)
  ('Ripple+Fast-Ripple',   7610.84, 0.) ('Spike',  10261.2 , 0.)
  ('Fast-Ripple',  13411.6 , 0.) ... ('Spike+Fast-Ripple', 106672.  , 0.)
  ('Ripple+Fast-Ripple', 109322.  , 0.) ('Ripple', 113024.  , 0.)
  ('Fast-Ripple', 116549.  , 0.) ('Spike+Ripple', 119000.  , 0.)]
 [('Spike+Fast-Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3849.12, 0.)
  ('Ripple+Fast-Ripple',   7010.25, 0.) ('Spike',   9893.55, 0.)
  ('Spike+Ripple',  12840.8 , 0.) ... ('Spike', 109180.  , 0.)
  ('Spike', 112373.  , 0.) ('Fast-Ripple', 114176.  , 0.)
  ('Spike+Fast-Ripple', 116672.  , 0.) ('Fast-Ripple', 119000.  , 0.)]
 [('Fast-Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   4357.42, 0.)
  ('Fast-Ripple',   7062.01, 0.) ('Spike+Ripple',  10354.  , 0.)
  ('Spike+Ripple',  12860.4 , 0.) ... ('Spike+Ripple', 108121.  , 0.)
  ('Ripple+Fast-Ripple', 110854.  , 

### Get the markers with a certain SNR in a specific region
Let's use the markers with a SNR=15dB in the first region

In [61]:
relevant_markers = markers[ch_start_idx:ch_end_idx]
preview_np_array(relevant_markers, "relevant_markers")

relevant_markers Shape: (30, 42).
Preview: [[('Spike',   1000.  , 0.) ('Spike+Fast-Ripple',   4218.75, 0.)
  ('Ripple+Fast-Ripple',   6966.8 , 0.)
  ('Ripple+Fast-Ripple',   9793.95, 0.)
  ('Spike+Ripple+Fast-Ripple',  13189.  , 0.) ...
  ('Ripple+Fast-Ripple', 107859.  , 0.) ('Spike+Ripple', 111439.  , 0.)
  ('Ripple', 114551.  , 0.) ('Spike+Ripple', 116517.  , 0.)
  ('Spike+Ripple', 119000.  , 0.)]
 [('Spike+Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3917.97, 0.)
  ('Spike+Fast-Ripple',   6794.92, 0.) ('Fast-Ripple',   9094.73, 0.)
  ('Fast-Ripple',  12348.1 , 0.) ... ('Spike+Fast-Ripple', 107697.  , 0.)
  ('Fast-Ripple', 111079.  , 0.) ('Spike', 113382.  , 0.)
  ('Ripple+Fast-Ripple', 116656.  , 0.) ('Spike+Ripple', 119000.  , 0.)]
 [('Fast-Ripple',   1000.  , 0.) ('Spike',   3655.76, 0.)
  ('Spike',   7188.96, 0.) ('Spike',   9880.37, 0.)
  ('Spike+Ripple',  13890.6 , 0.) ...
  ('Spike+Fast-Ripple', 107924.  , 0.)
  ('Spike+Ripple+Fast-Ripple', 110238.  , 0.)
  ('Spik

### Build a subset of the SEEG recording joining data from the channels selected above that contain relevant HFOs

In [63]:
# Build the time_step values of the signal
time_vals = [val for val in np.arange(x_step, input_duration + x_step, x_step)]
print("Time values: ", time_vals[:10])

Time values:  [0.48828125, 0.9765625, 1.46484375, 1.953125, 2.44140625, 2.9296875, 3.41796875, 3.90625, 4.39453125, 4.8828125]


In [64]:
final_signal = np.empty(shape=(num_samples))    # Initialize the final signal
final_markers = []                  # List holding the final markers

# Variable holding the current marker idx for each channel
curr_marker_idx = np.zeros(shape=(relevant_markers.shape[0]), dtype=int)
preview_np_array(curr_marker_idx, "curr_marker_idx")

curr_marker_idx Shape: (30,).
Preview: [0 0 0 0 0 ... 0 0 0 0 0]


## Set the lenght of each segment to be extracted from a channel
Since we want to join the data from several channels in a single subset, we need to set the length of the segments to be extracted from each channel.

In [65]:
segment_length = 500    # 500 ms segments. Every 500ms, there will be a relevant event (Ripple, Fast Ripple, or both)

In [66]:
curr_segment_ch_start_idx = 0    # Variable to hold the index where we start searching for a relevant marker in each segment

for curr_segment in range(0, input_duration, segment_length):
    time_start_idx = int(curr_segment / x_step)   # Start index of the segment in the time_vals array
    time_end_idx = int((curr_segment + segment_length) / x_step)   # End index of the segment in the time_vals array (exclusive)
    # print("time start idx: ", time_start_idx, "time end idx: ", time_end_idx)

    # Search for a relevant event in this segment
    starting_marker_idx = int(curr_segment / segment_length) % relevant_markers.shape[0]    # Get the marker index where we start searching for this segment
    # print("starting_marker_idx: ", starting_marker_idx)
    curr_marker_idx = starting_marker_idx

    # Search for a relevant event in this time segment
    while True:
        # Check if the markers of the current channel have a relevant event
        # TODO:

        # If a relevant event is found, add the relevant event to the final signal
        if True:
            # Add the relevant event's seeg data to the final signal
            final_signal[time_start_idx:time_end_idx] = relevant_seeg[time_start_idx:time_end_idx, curr_marker_idx]

            # Add the relevant event's marker to the final markers
            final_markers.append(relevant_markers[curr_marker_idx][0])  # TODO: CHange this to the actual marker after being found
            break


        # Search in the markers of the next channel
        curr_marker_idx = (curr_marker_idx + 1) % relevant_markers.shape[0]    # Move to the next marker index

        # Check if we have reached the starting marker index
        if curr_marker_idx == starting_marker_idx:
            # We have searched all the markers in this segment (No relevant event found) 
            # -> Add current channel data to the final signal (with no relevant events)

            # Add background noise to the segment
            final_signal[time_start_idx:time_end_idx] = relevant_seeg[time_start_idx:time_end_idx, curr_marker_idx]
            break

### Preview the final signal and markers

In [67]:
preview_np_array(final_signal, "Final Signal")

Final Signal Shape: (245760,).
Preview: [  0.32148191  -0.0205088   -1.21789157  -1.02805603  -1.65028203 ...
 -10.6392498  -11.40901566 -11.79014492 -11.26972866 -10.32582855]


In [68]:
final_markers = np.array(final_markers)
preview_np_array(final_markers, "Final Markers")

Final Markers Shape: (240,).
Preview: [('Spike', 1000., 0.) ('Spike+Ripple', 1000., 0.)
 ('Fast-Ripple', 1000., 0.) ('Spike+Ripple+Fast-Ripple', 1000., 0.)
 ('Ripple', 1000., 0.) ... ('Spike', 1000., 0.) ('Spike', 1000., 0.)
 ('Ripple+Fast-Ripple', 1000., 0.) ('Spike+Fast-Ripple', 1000., 0.)
 ('Spike+Ripple', 1000., 0.)]


---

## Visualize the Subset of the SEEG Data

In [87]:
# Interactive Plot for the HFO detection
# bokeh docs: https://docs.bokeh.org/en/2.4.1/docs/first_steps/first_steps_1.html

from utils.line_plot import create_fig  # Import the function to create the figure
from bokeh.models import Range1d

# Define the x and y values
# Should the first input start at 0 or x_step?
# TODO: is it okay to create a range with floats?
x = [val for val in np.arange(x_step, input_duration + x_step, x_step)] 

# Create the y arrays for the voltage plot representing the voltage of each electrode
v_yarrays = [ripple_band_seeg, fr_band_seeg]

## Create the Plot

In [88]:
# Create the plot
# List of tuples containing the y values and the legend label
hfo_y_arrays = [(ripple_band_seeg, "Ripple Band"), (fr_band_seeg, "Fast Ripple Band")]

# Create the SEEG Voltage plot
hfo_plot = create_fig(
    title="SEEG Voltage dynamics of Filtered Ripple and Fast Ripple Bands", 
    x_axis_label='time (ms)', 
    y_axis_label='Voltage (μV)',
    x=x, 
    y_arrays=hfo_y_arrays, 
    sizing_mode="stretch_both", 
    tools="pan, box_zoom, wheel_zoom, hover, undo, redo, zoom_in, zoom_out, reset, save",
    tooltips="Data point @x: @y",
    legend_location="top_right",
    legend_bg_fill_color="navy",
    legend_bg_fill_alpha=0.1,
    # y_range=Range1d(-0.05, 1.05)
)

# If there are more than 30 channels, hide the legend
if len(hfo_y_arrays) > 30:
    # Hide the legend
    hfo_plot.legend.visible = False

## Add Box Annotations to the plot to identify the marked HFOs (ground truth)

In [89]:
from bokeh.models import BoxAnnotation
# from utils.line_plot import color_map

show_markers = True    # Boolean to show the markers

color_map = {                  
    'Spike': 'red',
    'Fast-Ripple': 'blue',
    'Ripple': 'green',  
    'Spike+Ripple': 'yellow',
    'Spike+Fast-Ripple': 'pink',
    'Ripple+Fast-Ripple': 'cyan',
    'Spike+Ripple+Fast-Ripple': 'black'
}

confidence_range = 100          # TODO: Check this value. When the duration is missing (0), we consider the 200ms window around the marked position 
visited_markers = {}    # Avoid inserting multiple boxes for the same marker (only one of each label)
use_visited = False     # Boolean controlling if we remove duplicate markers
plot_instant = True     # Boolean to plot the markers as instant events or as boxes
instant_width = 100 # 20       # Width of the instant event for visualization purposes

channels_used = {selected_ch_idx}   # Set of channels to be used
if show_markers:
    for ch_idx in channels_used:
        channel_markers = markers[ch_idx]
        # print("channel_markers", channel_markers)
        for idx2, marker in enumerate(channel_markers):
            # print("marker:", marker)
            
            if use_visited:
                # Check if the marker has already been visited and skip it if it has
                if marker['position'] in visited_markers:
                    visited_labels = visited_markers[marker['position']]    # Get the labels that already have an annotation for this position
                    if marker['label'] in visited_labels:
                        # print("Skipping marker", marker['position'], marker['label'])
                        continue    # Skip this marker
                    else:
                        visited_labels.append(marker['label'])  # Add the label to the visited labels
                else:
                    visited_markers[marker['position']] = [marker['label']] # Add the marker to the visited markers

            # Add a box annotation for each marker
            has_duration = marker['duration'] > 0
            
            confidence_constant = 0 if plot_instant or has_duration else confidence_range

            left = marker['position'] - confidence_constant
            right = marker['position'] + confidence_constant + instant_width
            box_color = color_map[marker['label']]  # Choose a color according to the label
            
            # if left < min_t or right > max_t:
            #     continue    # Skip this marker
            

            box = BoxAnnotation(left=left, right=right, fill_color=box_color, fill_alpha=0.35)
            # print("Added marker for channel: ", ch_idx, " at position: ", left)
            hfo_plot.add_layout(box)

## Show the Plot

In [90]:
import bokeh.plotting as bplt

showPlot = False
if showPlot:
    bplt.show(hfo_plot)

## Export the plot to a file

In [91]:
export = True

if export:
    file_path = f"{PATH_TO_FILE}results/filtered_seeg_ch{selected_ch_idx}.html"

    # Customize the output file settings
    bplt.output_file(filename=file_path, title="SEEG Data - Filtered Voltage dynamics across time")

    # Save the plot
    bplt.save(hfo_plot)