# Build subset of SEEG recording containing relevant HFOs and some background activity


### Check WD (change if necessary) and file loading

In [10]:
# Show current directory
import os
curr_dir = os.getcwd()
print(curr_dir)

# Data files are resolved relative to this notebook's folder, so make sure the
# kernel is running from there before anything is loaded.
if "/src/seeg_data/synthetic" not in curr_dir:
    # Assumes the kernel was started from the directory that contains the
    # `thesis-lava` checkout; os.chdir raises if that folder does not exist.
    file_location = f"{curr_dir}/thesis-lava/src/seeg_data/synthetic"
    print("File Location: ", file_location)

    os.chdir(file_location)
    print("New Working Directory: ", os.getcwd())

# Prefix prepended to every data file name. Keep it empty when the working
# directory is the notebook's folder; otherwise set it (e.g. 'src/hfo/').
PATH_TO_FILE = ''

/home/monkin/Desktop/feup/thesis/thesis-lava/src/seeg_data/synthetic


In [11]:
import numpy as np
import math
from utils.io import preview_np_array

# File holding the recording; it is looked up under PATH_TO_FILE
seeg_file_name = "seeg_synthetic_humans.npy"
recorded_data = np.load(f"{PATH_TO_FILE}{seeg_file_name}")

# Sanity check: overall shape, then the first 10 rows of the array
print("Data shape: ", recorded_data.shape)
print("First time steps: ", recorded_data[:10])

Data shape:  (245760, 960)
First time steps:  [[ 3.2352024e-01 -1.3235390e+00 -5.9668809e-01 ... -1.9608999e+00
  -1.9769822e-01 -1.2078454e+00]
 [-6.9759099e-04 -3.5122361e+00 -4.8766956e-01 ... -5.8757830e+00
  -7.4400985e-01 -5.1096064e-01]
 [ 1.9026639e+00 -5.6726017e+00  9.8274893e-01 ... -6.6182971e+00
  -8.3053267e-01 -8.1596655e-01]
 ...
 [ 3.2172418e+00 -8.4650068e+00  1.5216088e+00 ... -4.1081657e+00
   2.0085973e-01 -4.7539668e+00]
 [ 1.7725919e+00 -9.4744024e+00  1.6776791e+00 ... -4.1469693e+00
   1.6412770e+00 -3.4672713e+00]
 [ 7.8109097e-01 -1.0500931e+01  2.3717029e+00 ... -5.1762242e+00
   1.0715837e+00 -4.4489903e+00]]


### Extract a single SEEG channel from the SEEG data

In [12]:
# Index of the channel (column of `recorded_data`) used to create the subset
selected_ch_idx = 94

# Each row of `recorded_data` is one time step and each column one channel, so a
# column slice pulls out the whole channel without a per-sample Python loop.
seeg_ch = recorded_data[:, selected_ch_idx]    # 1-D view into the recording
seeg_ch_np = np.array(seeg_ch)                 # independent copy, same dtype as the recording

print(f"seeg_ch_np shape: {seeg_ch_np.shape}. \nPreview: {seeg_ch_np}")

seeg_ch_np shape: (245760,). 
Preview: [0.6430672  0.21398619 1.2640097  ... 8.063279   8.95947    7.402966  ]


## Define Global Parameters of the Experiment

In [14]:
# Acquisition parameters
sampling_rate = 2048                    # Hz
input_duration = 120 * 1000             # ms (120 seconds)

# Dimensions read from the loaded recording
num_samples = recorded_data.shape[0]    # rows: 2048 * 120 = 245760
num_channels = recorded_data.shape[1]   # columns: 960

# Time between two consecutive samples, in ms
x_step = (1 / sampling_rate) * 1000     # 0.48828125 ms

## Import the Markers (Annotated Events) 
The markers are stored in a numpy array of shape (num_channels, events):
- Each row represents the events of a channel
- Each event is composed of the following 3 fields (Label, Position, Duration)

In [13]:
# File holding the annotated events; it is looked up under PATH_TO_FILE
markers_seeg_file_name = "seeg_synthetic_humans_markers.npy"
markers = np.load(f"{PATH_TO_FILE}{markers_seeg_file_name}")

# Sanity check: overall shape, then the marker rows of the first 10 channels
print("Markers shape: ", markers.shape)
print("First time steps: ", markers[:10])

Markers shape:  (960, 42)
First time steps:  [[('Spike+Ripple+Fast-Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   4537.6 , 0.)
  ('Ripple+Fast-Ripple',   7610.84, 0.) ('Spike',  10261.2 , 0.)
  ('Fast-Ripple',  13411.6 , 0.) ('Spike+Ripple',  15644.  , 0.)
  ('Ripple',  18607.9 , 0.) ('Fast-Ripple',  21300.8 , 0.)
  ('Fast-Ripple',  24086.9 , 0.)
  ('Spike+Ripple+Fast-Ripple',  26598.1 , 0.)
  ('Spike+Fast-Ripple',  30529.8 , 0.) ('Ripple',  33387.7 , 0.)
  ('Spike+Fast-Ripple',  36145.5 , 0.) ('Spike',  38744.1 , 0.)
  ('Spike+Fast-Ripple',  41876.  , 0.)
  ('Ripple+Fast-Ripple',  44531.7 , 0.) ('Spike+Ripple',  47699.7 , 0.)
  ('Spike+Ripple+Fast-Ripple',  50303.2 , 0.) ('Ripple',  53045.9 , 0.)
  ('Ripple+Fast-Ripple',  56416.  , 0.)
  ('Spike+Ripple+Fast-Ripple',  58838.4 , 0.)
  ('Ripple+Fast-Ripple',  62578.6 , 0.) ('Fast-Ripple',  65430.2 , 0.)
  ('Spike',  68184.1 , 0.) ('Spike+Ripple',  70753.9 , 0.)
  ('Ripple+Fast-Ripple',  73788.6 , 0.) ('Spike',  76749.  , 0.)
  

### Get the markers with a certain SNR in a specific region
Let's use the markers with an SNR of 15 dB in the first region (channels 90–119).

In [17]:
# Half-open channel window [ch_start_idx, ch_end_idx) of the region of interest
ch_start_idx = 90
ch_end_idx = ch_start_idx + 30

# Keep one row of markers per channel inside the window
relevant_markers = markers[ch_start_idx:ch_end_idx]
preview_np_array(relevant_markers, "relevant_markers")

relevant_markers Shape: (30, 42).
Preview: [[('Spike',   1000.  , 0.) ('Spike+Fast-Ripple',   4218.75, 0.)
  ('Ripple+Fast-Ripple',   6966.8 , 0.)
  ('Ripple+Fast-Ripple',   9793.95, 0.)
  ('Spike+Ripple+Fast-Ripple',  13189.  , 0.) ...
  ('Ripple+Fast-Ripple', 107859.  , 0.) ('Spike+Ripple', 111439.  , 0.)
  ('Ripple', 114551.  , 0.) ('Spike+Ripple', 116517.  , 0.)
  ('Spike+Ripple', 119000.  , 0.)]
 [('Spike+Ripple',   1000.  , 0.)
  ('Spike+Ripple+Fast-Ripple',   3917.97, 0.)
  ('Spike+Fast-Ripple',   6794.92, 0.) ('Fast-Ripple',   9094.73, 0.)
  ('Fast-Ripple',  12348.1 , 0.) ... ('Spike+Fast-Ripple', 107697.  , 0.)
  ('Fast-Ripple', 111079.  , 0.) ('Spike', 113382.  , 0.)
  ('Ripple+Fast-Ripple', 116656.  , 0.) ('Spike+Ripple', 119000.  , 0.)]
 [('Fast-Ripple',   1000.  , 0.) ('Spike',   3655.76, 0.)
  ('Spike',   7188.96, 0.) ('Spike',   9880.37, 0.)
  ('Spike+Ripple',  13890.6 , 0.) ...
  ('Spike+Fast-Ripple', 107924.  , 0.)
  ('Spike+Ripple+Fast-Ripple', 110238.  , 0.)
  ('Spik

### Build a subset of the SEEG recording joining data from the channels selected above that contain relevant HFOs

In [20]:
# Build the time_step values of the signal (ms): x_step, 2 * x_step, ..., input_duration.
# The number of steps is computed as an integer first and the integer indices are
# then scaled by x_step. np.arange with a non-integer step can return one element
# more or less than expected because of floating-point round-off.
n_time_steps = int(round(input_duration / x_step))
time_vals = (np.arange(1, n_time_steps + 1) * x_step).tolist()
print("Time values: ", time_vals[:10])

Time values:  [0.48828125, 0.9765625, 1.46484375, 1.953125, 2.44140625, 2.9296875, 3.41796875, 3.90625, 4.39453125, 4.8828125]


In [22]:
# Output buffer for the assembled signal, one slot per sample.
# np.empty leaves the contents uninitialised until they are written.
final_signal = np.empty(num_samples)

# Per-channel cursor: index of the current marker in each row of relevant_markers
curr_marker_idx = np.zeros(len(relevant_markers), dtype=int)
preview_np_array(curr_marker_idx, "curr_marker_idx")

curr_marker_idx Shape: (30,).
Preview: [0 0 0 0 0 ... 0 0 0 0 0]


## Set the length of each segment to be extracted from a channel
Since we want to join the data from several channels in a single subset, we need to set the length of the segments to be extracted from each channel.

In [None]:
# Segment length in ms. Every 500 ms there will be a relevant event
# (Ripple, Fast Ripple, or both).
segment_length = 500

In [None]:
# Walk over every sample index of the recording and look up its time value.
# NOTE(review): the loop body is unfinished -- it only reads the time value and
# never writes to `final_signal` or advances `curr_marker_idx`.
for curr_idx in range(num_samples):
    curr_time_step = time_vals[curr_idx]

---

## Visualize the Subset of the SEEG Data

In [87]:
# Interactive Plot for the HFO detection
# bokeh docs: https://docs.bokeh.org/en/2.4.1/docs/first_steps/first_steps_1.html

from utils.line_plot import create_fig  # Import the function to create the figure
from bokeh.models import Range1d

# Define the x and y values
# Should the first input start at 0 or x_step?
# The time axis is built from integer sample indices scaled by x_step instead of
# np.arange with a float step, whose length is not reliable under round-off.
x = (np.arange(1, int(round(input_duration / x_step)) + 1) * x_step).tolist()

# Create the y arrays for the voltage plot representing the voltage of each electrode
# NOTE(review): `ripple_band_seeg` and `fr_band_seeg` are not assigned in any cell
# visible in this notebook; on a fresh kernel this line would raise NameError.
# Confirm where they are supposed to come from.
v_yarrays = [ripple_band_seeg, fr_band_seeg]

## Create the Plot

In [88]:
# (series, legend label) pairs -- one entry per line drawn on the figure
hfo_y_arrays = [
    (ripple_band_seeg, "Ripple Band"),
    (fr_band_seeg, "Fast Ripple Band"),
]

# Build the interactive SEEG voltage figure
hfo_plot = create_fig(
    title="SEEG Voltage dynamics of Filtered Ripple and Fast Ripple Bands",
    x_axis_label='time (ms)',
    y_axis_label='Voltage (μV)',
    x=x,
    y_arrays=hfo_y_arrays,
    sizing_mode="stretch_both",
    tools="pan, box_zoom, wheel_zoom, hover, undo, redo, zoom_in, zoom_out, reset, save",
    tooltips="Data point @x: @y",
    legend_location="top_right",
    legend_bg_fill_color="navy",
    legend_bg_fill_alpha=0.1,
    # y_range=Range1d(-0.05, 1.05)
)

# With more than 30 plotted series the legend is hidden
if len(hfo_y_arrays) > 30:
    hfo_plot.legend.visible = False

## Add Box Annotations to the plot to identify the marked HFOs (ground truth)

In [89]:
from bokeh.models import BoxAnnotation
# from utils.line_plot import color_map

show_markers = True    # Boolean to show the markers

# Fill color of the annotation box for each marker label
color_map = {
    'Spike': 'red',
    'Fast-Ripple': 'blue',
    'Ripple': 'green',
    'Spike+Ripple': 'yellow',
    'Spike+Fast-Ripple': 'pink',
    'Ripple+Fast-Ripple': 'cyan',
    'Spike+Ripple+Fast-Ripple': 'black'
}

# Half-width of the window drawn around a marker whose duration is missing (0):
# position - 100 .. position + 100, i.e. a 200 ms window centred on the marker.
confidence_range = 100
visited_markers = {}    # position -> labels already annotated at that position
use_visited = False     # Boolean controlling if we remove duplicate markers
plot_instant = True     # Boolean to plot the markers as instant events or as boxes
instant_width = 100 # 20       # Width of the instant event for visualization purposes

channels_used = {selected_ch_idx}   # Set of channels to be used
if show_markers:
    for ch_idx in channels_used:
        channel_markers = markers[ch_idx]
        for marker in channel_markers:
            position = marker['position']
            label = marker['label']
            duration = marker['duration']

            if use_visited:
                # Draw at most one box per (position, label) pair
                seen_labels = visited_markers.setdefault(position, [])
                if label in seen_labels:
                    continue    # Skip this marker
                seen_labels.append(label)

            # Horizontal extent of the box:
            #  - instant mode: fixed-width strip starting at the marked position
            #  - marker with a duration: span the annotated event
            #    (assumes duration uses the same unit as position -- TODO confirm)
            #  - no duration: symmetric confidence window around the position
            if plot_instant:
                left = position
                right = position + instant_width
            elif duration > 0:
                left = position
                right = position + duration
            else:
                left = position - confidence_range
                right = position + confidence_range

            # if left < min_t or right > max_t:
            #     continue    # Skip this marker

            # Choose a color according to the label (unknown labels raise KeyError)
            box_color = color_map[label]
            box = BoxAnnotation(left=left, right=right, fill_color=box_color, fill_alpha=0.35)
            hfo_plot.add_layout(box)

## Show the Plot

In [90]:
import bokeh.plotting as bplt

# Flip to True to render the figure interactively
showPlot = False
if showPlot:
    bplt.show(hfo_plot)

## Export the plot to a file

In [91]:
export = True   # Boolean controlling whether the figure is written to disk

if export:
    file_path = f"{PATH_TO_FILE}results/filtered_seeg_ch{selected_ch_idx}.html"

    # Create the output folder when it is missing; saving into a non-existent
    # directory would otherwise fail with FileNotFoundError.
    out_dir = os.path.dirname(file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Customize the output file settings
    bplt.output_file(filename=file_path, title="SEEG Data - Filtered Voltage dynamics across time")

    # Save the plot
    bplt.save(hfo_plot)