# Notebook for data visualization of the processed HFO data from human SEEGs

### Check WD (change if necessary) and file loading

In [99]:
# Report the directory the kernel is currently running in
import os
curr_dir = os.getcwd()
print(curr_dir)

# The relative paths used below expect the WD to be this notebook's folder,
# so switch to it when the kernel was started from somewhere else
if not ("/src/seeg_data/clinical" in curr_dir):
    file_location = f"{curr_dir}/thesis-lava/src/seeg_data/clinical"
    print("File Location: ", file_location)

    os.chdir(file_location)     # raises if the folder does not exist
    print("New Working Directory: ", os.getcwd())

# Prefix prepended to every data path; keep it empty when the WD is the file
# location, otherwise set it to the relative path (e.g. 'src/hfo/')
PATH_TO_FILE = ''

/home/monkin/Desktop/feup/thesis/thesis-lava/src/seeg_data/clinical


## Add the parent directory to the path to detect the utils module

In [100]:
import os
import sys

# Make the local `utils` package importable by registering the `src` folder
# (resolved against the current working directory) on the import path.
# NOTE(review): this only helps if a `src` folder exists under the WD - confirm.
module_path = os.path.abspath('src')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

## Load the data from the .npy file

In [101]:
import numpy as np
import math

# Location of the recording, relative to PATH_TO_FILE
seeg_file_name = "patients/csl/seeg_csl.npy"
seeg_path = f"{PATH_TO_FILE}{seeg_file_name}"

# Load the whole recording into memory (rows are treated as time steps, columns as channels)
recorded_data = np.load(seeg_path)

# Quick sanity check of what was loaded
print("Data shape: ", recorded_data.shape)
print("First time steps: ", recorded_data[:10])

Data shape:  (129239, 86)
First time steps:  [[   1.0633698    34.293705     -5.3168535    31.369436    -37.483818
    -1.8608985     9.0386505  -102.88112       9.304497      6.911907
    38.813034     68.32156     -38.547188    -75.76517       8.241123
    57.422016   -140.36493      63.270557     15.684719     -5.582697
   -10.633707    -21.799099   -145.94763    -128.40201     -79.7528
   -44.927414     12.228767    106.33707     -26.052582   -141.16248
   -55.56112     -59.814606    -10.102022    -18.077301     12.228763
    -1.3292141   -70.979996     48.649216      7.7094383    61.941345
   -62.47303     -15.684718     24.72337     -56.358646    -51.573483
   -75.76517      11.431244     53.70022     103.14696     -36.154602
   -31.369438    -14.887188    -46.788315     60.877975     14.88719
   -32.16696      74.7018     -114.04651     -18.874832     29.774384
    40.408085    -14.621349     21.799103      3.4559555    67.52403
   -32.166965     71.77753     -41.205612    -28.4

In [102]:
# Annotated events of the recording: an object array with one entry per channel
markers_seeg_file_name = "patients/csl/seeg_csl_markers.npy"

# SECURITY: allow_pickle=True unpickles arbitrary Python objects, which can execute
# code while loading. Only load marker files that come from a trusted source.
markers = np.load(f"{PATH_TO_FILE}{markers_seeg_file_name}", allow_pickle=True)

print("Markers shape: ", markers.shape)
# The array is indexed by channel (not by time step), so label the preview accordingly
print("Markers of the first 10 channels: ", markers[:10])

Markers shape:  (86,)
First time steps:  [list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])
 list([array([],
       dtype=[('label', '<U64'), ('position', '<f4'), ('duration', '<f4')])])]


In [103]:
# Display the marker records (label, position, duration) stored for channel index 30
markers[30]

array([('Ripple',   522.9492, 0.), ('Fast Ripple',   523.9258, 0.),
       ('Ripple',  2225.586 , 0.), ('Fast Ripple',  2230.957 , 0.),
       ('Ripple',  2904.2969, 0.), ('Ripple',  3246.0938, 0.),
       ('Ripple',  3443.8477, 0.), ('Fast Ripple',  3724.121 , 0.),
       ('Ripple',  4728.0273, 0.), ('Ripple',  4879.3945, 0.),
       ('Ripple',  5058.1055, 0.), ('Ripple',  5731.9336, 0.),
       ('Ripple',  7260.254 , 0.), ('Fast Ripple',  7265.1367, 0.),
       ('Ripple',  8493.164 , 0.), ('Fast Ripple',  8501.465 , 0.),
       ('Ripple',  9497.07  , 0.), ('Ripple',  9965.82  , 0.),
       ('Fast Ripple',  9966.309 , 0.), ('Fast Ripple', 10702.637 , 0.),
       ('Ripple', 10716.309 , 0.), ('Ripple', 11353.516 , 0.),
       ('Ripple', 11503.906 , 0.), ('Fast Ripple', 12854.98  , 0.),
       ('Ripple', 12874.512 , 0.), ('Ripple', 13606.934 , 0.),
       ('Fast Ripple', 13614.746 , 0.), ('Ripple', 14166.016 , 0.),
       ('Ripple', 14339.355 , 0.), ('Fast Ripple', 14495.605 , 0.),
     

## Define some important parameters of the input data

In [104]:
# Acquisition parameters of the loaded recording (all times are expressed in ms)
sampling_rate = 2048    # 2048 Hz
input_duration = 63.1049 * (10**3)    # 63104.9 ms (about 63.1 seconds), hard-coded for this recording
num_samples = recorded_data.shape[0]    # number of time steps (129239 for the data shape printed above)
num_channels = recorded_data.shape[1]   # number of channels (86 for the data shape printed above)

x_step = 1/sampling_rate * (10**3)  # time between two consecutive samples: 0.48828125 ms

## Plot the data in an interactive Line Plot

### Configurable parameters

In [105]:
# Collect the indices of the channels whose first marker entry is not empty,
# i.e. the channels that carry at least one annotated event
ch_with_events = [
    ch_idx
    for ch_idx, ch_markers in enumerate(markers)
    if len(ch_markers[0]) != 0
]

print("Channels with events: ", ch_with_events)

Channels with events:  [22, 23, 24, 29, 30, 31, 32, 36, 37, 41, 42, 48, 49, 50, 64, 65, 66]


In [106]:
# First and last channel index of the plotted range
# (the upper bound can be swapped for num_channels to cover every channel)
min_channel, max_channel = 0, 9

# Hand-picked channel indices, de-duplicated through a set and stored as an ascending list.
# Alternative selecting every channel up to max_channel:
# channels_used = sorted(set(range(0, max_channel+1, 1)))
channels_used = sorted({0, 5, 10, 22, 30, 37, 49, 65})

print(channels_used)

[0, 5, 10, 22, 30, 37, 49, 65]


In [107]:
# Interactive Plot for the HFO detection
# bokeh docs: https://docs.bokeh.org/en/2.4.1/docs/first_steps/first_steps_1.html

from utils.line_plot import create_fig  # Import the function to create the figure
from bokeh.models import Range1d

# Define the x and y values
# Time axis (ms): one value per recorded sample, the first sample being placed at x_step.
# It is built from integer sample indices instead of np.arange with a float step:
# with a non-integer step the length of np.arange can be off by one because of
# floating-point rounding, which would leave x and the voltage traces misaligned.
sample_count = recorded_data.shape[0]
x = list(np.arange(1, sample_count + 1) * x_step)

# Create the y arrays for the voltage plot representing the voltage of each electrode.
# One list per selected channel; slicing the column directly avoids a Python-level
# loop over every row of the recording for every channel.
v_yarrays = [list(recorded_data[:, ch_idx]) for ch_idx in channels_used]
    

## Define a specific time interval to plot

In [108]:
# Time window to plot, in ms (divide max_t to zoom into the start of the recording)
min_t = 0
max_t = input_duration # / 10

# Translate the window bounds into sample positions
start_index = int(min_t / x_step)
num_data_points = int((max_t - min_t) / x_step)

# A single shared slice keeps the time axis and every voltage trace aligned
window = slice(start_index, start_index + num_data_points + 1)
x = x[window]
v_yarrays[:] = [trace[window] for trace in v_yarrays]

print("num_data_points", num_data_points)

num_data_points 129238


## Create the Plot

In [109]:
# Create the plot
# List of tuples containing the y values and the legend label.
# channels_used and v_yarrays are parallel (one trace per selected channel), so the
# label is taken from the real channel index and not from the position in the list.
hfo_y_arrays = [
    (voltage_val, f"Ch. {ch_idx}")
    for ch_idx, voltage_val in zip(channels_used, v_yarrays)
]

# Create the SEEG Voltage plot
hfo_plot = create_fig(
    title="SEEG Voltage dynamics of Synthetic data from Human recordings", 
    x_axis_label='time (ms)', 
    y_axis_label='Voltage (μV)',
    x=x, 
    y_arrays=hfo_y_arrays, 
    sizing_mode="stretch_both", 
    tools="pan, box_zoom, wheel_zoom, hover, undo, redo, zoom_in, zoom_out, reset, save",
    tooltips="Data point @x: @y",
    legend_location="top_right",
    legend_bg_fill_color="navy",
    legend_bg_fill_alpha=0.1,
    # y_range=Range1d(-0.05, 1.05)
)

# If more than 30 channels are actually plotted, hide the legend.
# The count comes from the plotted traces, because the selection in channels_used
# is independent of the min_channel/max_channel range.
if len(hfo_y_arrays) > 30:
    # Hide the legend
    hfo_plot.legend.visible = False

## Add Box Annotations to the plot to identify the marked HFOs (ground truth)

TODO: The annotations belong to specific channels, so each one should be displayed on the channel it was marked on.

In [110]:
from bokeh.models import BoxAnnotation

confidence_range = 100          # When the duration is missing (0), we consider the 200ms window around the marked position
color_map = {                   # Map the label to a color
    'Spike': 'red',
    'Fast Ripple': 'blue',
    'Ripple': 'green',  
    'Spike+Ripple': 'yellow',
    'Spike+Fast-Ripple': 'magenta',
    'Ripple+Fast-Ripple': 'cyan',
    'Spike+Ripple+Fast-Ripple': 'black'
}
fallback_color = 'gray'         # Used for labels missing from color_map instead of failing with a KeyError

visited_markers = {}    # Avoid inserting multiple boxes for the same marker (only one of each label)
use_visited = True     # Boolean controlling if we remove duplicate markers

plot_instant = True     # Boolean to plot the markers as instant events or as boxes
instant_width = 100 # 20       # Width of the instant event for visualization purposes

show_markers = False    # Boolean to show the markers
if show_markers:
    for ch_idx in channels_used:
        channel_markers = markers[ch_idx]
        for marker in channel_markers:
            # Channels without events hold an empty entry: nothing to draw
            if len(marker) == 0:
                continue

            # Convert the record fields to plain Python values (hashable keys, plain floats for bokeh)
            position = float(marker['position'])
            duration = float(marker['duration'])
            label = str(marker['label'])

            if use_visited:
                # Labels that already have an annotation at this position (shared across channels)
                visited_labels = visited_markers.setdefault(position, [])
                if label in visited_labels:
                    continue    # Skip this marker
                visited_labels.append(label)

            # Horizontal extent of the box
            if plot_instant:
                # Instant event: fixed-width box starting at the marked position
                left, right = position, position + instant_width
            elif duration > 0:
                # NOTE(review): assumes the duration uses the same time unit as the position - confirm
                left, right = position, position + duration
            else:
                # Missing duration: symmetric confidence window around the marked position
                left, right = position - confidence_range, position + confidence_range

            # Only draw boxes that lie completely inside the plotted time window
            if left < min_t or right > max_t:
                continue    # Skip this marker

            box_color = color_map.get(label, fallback_color)  # Choose a color according to the label
            box = BoxAnnotation(left=left, right=right, fill_color=box_color, fill_alpha=0.1)
            hfo_plot.add_layout(box)

## Show the Plot

In [111]:
import bokeh.plotting as bplt

# Toggle for displaying the figure; set it to False to skip the call to bokeh's show()
showPlot = True
if showPlot:
    bplt.show(hfo_plot)

## Export the plot to a file

In [112]:
# Toggle for writing the figure to a standalone HTML file
export = True

# The file name records whether the marker boxes were drawn on the figure
MARKED_SUFFIX = "_marked" if show_markers else "_non_marked"
if export:
    file_path = f"{PATH_TO_FILE}patients/csl/results/seeg{MARKED_SUFFIX}.html"

    # Saving fails when the target folder is missing, so create it first
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Customize the output file settings
    bplt.output_file(filename=file_path, title="SEEG Data - Voltage dynamics across time")

    # Save the plot
    bplt.save(hfo_plot)

In [115]:
# Clean up bokeh state once the figure has been shown/exported:
# clear the current document, then call reset_output() on the plotting module
bplt.curdoc().clear()
bplt.reset_output()