In [1]:
import os
import ndx_events
import pandas as pd

from pynwb import NWBHDF5IO

from settings_general import subject_id_batch_cage_dict
from shared.create_frame_based_epochs import create_cleaned_event_df, get_led_onsets
from shared.eeg_video_alignment_functions import adjust_fps, get_first_ttl_offset

In [2]:
# Machine-specific input locations (absolute Windows paths); adjust these when running elsewhere.

# NWB file of the subject that is inspected in this notebook
nwb_file = (
    r"C:\Users\Olle de Jong\Documents\MSc Biology\rp2\rp2_data"
    r"\3C_sociability\output\nwb\3c_sociability_39508.nwb"
)

# folder from which the behavioural annotation (.xlsx) files are read
behaviour_data_folder = (
    r"C:\Users\Olle de Jong\Documents\MSc Biology\rp2\rp2_data"
    r"\3C_sociability\input\behavioural_data"
)

Read the NWB file, from which we later extract the EEG data

In [3]:
# open the NWB file read-only and pull everything we need out of it while the file is still open
nwb_file_path = os.path.join(nwb_file)
with NWBHDF5IO(nwb_file_path, "r") as io:
    nwb = io.read()

    # filtered EEG: load the stored array, transpose it and keep its first row
    filtered_series = nwb.acquisition['filtered_EEG']
    filtered_eeg = filtered_series.data[:].T[0]
    s_freq = filtered_series.rate

    # timestamps of the TTL onsets in seconds
    eeg_ttl_onsets_secs = list(nwb.acquisition["TTL_1"].timestamps)

    # subject information
    subject = nwb.subject
    genotype, subject_id = subject.genotype, subject.subject_id

In [4]:
# sanity check: show which subject was loaded, together with its EEG sampling rate and genotype
print(subject_id, s_freq, genotype)

39508 1084.719057764039 DRD2-WT


Let's retrieve the behaviour data

In [5]:
# look up the batch/cage name of this subject; the annotation file carries that name
batch_cage = subject_id_batch_cage_dict[int(subject_id)]
print(f'\nGetting {batch_cage}.xlsx file belonging to subject {subject_id}')

# tracking data from BORIS software has 2 rows for each state event (start/stop), we want one for each,
# so the raw sheet is loaded first and the start/stop events are merged afterwards
beh_data_path = os.path.join(behaviour_data_folder, f'{batch_cage}.xlsx')
beh_data_raw = pd.read_excel(beh_data_path)
beh_data = create_cleaned_event_df(beh_data_raw, batch_cage, subject_id, genotype)
beh_data.head()


Getting batch6_cage2.xlsx file belonging to subject 39508


Unnamed: 0,Observation id,subject_id,genotype,Observation date,Observation duration,Media duration (s),FPS,Behavior,Media file name,Frame start,Frame stop,Event duration
0,batch6_cage2,39508,DRD2-WT,2024-04-10 13:59:34.258,455.17,1956.323,30.0,non-social_cup,C:/Users/Olle de Jong/Documents/MSc Biology/rp...,40060,40078,0.6
1,batch6_cage2,39508,DRD2-WT,2024-04-10 13:59:34.258,455.17,1956.323,30.0,non-social_cup,C:/Users/Olle de Jong/Documents/MSc Biology/rp...,40346,40429,2.768
2,batch6_cage2,39508,DRD2-WT,2024-04-10 13:59:34.258,455.17,1956.323,30.0,non-social_cup,C:/Users/Olle de Jong/Documents/MSc Biology/rp...,40461,40566,3.502
3,batch6_cage2,39508,DRD2-WT,2024-04-10 13:59:34.258,455.17,1956.323,30.0,non-social_cup,C:/Users/Olle de Jong/Documents/MSc Biology/rp...,41707,41947,8.003
4,batch6_cage2,39508,DRD2-WT,2024-04-10 13:59:34.258,455.17,1956.323,30.0,non-social_cup,C:/Users/Olle de Jong/Documents/MSc Biology/rp...,42027,42197,5.668


Because the true FPS of the video is not exactly the theoretical 30.0, we have to re-calculate it.

To do that, we first get the LED onsets from the video output folder.

In [6]:
# folder that is searched for the LED states of the video belonging to this batch/cage
video_output_folder = r"C:\Users\Olle de Jong\Documents\MSc Biology\rp2\rp2_data\3C_sociability\output\videos"

# frames where the LED turned ON, i.e. all boolean event changes from OFF to ON (0 to 1)
led_onsets = get_led_onsets(video_output_folder, batch_cage)
led_onsets

array([  180,   210,   240, 58422, 58452, 58482], dtype=int64)

Within each burst of three flashes, the LED turns on once per second (consecutive onsets are 30 frames apart at the nominal 30 FPS).

In [7]:
# re-calculate the FPS from the EEG TTL onsets and the video LED onsets, then use that FPS
# to determine the offset of the first TTL onset
adjusted_fps = adjust_fps(filtered_eeg, eeg_ttl_onsets_secs, led_onsets, s_freq, verbose=False)
first_ttl_offset = get_first_ttl_offset(eeg_ttl_onsets_secs, led_onsets, adjusted_fps, s_freq)

theoretical_fps = beh_data.FPS.iloc[0]
print(f"Theoretical FPS: {theoretical_fps}. True FPS: {adjusted_fps}")
print(
    f"\nFirst TTL offset: {first_ttl_offset}. "
    f"This means that the TTL onset comes {first_ttl_offset} seconds later in the EEG data than in the Video."
)

Theoretical FPS: 30.0. True FPS: 29.969207081269676

First TTL offset: 12.349735088399871. This means that the TTL onset comes 12.349735088399871 seconds later in the EEG data than in the Video.


Let's get the actual event start and end timepoints

In [8]:
# inspect one annotated event (the second row); this event is used as the test case in the cells below
beh_data.iloc[1]

Observation id                                               batch6_cage2
subject_id                                                          39508
genotype                                                          DRD2-WT
Observation date                                  2024-04-10 13:59:34.258
Observation duration                                               455.17
Media duration (s)                                               1956.323
FPS                                                                  30.0
Behavior                                                   non-social_cup
Media file name         C:/Users/Olle de Jong/Documents/MSc Biology/rp...
Frame start                                                         40346
Frame stop                                                          40429
Event duration                                                      2.768
Name: 1, dtype: object

Let's validate whether the frames actually point to a non-social cup event.

In [9]:
import cv2


def extract_clip(input_video_path, output_video_path, start_frame, end_frame):
    """
    Copy the frames start_frame..end_frame (both inclusive) of a video into a new mp4v-encoded clip

    The clip is written with the frame rate and frame size reported by the input video. If the input ends
    before end_frame is reached, the clip simply contains the frames that could be read.

    :param input_video_path: path of the video to cut the clip from
    :param output_video_path: path the clip is written to
    :param start_frame: index of the first frame of the clip (cast to int)
    :param end_frame: index of the last frame of the clip (cast to int)
    :return: None; an error message is printed when the input or the output video cannot be opened
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None

    try:
        # Check if the video opened successfully
        if not cap.isOpened():
            print("Error: Could not open video file.")
            return

        # keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the clip play too slowly
        fps = cap.get(cv2.CAP_PROP_FPS)

        # frame size has to be passed to the writer as integers
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can change the codec according to your output format
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

        # do not claim success when nothing can be written
        if not out.isOpened():
            print("Error: Could not open output video file for writing.")
            return

        # frame numbers may arrive as numpy/pandas integers, work with plain ints
        start_frame, end_frame = int(start_frame), int(end_frame)

        # jump to the first frame of the clip and copy frames up to and including end_frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        for _ in range(start_frame, end_frame + 1):
            ret, frame = cap.read()

            # stop when the video runs out of frames
            if not ret:
                break

            out.write(frame)
    finally:
        # release the reader/writer on every exit path, including early returns and exceptions
        cap.release()
        if out is not None:
            out.release()

    print(f"Clip has been saved to {output_video_path}")

In [10]:
# show between which video frames the test event (second row) was annotated
test_event = beh_data.iloc[1]
first_frame, last_frame = test_event["Frame start"], test_event["Frame stop"]
print(f'Start frame of event: {first_frame}, end frame of event: {last_frame}')

Start frame of event: 40346, end frame of event: 40429


In [11]:
# video the test event was annotated in, and the location the extracted clip is written to
input_video_path = r"C:\Users\Olle de Jong\Documents\MSc Biology\rp2\rp2_data\3C_sociability\input\videos\drd2_batch6_3C-sociability_cage2 Camera 1 20-12-2023 10_34_53 1.mp4"
output_video_path = r"C:\Users\Olle de Jong\Desktop\test\clip\clip1.mp4"

# cut out the frames of the test event (second row) so the annotation can be checked by eye
clip_event = beh_data.iloc[1]
extract_clip(
    input_video_path,
    output_video_path,
    clip_event["Frame start"],
    clip_event["Frame stop"],
)

Clip has been saved to C:\Users\Olle de Jong\Desktop\test\clip\clip1.mp4


That seems to check out. Now we extract the EEG that belongs to the clip. First, define a function that converts a video frame number into an EEG sample.

In [12]:
def frame_to_sample(video_frame, adjusted_fps, offset, s_freq):
    """
    Convert a video frame number into the corresponding position in the EEG, expressed in samples

    :param video_frame: frame in the video that has to be mapped onto the EEG
    :param adjusted_fps: frame rate (frames per second) used to turn the frame number into seconds
    :param offset: number of seconds that is added to the video time to arrive at the EEG time
    :param s_freq: EEG sampling frequency, used to turn seconds into samples
    :return: position in the EEG in samples; the value is not rounded or cast to int
    """
    # frames -> seconds in the video -> seconds in the EEG; the first TTL onset is always later in the
    # EEG than in the video, which is why the offset is added
    eeg_seconds = video_frame / adjusted_fps + offset

    # seconds -> samples
    return eeg_seconds * s_freq

Now, using that function, let's calculate the actual start and end time-points of the event in EEG samples

In [13]:
# use the second annotated event as test case
picked_event = beh_data.iloc[1]

# frames at which this event starts and stops
start_frame = int(picked_event['Frame start'])
stop_frame = int(picked_event['Frame stop'])

# total event duration as stored in the behaviour data
event_duration = picked_event['Event duration']

# map both frames onto the EEG using the adjusted FPS and the offset of the first TTL,
# truncating the result to whole samples
event_start, event_end = (
    int(frame_to_sample(frame, adjusted_fps, first_ttl_offset, s_freq))
    for frame in (start_frame, stop_frame)
)

print(f'Event start and end timepoints in samples: {event_start, event_end}')

Event start and end timepoints in samples: (1473697, 1476701)


Let's get the EEG using those timepoints

In [14]:
from shared.nwb_retrieval_functions import get_eeg

# (start, end) of the event in EEG samples
event_window = (event_start, event_end)
event_eeg, chans = get_eeg(nwb_file_path, 'filtered_EEG', event_window, True)

# the printed shape is (channels, samples) for this event
print(event_eeg.shape)

(9, 3004)


Now, let's tag periods of package-loss

In [15]:
# our data is not resampled
resampled = False

# package loss cutoff (fraction of the event's samples), set for testing purposes
package_loss_cutoff = 0.15

In [16]:
from shared.nwb_retrieval_functions import get_package_loss
import numpy as np

# do not handle events that have too much package loss
if not resampled:
    # length of the event in EEG samples; stored under its own name so that `event_duration`
    # (the event duration taken from the behaviour data) is not overwritten with a value in another unit
    event_n_samples = event_end - event_start

    # get package loss
    ploss, _ = get_package_loss(nwb_file_path, (event_start, event_end))

    # EMG channels are not checked
    chans_to_check = [chan for chan in chans if 'EMG' not in chan]

    # the event is rejected as soon as the fraction of NaN entries in one channel exceeds the cutoff;
    # any() stops at the first such channel, and an empty sample window is never divided by
    too_much_package_loss = event_n_samples > 0 and any(
        np.sum(np.isnan(ploss[chan])) / event_n_samples > package_loss_cutoff
        for chan in chans_to_check
    )

    if too_much_package_loss:  # then skip this event
        print(f'Skipping {picked_event["Behavior"]} event 1 because there is more than '
              f'{package_loss_cutoff * 100:.2f}% package loss in one of the channels')
    else:
        print('This event is fine')

This event is fine


Let's generate epochs

In [17]:
import mne

# channels with 'EMG' in their name are typed as EMG, all others as EEG
ch_types = ["emg" if "EMG" in chan else "eeg" for chan in chans]
info = mne.create_info(ch_names=list(chans), sfreq=s_freq, ch_types=ch_types, verbose="WARNING")
raw = mne.io.RawArray(event_eeg, info, verbose="WARNING")

# cut the event into fixed length, non-overlapping epochs of 'epoch_length' seconds
epoch_length = 1.0
epochs = mne.make_fixed_length_epochs(
    raw, duration=epoch_length, overlap=0.0, preload=True, verbose="WARNING"
)

# metadata: one row per epoch, identical for all epochs except for the running number 'event_part_n'
n_epochs = len(epochs)
metadata = pd.DataFrame({
    'subject_id': [subject_id] * n_epochs,
    'genotype': [genotype] * n_epochs,
    'event_n': [1] * n_epochs,
    'event_part_n': range(1, n_epochs + 1),
    'event_kind': [picked_event["Behavior"]] * n_epochs,
    'total_event_duration': [picked_event["Event duration"]] * n_epochs,
    'epoch_length': [epoch_length] * n_epochs,
})
epochs.metadata = metadata

In [18]:
# show a summary of the epochs that were generated for the test event
print(epochs)

<Epochs |  2 events (all good), 0 – 0.999337 s, baseline off, ~165 kB, data loaded, with metadata,
 '1': 2>


In [19]:
# sampling frequency stored with the epochs; should equal the s_freq that was passed to mne.create_info
epochs.info['sfreq']

1084.719057764039