In [16]:
from behavior_utils import *
import matplotlib.pyplot as plt
import numpy as np

### Initialize paths


In [100]:
# Experiment selection
root_path = 'D:/data'
experiment = 'clickbait-motivate'
events_dir = 'bonsai'
session_type = 'm'

# The inputs are searched for under two directory trees: the `preprocessed`
# tree (TTL times, sync messages, reward state) and the events tree
# (video, timestamps, events A/B, SLEAP csv).
preprocessed_root = f"{root_path}/{experiment}/preprocessed"
events_root = f"{root_path}/{experiment}/{events_dir}"

# Every lookup prints the number of matches plus the first three paths.
listing = dict(print_paths=True, print_n=3)

# Collect the per-session input files (call order preserved so the printed
# summary stays in the same order).
ttl_paths = get_file_paths(preprocessed_root, 'npy', 'ttl_times', session_type=session_type, **listing)
video_paths = get_file_paths(events_root, 'avi', '', session_type=session_type, **listing)
timestamps = get_file_paths(events_root, 'csv', 'timestamp', session_type=session_type, **listing)
event_paths_a = get_file_paths(events_root, 'csv', '_eventsA', session_type=session_type, **listing)
event_paths_b = get_file_paths(events_root, 'csv', '_eventsB', session_type=session_type, **listing)
sleap_paths = get_file_paths(events_root, 'csv', 'slp', session_type=session_type, **listing)
# NOTE(review): sync messages are searched with an empty session_type, unlike
# every other lookup — presumably intentional; confirm.
sync_paths = get_file_paths(preprocessed_root, 'txt', 'sync_messages', session_type='', **listing)
reward_paths = get_file_paths(preprocessed_root, 'npy', 'reward_state', session_type=session_type, **listing)

# Processed event tables are written back under the events tree.
save_path = Path(events_root)

Found 72 ttl_times.npy files
0 D:\data\clickbait-motivate\preprocessed\7010\m1\ttl_times.npy
1 D:\data\clickbait-motivate\preprocessed\7010\m2\ttl_times.npy
2 D:\data\clickbait-motivate\preprocessed\7010\m4\ttl_times.npy
Found 73 .avi files
0 D:\data\clickbait-motivate\bonsai\7010\m1\10082025_7010_m1.avi
1 D:\data\clickbait-motivate\bonsai\7010\m2\10092025_7010_m2.avi
2 D:\data\clickbait-motivate\bonsai\7010\m4\10132025_7010_m4.avi
Found 73 timestamp.csv files
0 D:\data\clickbait-motivate\bonsai\7010\m1\10082025_7010_m1_video_timestamp.csv
1 D:\data\clickbait-motivate\bonsai\7010\m2\10092025_7010_m2_video_timestamp.csv
2 D:\data\clickbait-motivate\bonsai\7010\m4\10132025_7010_m4_video_timestamp.csv
Found 73 _eventsA.csv files
0 D:\data\clickbait-motivate\bonsai\7010\m1\10082025_7010_m1_eventsA.csv
1 D:\data\clickbait-motivate\bonsai\7010\m2\10092025_7010_m2_eventsA.csv
2 D:\data\clickbait-motivate\bonsai\7010\m4\10132025_7010_m4_eventsA.csv
Found 73 _eventsB.csv files
0 D:\data\clickba

In [101]:
# Restrict every file list to the entries that filter_paths keeps when matched
# against the TTL files, so only sessions with a valid TTL file are used.
(video_paths, timestamps, event_paths_a, event_paths_b,
 sleap_paths, sync_paths, reward_paths) = [
    filter_paths(candidates, ttl_paths)
    for candidates in (video_paths, timestamps, event_paths_a, event_paths_b,
                       sleap_paths, sync_paths, reward_paths)
]

# Report how many files of each kind survived the filter.
for _label, _kept in (
    (".avi", video_paths),
    ("timestamp.csv", timestamps),
    ("eventsA.csv", event_paths_a),
    ("eventsB.csv", event_paths_b),
    ("sleap.csv", sleap_paths),
    ("sync_messages.txt", sync_paths),
    ("reward_state.npy", reward_paths),
):
    print(f"Using {len(_kept)} {_label} files.")

Using 72 .avi files.
Using 72 timestamp.csv files.
Using 72 eventsA.csv files.
Using 72 eventsB.csv files.
Using 72 sleap.csv files.
Using 72 sync_messages.txt files.
Using 72 reward_state.npy files.


### Dict of 'names': 'types' for event dataframe columns

In [103]:
# Event dataframe schema: column name -> dtype string, in column order.
# NOTE(review): 'uint8' caps trial_number at 255 — confirm no session exceeds that.
columns = dict(
    trial_number='uint8',
    timestamp='str',
    poke_left='bool',
    poke_right='bool',
    centroid_x='uint16',
    centroid_y='uint16',
    target_cell='str',
    iti='bool',
    water_left='bool',
    water_right='bool',
    reward_state='bool',
    click='bool',
    flip_state='bool',
)

In [104]:
def find_offset_from_penultimate(dataframe, event_bool):
    """Offset between the penultimate falling edge in the events table and in ``event_bool``.

    The second-to-last 1->0 transition of ``dataframe['reward_state']`` is
    located and its ``timestamp_ms`` value is read. The second-to-last 1->0
    transition of ``event_bool`` whose sample index is smaller than the last
    ``timestamp_ms`` value is located as well. The function returns the
    difference between the two.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``'reward_state'`` (castable to int) and
        ``'timestamp_ms'``.
    event_bool : array-like
        1-D boolean/0-1 signal. Its sample indices are compared directly to
        ``timestamp_ms`` values.
        NOTE(review): this presumes one sample per millisecond — confirm.

    Returns
    -------
    integer
        ``int(timestamp_ms at the penultimate reward transition)`` minus the
        sample index of the penultimate ``event_bool`` transition.

    Raises
    ------
    IndexError
        If either signal has fewer than two qualifying 1->0 transitions.
    """
    # np.diff gives -1 where a 1 is followed by a 0; the reported index is the
    # position of the last high sample before the drop.
    reward_levels = dataframe['reward_state'].astype(int).to_numpy()
    reward_falls = np.flatnonzero(np.diff(reward_levels) == -1)
    if reward_falls.size < 2:
        raise IndexError(
            f"need at least two True->False transitions in 'reward_state', found {reward_falls.size}"
        )

    # The indices from np.diff are positional, so read the column with .iloc;
    # label-based .loc is only equivalent when the frame has a default RangeIndex.
    timestamps = dataframe['timestamp_ms']
    penultimate_event_time = timestamps.iloc[reward_falls[-2]]

    # Last timestamp in the table bounds which event_bool transitions count.
    final_timestamp = int(timestamps.iloc[-1])

    # Falling edges of the reference signal, restricted to those that occur
    # before the final timestamp.
    bool_falls = np.flatnonzero(np.diff(np.asarray(event_bool).astype(int)) == -1)
    valid_bool_falls = bool_falls[bool_falls < final_timestamp]
    if valid_bool_falls.size < 2:
        raise IndexError(
            f"need at least two 1->0 transitions in event_bool before {final_timestamp}, "
            f"found {valid_bool_falls.size}"
        )
    penultimate_bool_transition_sample = valid_bool_falls[-2]

    # Offset = timestamp of the reward transition - sample number of the matching edge.
    return int(penultimate_event_time) - penultimate_bool_transition_sample

### Process events and assign TTL times to each observation

In [106]:
sleap = True  # Whether to include SLEAP data

# Build one events.csv per session: Bonsai events, re-timed to Open Ephys TTL
# times, optionally joined with SLEAP tracking columns.
for session_idx, ttl_path in enumerate(ttl_paths):
    # Resolve the identifiers before the try block so the error message below
    # always names the session that actually failed (they were previously only
    # assigned near the end of the try body).
    mouse_id = ttl_path.parents[1].name
    session_id = ttl_path.parents[0].name

    try:
        ttl_times = np.load(ttl_path)
        # Not used inside this loop; kept as a notebook global because a later
        # cell reads `reward_bool`.
        reward_bool = np.load(reward_paths[session_idx])

        # Process event data from Bonsai and concatenate into a single dataframe
        event_data = process_events(session_idx, event_paths_a, event_paths_b, columns)

        # Express Bonsai timestamps in ms relative to the Open Ephys start time
        oe_start_time = open_ephys_start_time(sync_paths[session_idx])
        event_data['timestamp'] = event_data['timestamp'].apply(timestamp_to_ms)
        event_data['timestamp'] = event_data['timestamp'] - int(oe_start_time)
        event_data = event_data.rename(columns={'timestamp': 'bonsai_ts'})

        # Align event rows with Open Ephys TTL times: the last len(event_data)
        # TTL times are assigned to the event rows one-to-one.
        ttl_offset = len(ttl_times) - len(event_data)
        if ttl_offset < 0:
            raise ValueError(
                f"fewer TTL times ({len(ttl_times)}) than event rows ({len(event_data)})"
            )
        timestamp_ms = ttl_times[ttl_offset:]
        event_data['timestamp_ms'] = timestamp_ms.astype(np.int32)

        # SLEAP: load tracking data and append its columns to the events table.
        # Skipped (silently) when the SLEAP and video file lists differ in length.
        if sleap and len(sleap_paths) == len(video_paths):
            sleap_data = pd.read_csv(f"{sleap_paths[session_idx]}")
            sleap_offset = len(sleap_data) - len(event_data)  # Video starts slightly ahead of events
            if sleap_offset < 0:
                # A negative offset would slice from the tail and the inner
                # join would then silently truncate and misalign the events.
                raise ValueError(
                    f"fewer SLEAP rows ({len(sleap_data)}) than event rows ({len(event_data)})"
                )
            sleap_data = sleap_data[sleap_offset:].reset_index(drop=True)  # Trim to match event df length
            event_data = pd.concat([event_data, sleap_data], axis=1, join='inner')

        # Output location: <save_path>/<mouse_id>/<session_id>/events.csv
        event_path = save_path / mouse_id / session_id
        event_path.mkdir(parents=True, exist_ok=True)
        events_file = event_path / 'events.csv'

        # Save event data
        event_data.to_csv(events_file, index=False)
        print(f"Saved events for {mouse_id} {session_id} to {events_file}")
    except Exception as e:
        # Best effort: report the failing session and move on to the next one.
        print(f"{mouse_id}-{session_id} error: {e}")

Saved events for 7010 m1 to D:\data\clickbait-motivate\bonsai\7010\m1\events.csv
Saved events for 7010 m2 to D:\data\clickbait-motivate\bonsai\7010\m2\events.csv
Saved events for 7010 m4 to D:\data\clickbait-motivate\bonsai\7010\m4\events.csv
Saved events for 7010 m5 to D:\data\clickbait-motivate\bonsai\7010\m5\events.csv
Saved events for 7010 m6 to D:\data\clickbait-motivate\bonsai\7010\m6\events.csv
Saved events for 7010 m7 to D:\data\clickbait-motivate\bonsai\7010\m7\events.csv
Saved events for 7010 m8 to D:\data\clickbait-motivate\bonsai\7010\m8\events.csv
Saved events for 7010 m9 to D:\data\clickbait-motivate\bonsai\7010\m9\events.csv
Saved events for 7010 m10 to D:\data\clickbait-motivate\bonsai\7010\m10\events.csv
Saved events for 7010 m11 to D:\data\clickbait-motivate\bonsai\7010\m11\events.csv
Saved events for 7010 m12 to D:\data\clickbait-motivate\bonsai\7010\m12\events.csv
Saved events for 7010 m13 to D:\data\clickbait-motivate\bonsai\7010\m13\events.csv
Saved events for 701

In [None]:
# Re-align each saved events.csv so that its penultimate reward-state falling
# edge coincides with the one in the session's reward_state.npy signal.
flip_paths = get_file_paths('D:/data/clickbait-motivate', extension='npy', keyword='reward_state', session_type='m', print_paths=True, print_n=20)
events_paths = get_file_paths('D:/data/clickbait-motivate/bonsai', extension='csv', keyword='events', session_type='m', keyword_exact=True, print_paths=True, print_n=20)
flip_paths = filter_paths(flip_paths, events_paths)
# NOTE(review): the two lists are paired by position below; this assumes
# filter_paths leaves flip_paths in the same order and length as events_paths — confirm.

for ii in range(len(events_paths)):
    try:
        # Load events data
        event_data = pd.read_csv(events_paths[ii])

        # Load this session's reward/flip state signal and binarize it
        flip_state = np.load(flip_paths[ii])
        flip_bool = flip_state > 0

        # Calculate the offset against THIS session's signal and apply it to
        # the timestamp_ms column. (Previously the stale `reward_bool` left in
        # the kernel by an earlier cell was passed here and `flip_bool` was
        # never used.)
        offset_pre = find_offset_from_penultimate(event_data, flip_bool)
        event_data['timestamp_ms'] = event_data['timestamp_ms'] - offset_pre
        offset_post = find_offset_from_penultimate(event_data, flip_bool)
        print(f"Before offset (ms): {offset_pre} - After offset (ms): {offset_post}")

        # Save aligned csv (overwrites the input file in place)
        event_data.to_csv(f"{events_paths[ii]}", index=False)

    except Exception as e:
        # Bind the exception so the message can be printed; the original bare
        # `except:` referenced an undefined `e`.
        print(f"Error processing {events_paths[ii]}: {e}")
