In [1]:
import numpy as np
import pandas as pd
from clickbait import *
from clickbait_viz import *

### Get paths of clickbait sessions.

In [2]:
# Root directory that holds the clickbait datasets.
# NOTE(review): absolute, machine-specific path - change it when running on another machine.
data_dir = 'A:/clickbait-data/'

# Get mouse ids, session names and filenames for directories with .avi files greater than 1 GB
mice, sessions, files = scan_dataset(data_dir, min_size_bytes=1e9, filetype='.avi')

print(f"Located {len(mice)} sessions.")

## Print mouse and session names
# for ii in range(len(mice)):
#     print(f"{mice[ii]}-{sessions[ii]}")

Located 51 sessions.


### Initialize dataset object.

In [3]:
# Create the BehaviorExperiment object for data_dir; sessions are loaded into it in the next cell
clickbait = BehaviorExperiment(data_dir=data_dir)

### Load sessions into dataset object.

In [4]:
# Load every session located by scan_dataset above into the experiment object
clickbait.load_all_sessions(mice, sessions, files)

In [5]:
# Test syntax
# len(clickbait.sessions['1006_full4'].video_ts)

### Build Summary Dataframe


In [6]:
# Assemble the per-session summary table on the experiment object
clickbait.build_summary_df(mice, sessions)

# velocity_diff = reward_velocity - search_velocity, computed row by row
velocity_diff = clickbait.summary_df['reward_velocity'].sub(clickbait.summary_df['search_velocity'])
clickbait.summary_df['velocity_diff'] = velocity_diff
clickbait.summary_df

Unnamed: 0,mouse_id,session_id,session_number,avg_velocity,distance_traveled,trials_completed,search_velocity,reward_velocity,video_length,velocity_diff
0,1003,diet1,1,5.007283,691530.880468,72,5.436014,6.323677,45.49,0.887663
1,1003,diet2,2,4.062164,559843.432625,66,4.161127,4.913927,45.4,0.7528
2,1003,diet3,3,4.253292,587447.743952,87,5.106191,5.748446,45.49,0.642255
3,1003,diet4,4,4.382331,585847.602872,83,6.76357,4.798645,44.03,-1.964925
4,1003,full1,5,4.724145,627016.882824,49,5.446823,11.102527,43.72,5.655703
5,1003,full2,6,6.94245,895881.532906,112,7.773386,11.046791,42.5,3.273405
6,1003,full3,7,5.386913,697874.629723,86,5.687364,10.061779,42.67,4.374414
7,1003,full4,8,5.599699,773385.640592,91,5.855302,8.391198,45.49,2.535895
8,1003,full5,9,5.655667,769232.860689,96,6.222153,10.251644,44.8,4.029491
9,1003,full6,10,5.146586,708288.543977,89,4.908177,8.109666,45.33,3.201489


In [11]:
# Get a specific session by its position in the scan results (we are looking for a broken one)
idx = 31
mouse = mice[idx]
session_id = sessions[idx]  # keep the identifier separate from the session object fetched below
session = clickbait.get_session(mouse, session_id)

# View session info
session.print_session_info()

Mouse: 1005 Session: diet6
Trials Completed: 116
Video length: 122475 frames
Events Data Length: 122475 rows
Video length at 50.6 FPS: 40.34 minutes


### Visualize Broken Trajectories

In [15]:
# Display the event table of the selected session (pandas truncates the middle rows)
session.event_data

Unnamed: 0,timestamp,trial_number,poke_left,poke_right,centroid_x,centroid_y,target_cell,iti,reward_state,water_left,water_right,click,distance,drinking,frame_ms,gap
0,2024-11-18 12:27:01.515251200,0,False,False,737,1742,[0],False,False,False,False,False,,0,,0
1,2024-11-18 12:27:01.531225600,0,False,False,737,1742,[0],False,False,False,False,False,0.000000,0,15.9744,0
2,2024-11-18 12:27:01.554060800,0,False,False,737,1742,[0],False,False,False,False,False,0.000000,0,22.8352,0
3,2024-11-18 12:27:01.570547200,0,False,False,737,1742,[0],False,False,False,False,False,0.000000,0,16.4864,0
4,2024-11-18 12:27:01.589452800,0,False,False,737,1742,[0],False,False,False,False,False,0.000000,0,18.9056,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122470,2024-11-18 13:12:30.049164800,116,False,False,657,993,[],False,True,False,False,False,2.236068,0,19.4816,0
122471,2024-11-18 13:12:30.074419200,116,False,False,659,990,[],False,True,False,False,False,3.605551,0,25.2544,0
122472,2024-11-18 13:12:30.092096000,116,False,False,662,985,[],False,True,False,False,False,5.830952,0,17.6768,0
122473,2024-11-18 13:12:30.115468800,116,False,False,663,980,[],False,True,False,False,False,5.099020,0,23.3728,0


In [19]:
# Plot trial 10 from the raw (un-healed) event data, coloured by trial number
visualize_trial_trajectory(session.event_data, [10], color_code='trial_number')


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



### Heal Gaps

In [22]:
# Nominal time between frames, in milliseconds, for a 50.6 FPS recording
tgt_duration = 1000 / 50.6

# Keep only the rows that belong to trial 10
in_trial = session.event_data['trial_number'] == 10
gapped_trial = session.event_data.loc[in_trial]




In [28]:
# Time between consecutive timestamps of the isolated trial, in milliseconds
# (the first row has no predecessor, so its difference is NaN and is dropped)
gap_size = gapped_trial['timestamp'].diff().dt.total_seconds()*1000
# Show the smallest inter-frame interval for comparison with the nominal frame duration
min(gap_size.dropna())


17.5488

In [24]:
# Nominal frame duration in milliseconds at 50.6 FPS
1000/50.6

19.76284584980237

In [41]:
# Row positions (in the healed frame) of every imputed row.
# NOTE: module-level state - it accumulates across calls to heal_gaps; clear it between runs if needed.
imputed_indices = []

def heal_gaps(df, gap_size, fps):
    """Insert placeholder rows where frames were dropped and interpolate the position columns.

    A gap is any pair of consecutive rows whose timestamps are more than
    ``gap_size`` milliseconds apart. The number of missing frames in a gap is
    ``round(gap / frame_period) - 1`` (a gap of two frame periods hides exactly
    one frame), so an imputed timestamp never lands on, or right next to, the
    following real frame. Gaps that round to a single frame period are treated
    as jitter and get no new rows.

    Each imputed row is a copy of the row preceding the gap with:
      * ``timestamp`` advanced by a whole number of frame periods,
      * ``centroid_x``, ``centroid_y`` and ``distance`` set to NaN,
      * ``gap`` set to 1.
    After insertion the three NaN-ed columns are filled by linear interpolation
    over row position.

    Parameters
    ----------
    df : pandas.DataFrame
        Event rows in chronological order with at least the columns
        ``timestamp`` (datetime-like), ``centroid_x``, ``centroid_y`` and
        ``distance``. The input is not modified.
    gap_size : float
        Threshold in milliseconds above which the spacing between two
        consecutive rows counts as a gap.
    fps : float
        Nominal frame rate of the recording; the frame period is ``1000 / fps`` ms.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, whether or not any gap was found.

    Raises
    ------
    ValueError
        If ``fps`` is not strictly positive.

    Side effects
    ------------
    The final row positions of the imputed rows are appended to the
    module-level ``imputed_indices`` list.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    frame_ms = 1000.0 / fps
    interp_cols = ['centroid_x', 'centroid_y', 'distance']

    # Work on a re-indexed copy so the caller's frame (often a slice) is never written to
    healed = df.reset_index(drop=True)
    timestamps = pd.to_datetime(healed['timestamp'])

    # Gap (ms) between each row and the one after it; the last row has no successor -> NaN
    gap_ms = (timestamps.shift(-1) - timestamps).dt.total_seconds() * 1000

    # Missing frames per gap; rows at or below the threshold (or with NaN gap) contribute none
    missing = (gap_ms / frame_ms).round().sub(1).clip(lower=0)
    counts = missing.where(gap_ms > gap_size, 0).fillna(0).astype(int).to_numpy()

    if counts.any():
        n_rows = len(healed)
        inserted_before = np.cumsum(counts) - counts          # imputed rows preceding each real row
        src = np.repeat(np.arange(n_rows), counts)            # real row each imputed row is copied from
        step = np.arange(src.size) - np.repeat(inserted_before, counts) + 1  # 1..k within each gap
        real_pos = np.arange(n_rows) + inserted_before        # final position of each real row
        imputed_pos = real_pos[src] + step                    # final position of each imputed row

        # Integer nanoseconds keep the timestamp arithmetic exact and unit-unambiguous
        offsets_ns = np.round(step * frame_ms * 1e6).astype('int64')

        imputed = healed.iloc[src].copy()
        imputed['timestamp'] = pd.DatetimeIndex(timestamps.iloc[src]) + pd.to_timedelta(offsets_ns, unit='ns')
        imputed[interp_cols] = np.nan  # filled by the interpolation below
        imputed['gap'] = 1

        # Same float dtype on both sides so the concat result dtype is unambiguous
        healed[interp_cols] = healed[interp_cols].astype(float)

        # Single concat, then reorder rows into their final positions (linear instead of quadratic)
        order = np.argsort(np.concatenate([real_pos, imputed_pos]))
        healed = pd.concat([healed, imputed]).iloc[order].reset_index(drop=True)

        imputed_indices.extend(imputed_pos.tolist())

    healed[interp_cols] = healed[interp_cols].interpolate()
    return healed

    # # Interpolate 'centroid_x', 'centroid_y', and 'distance' columns
    # df[['centroid_x', 'centroid_y', 'distance']] = df[['centroid_x', 'centroid_y', 'distance']].interpolate()

    # print("DataFrame with imputed rows:")
    # print(df)
    # print("\nIndices of imputed rows:")
    # print(imputed_indices)

In [43]:
# Fill the timestamp gaps in the isolated trial
healed = heal_gaps(gapped_trial, gap_size=25, fps=50.6)

In [45]:

# Preview the first rows of the healed trial
healed.head()

Unnamed: 0,timestamp,trial_number,poke_left,poke_right,centroid_x,centroid_y,target_cell,iti,reward_state,water_left,water_right,click,distance,drinking,frame_ms,gap
0,2024-11-18 12:30:38.582259200,10,False,False,264.0,1727.0,[6],False,False,False,False,False,2.828427,0,19.8144,0
1,2024-11-18 12:30:38.605452800,10,False,False,268.0,1725.0,[6],False,False,False,False,False,4.472136,0,23.1936,0
2,2024-11-18 12:30:38.624435200,10,False,False,274.0,1723.0,[6],False,False,False,False,False,6.324555,0,18.9824,0
3,2024-11-18 12:30:38.647705600,10,False,False,273.0,1724.0,[6],False,False,False,False,False,1.414214,0,23.2704,0
4,2024-11-18 12:30:38.671308800,10,False,False,270.0,1725.0,[6],False,False,False,False,False,3.162278,0,23.6032,0


In [48]:
# Re-plot trial 10 from the healed data to compare with the raw trajectory plotted earlier
visualize_trial_trajectory(healed, [10], color_code='trial_number')


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`

