Notebook description: <br>

This notebook is created to analysis the kinematics data obtained after running the tracking code. <br>

Most of the tracking and analysis codes are written by Duncan and pieced together in this notebook.

### Import experiment (tracked and with kinematics data).

In [2]:
from behavior_analysis.experiment import  BehaviorExperiment
import pandas as pd
# Open experiment
experiment = BehaviorExperiment.open(r"C:\Users\manyung.ng\Documents\behaviour_analysis\behavior_analysis_tracking\uv_trial")
print(experiment)
# Open video and bout info
video_info = pd.read_csv(experiment.directory.joinpath('video_data.csv'), dtype={'ID': str, 'code': str})
bouts_df = pd.read_csv(experiment.subdirs["analysis"].joinpath('bouts.csv'),
                        dtype={'ID': str, 'code': str})

name: uv_trial
date: 2025-06-25
animal_data: fish_data.csv
video_data: video_data.csv
mask_data: mask_data.csv
bout_detection: {'threshold': 0.02, 'winsize': 0.05}



### Data information

- Fish ID in the format 'YYYYMMDDNN' (year, month, day, zero-padded fish number; e.g. fish_1 on 2025_05_29 would be 20250529) <br>

- Video code in the format 'IDHHMMSS' (fish_ID, hour, minute, second) <br>

- Number of rows == number of frames in the video

#### Description of data in the "kinematics/code + .csv"
| Column name | Column data description |
| --- | --- |
| 'k0' - 'k(n-1)' | angle of tangents between n successive tail points |
| 'tip' | the average curvature (tail angle) over the last 20% of the tail|
| 'length' | the length of the tail (useful for finding tracking errors) |
| 'left' | the angle of the left eye relative to the heading |
| 'right' | the angle of the right eye relative to the heading |
| 'speed' | the instantaneous speed in each frame |
| 'angular_velocity' | the instantaneous angular velocity in each frame |
| 'tracked' | whether kinematic data exists from the frame |

## Bout mapping

In [6]:
from behavior_analysis.experiment import BehaviorExperiment
from behavior_analysis.analysis.bouts import BoutData
from behavior_analysis.analysis.bout_mapping import calculate_distance_matrix_templates, interpolate_nd
from behavior_analysis.utilities.timer import Timer
import pandas as pd
import numpy as np
from pathlib import Path
import os

In [7]:
if __name__ == "__main__":
    # Open template bouts
    template_directory = Path(r'J:\Duncan Mearns\behavior_mapping')
    template_frame_rate = 500.
    # Open 1744 exemplar bouts representative of all behaviors
    exemplars_df = pd.read_csv(template_directory.joinpath('exemplars.csv'),
                               dtype={'ID': str, 'code': str})
    # Open tail statistics and eigenfish for bout mapping
    eigenfish = np.load(template_directory.joinpath('eigenfish.npy'))
    eigenfish = eigenfish[:3]  # take first three eigenfish only
    mean, std = np.load(template_directory.joinpath('tail_statistics.npy'))
    # Import template bouts
    templates = BoutData.from_metadata(exemplars_df, template_directory.joinpath("kinematics"))
    # Map template bouts onto eigenfish
    templates = templates.map(vectors=eigenfish, whiten=True, mean=mean, std=std)
    templates = templates.to_list(values=True)
    print(len(templates))
    print(templates[0].shape)

Opening 681 csv files...


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
if __name__ == "__main__":
    # Import bouts
    bouts = BoutData.from_metadata(bouts_df, experiment.subdirs["kinematics"], tail_only=True)
    # Map bouts onto eigenfish
    bouts = bouts.map(vectors=eigenfish, whiten=True, mean=mean, std=std)
    # Start timer
    timer = Timer()
    analysis_times = []
    timer.start()
    # distance matrix
    distances = {}
    # Iterate through fish
    output_directory = experiment.subdirs["analysis"].joinpath('bout_distances')
    if not output_directory.exists():
        output_directory.mkdir(parents=True)
    for ID in bouts.metadata['ID'].unique():
        # Save distance matrix for each fish to bout_distances folder in analysis directory
        output_path = output_directory.joinpath(ID + '.npy')
        path_exist = create_filepath(output_directory, ID, '.npy', True)
        # load distance matrix if the calculation is already performed.
        if path_exist:
            distances[ID] = np.load(output_path)
            print('distance matrices loaded.')
        if not output_path.exists():
            print(ID + '...', end=' ')
            # Interpolate bouts to correct frame rate
            fish_bouts = []
            for i, bout in bouts.iter(IDs=[ID], values=True):
                code = bouts.metadata.loc[i, 'code']
                fps = video_info[video_info['code'] == code].squeeze().fps
                interp = interpolate_nd(bout, fps, template_frame_rate)
                fish_bouts.append(interp)
            # Calculate distance matrix
            D = calculate_distance_matrix_templates(fish_bouts, templates, fs=template_frame_rate)
            # Save distance matrix
            np.save(output_path, D)
            # Show time taken
            time_taken = timer.lap()
            analysis_times.append(time_taken)
            print(timer.convert_time(time_taken))
    average_time = timer.average
    print(f'Total time: {timer.convert_time(timer.stop())}')
    print(f'Average time: {timer.convert_time(average_time)}')

In [None]:
distances_df = {"ID": distances}
distances_df = pd.DataFrame.from_dict(distances_df)
distances_df

{'ID': {'2025052901': array([[   594.63690566,    602.25356471,    592.46181876, ...,
             622.65944313,    437.79849277,    416.03782588],
         [   899.31453783,    911.62084822,    946.48550441, ...,
             926.96621981,    746.399745  ,    718.7528547 ],
         [   877.12596895,    786.52540852,    869.60960981, ...,
             841.48512722,    651.2428436 ,    677.20462238],
         ...,
         [   859.6941537 ,    889.63900355,    951.82616798, ...,
             908.24237765,    681.09783619,    660.91312106],
         [261835.55494616, 261837.08654446, 261911.17051351, ...,
          261886.3906826 , 261622.23129113, 261594.0507287 ],
         [ 20545.22734257,  20328.20362278,  20596.30522344, ...,
           20525.22468824,  20554.60284008,  20526.29131712]]),
  '2025052903': array([[  985.38848268,   915.64789378,  1050.81924416, ...,
           1007.45145019,   779.15366496,   791.16495203],
         [  538.84618221,   548.75748748,   610.09204832, ..

In [None]:
# this chunk keeps crashing vscode.
# let's run line by line to see where the crash occur.
# Assign exemplars
mapped_bouts = bouts #.map(eigenfish, whiten=True, mean=mean, std=std)
mapped_bouts['exemplar'] = None
for ID, fish_distances in distances.iteritems():
    bout_idxs = mapped_bouts[mapped_bouts['ID'] == ID].index
    nearest_exemplar = np.argmin(fish_distances, axis=1)
    mapped_bouts.loc[bout_idxs, 'exemplar'] = nearest_exemplar
mapped_bouts.to_csv(os.path.join(experiment.subdirs['analysis'], 'mapped_bouts.csv'))

ValueError: Unable to coerce to Series, length must be 3: given 50

## Bout classification

In [None]:
from behavior_analysis.experiment import BehaviorExperiment
from behavior_analysis.analysis.bouts import BoutData
from behavior_analysis.analysis.eye_convergence import calculate_convergence
import pandas as pd
from matplotlib import pyplot as plt
from scipy.spatial.distance import squareform
from pathlib import Path
import numpy as np
from behavior_analysis.analysis.bout_mapping import calculate_distance_matrix_templates, interpolate_nd
from matplotlib import pyplot as plt

In [None]:
if __name__ == "__main__":

    experiment = BehaviorExperiment.open(r"C:\Users\manyung.ng\Documents\behaviour_analysis\behavior_analysis_tracking\test_analysis")
    print(experiment)
    bouts_path = experiment.subdirs['analysis'].joinpath('bouts.csv')
    bouts_df = pd.read_csv(bouts_path, dtype={'ID': str, 'video_code': str})
    video_info = pd.read_csv(experiment.directory.joinpath('video_data.csv'), dtype={'ID': str, 'video_code': str})

    # Average eye convergence over 20 ms
    window = 0.02

    # Import convergence data
    fish_convergence = pd.read_csv('', dtype={'ID': str})
    convergence_states = []
    # Import bout data
    bouts = BoutData.from_metadata(bouts_df, experiment.subdirs['kinematics'], tail_only=False)
    for i, bout in bouts.iter():
        # Bout info
        bout_info = bouts.metadata.loc[i]
        fps = video_info[video_info["code"] == bout_info.code].squeeze().fps
        ID = bout_info.ID
        fish_info = fish_convergence[fish_convergence["ID"] == ID].squeeze()
        # Calculate convergence
        bout_convergence = np.degrees(calculate_convergence(bout))
        w = int(window * fps)
        convergence_start = bout_convergence[:w].mean()
        convergence_end = bout_convergence[-w:].mean()
        convergence_states.append(np.array([convergence_start, convergence_end]) >= fish_info.threshold)
    convergence_states = np.array(convergence_states)
    # Find bout phases
    spontaneous = (~convergence_states[:, 0]) & (~convergence_states[:, 1])
    early = (~convergence_states[:, 0]) & (convergence_states[:, 1])
    mid = (convergence_states[:, 0]) & (convergence_states[:, 1])
    late = (convergence_states[:, 0]) & (~convergence_states[:, 1])
    phase_labels = np.column_stack([spontaneous, early, mid, late])
    phase_labels = np.argwhere(phase_labels)[:, 1]
    bouts_df['phase'] = phase_labels
    bouts_df.to_csv(experiment.subdirs["analysis"].joinpath("bout_convergence_labels.csv"), index=False)


## Behaviour analysis (Mearns et al., 2020)

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.spatial import distance as ssd
from matplotlib import pyplot as plt

In [None]:
plt.rcParams["figure.dpi"] = 150

In [None]:
# set data path
data_path = Path(r"C:\Users\manyung.ng\Documents\behaviour_analysis\behavior_analysis_tracking\test_analysis")

In [None]:
# import metadata
md = pd.read_csv(data_path.joinpath("mapped_bouts.csv"), index_col="bout_index", dtype={"ID": str, "video_code": str})

In [None]:
md

In [None]:
bout_md = md.loc[0]
bout_md

In [None]:
trial_path = data_path.joinpath("kinematics", bout_md["ID"], bout_md["video_code"] + ".csv")
trial_kinematics = pd.read_csv(trial_path)