In [1]:
# Load the autoreload extension and set it to mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and numerical warnings raised by the analysis below —
# consider restricting the filter to the specific categories that are noisy.
warnings.filterwarnings('ignore')

In [2]:
import os
import json

# Data Wrangling
import h5py
import numpy as np
import pandas as pd
from pathlib import Path
import glob
import matplotlib.pyplot as plt
import tables
import flammkuchen as fl
import seaborn as sns

#### distance estimate 

    From the BEATS paper, solution to distance estimation: “distance traveled per bout: The change in head position per bout was calculated by averaging the position of the fish head in the arena over all frames in the preceding interbout epoch (starting position) and also for the frames in the following interbout epoch (ending position). The distance between these two points is the distance traveled per bout.” → The distance traveled per bout is approximated from the positions in the surrounding interbout epochs.


    To estimate the distance traveled by a fish during its movements, you need to calculate the distance moved during each movement bout. Given the start and stop frame of each bout (data['bout_times']) and the coordinates (m_x_lists, m_y_lists), you can calculate the distance traveled during each bout.

Here's a step-by-step process to achieve this:

    Extract the start and stop times for each movement bout.
    For each movement bout, extract the corresponding coordinates from m_x_lists and m_y_lists.
    Calculate the distance traveled during each bout using the Euclidean distance formula.
    Sum up the distances for each bout to get the total distance traveled.


In [3]:
def calculate_distance_traveled(data, m_x_list, m_y_list):
    """
    Compute, for every bout, the Euclidean distance between the position at
    the bout's start frame and the position at its stop frame.

    Parameters:
    - data: dict whose 'bout_times' entry yields one (start, stop) pair of
      frame indices per bout.
    - m_x_list: x coordinates, indexable by frame.
    - m_y_list: y coordinates, indexable by frame.

    Returns:
    - list with one straight-line distance per bout, in the order of
      data['bout_times'] (empty list when there are no bouts).
    """
    def _displacement(first_frame, last_frame):
        # Net displacement only: the path taken between the two frames is ignored.
        delta_x = m_x_list[last_frame] - m_x_list[first_frame]
        delta_y = m_y_list[last_frame] - m_y_list[first_frame]
        return np.sqrt(delta_x**2 + delta_y**2)

    return [
        _displacement(first_frame, last_frame)
        for first_frame, last_frame in data['bout_times']
    ]



### Bout frequency

To calculate the frequency at which the fish initiated a movement over the entire experiment, you need to determine how many times the fish started moving and divide that by the total duration of the experiment.

Here's how you can achieve this:

    Determine the Number of Movements: Count the number of movement episodes (i.e., rows in data['bout_times']).
    Calculate the Total Duration of the Experiment: This is given as m_x_lists[0].shape[0] in frames.
    Compute the Frequency: Convert the total duration from frames to seconds by dividing by the frame rate, then divide the number of movements by that duration to get the frequency of movement initiation in Hz.


In [4]:
def calculate_movement_frequency(data, m_x_list, frame_rate=200):
    """
    Calculate the frequency of movement initiation in Hertz.

    Parameters:
    - data: bout start/stop frames with one row per movement. Either the
      'bout_times' array itself (shape (trials, 2)) or a dict holding that
      array under the key 'bout_times' (the form that
      calculate_distance_traveled takes).
    - m_x_list: sequence of x coordinates with one entry per frame; only its
      length is used, as the total duration of the experiment in frames.
    - frame_rate: frame rate of the experiment in frames per second.

    Returns:
    - frequency_hz: float representing the frequency of movement initiation
      in Hertz (movements per second).

    Raises:
    - ZeroDivisionError: if m_x_list is empty or frame_rate is 0.
    """
    # Accept both the documented dict and the bare array that existing callers pass.
    bout_times = data['bout_times'] if isinstance(data, dict) else data

    # Step 1: Number of movements (one row per bout)
    num_movements = len(bout_times)

    # Step 2: Total duration of the experiment in frames
    total_frames = len(m_x_list)

    # Step 3: Total duration in seconds
    total_duration_seconds = total_frames / frame_rate

    # Step 4: Movements per second
    frequency_hz = num_movements / total_duration_seconds

    return frequency_hz


In [5]:
def midpoint(x1, y1, x2, y2):
    """Return the (x, y) point halfway between (x1, y1) and (x2, y2)."""
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2
    return (mid_x, mid_y)

def mid_head(df):
    """
    Locate the centre of each eye and the point midway between the two eyes.

    The four eye landmarks ('left_eye_anterior', 'left_eye_posterior',
    'right_eye_anterior', 'right_eye_posterior') are read positionally from
    df: sub-column 0 is taken as x and sub-column 1 as y, both cast to float.

    Returns a 6-tuple with one value per row of df:
    (mid_headx, mid_heady, left_mid_eye_x, left_mid_eye_y,
     right_mid_eye_x, right_mid_eye_y).
    """
    def _landmark_xy(label):
        # First two sub-columns of the landmark, as float arrays.
        coords = df[label].values
        return coords[:, 0].astype('float'), coords[:, 1].astype('float')

    right_post_x, right_post_y = _landmark_xy('right_eye_posterior')
    right_ant_x, right_ant_y = _landmark_xy('right_eye_anterior')
    left_post_x, left_post_y = _landmark_xy('left_eye_posterior')
    left_ant_x, left_ant_y = _landmark_xy('left_eye_anterior')

    # Eye centre = mean of the anterior and posterior landmark of that eye.
    left_mid_eye_x = (left_ant_x + left_post_x) / 2
    left_mid_eye_y = (left_ant_y + left_post_y) / 2
    right_mid_eye_x = (right_ant_x + right_post_x) / 2
    right_mid_eye_y = (right_ant_y + right_post_y) / 2

    # Head centre = point halfway between the two eye centres.
    mid_headx, mid_heady = midpoint(
        left_mid_eye_x, left_mid_eye_y, right_mid_eye_x, right_mid_eye_y
    )

    return (
        mid_headx, mid_heady,
        left_mid_eye_x, left_mid_eye_y,
        right_mid_eye_x, right_mid_eye_y,
    )

def calculate_distance(origin, points):
    """
    Euclidean distance from origin to each point.

    points may be a single point (1D) or a collection of points (one per
    row); a single point is promoted to a one-row 2D array first, so the
    result is always an array with one distance per point.
    """
    reference = np.array(origin)
    targets = np.array(points)

    # Promote a lone point to shape (1, n) so the row-wise sum below applies.
    if targets.ndim == 1:
        targets = targets.reshape(1, -1)

    squared_offsets = (targets - reference) ** 2
    return np.sqrt(np.sum(squared_offsets, axis=1))

## Set paths

In [6]:
# Directory that receives the experiment-level (pooled) result files
# ('<exp_name>_distances_pooled.h5' and '<exp_name>_bout_frequencies.h5').
save_data_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\tracking_assessment")

# Alternative location inside the 'testdata' folder:
# save_data_path = Path(r'\\portulab.synology.me\data\Kata\testdata\Processed_Data')
save_data_path

WindowsPath('//portulab.synology.me/data/Kata/Processed_Data/tracking_assessment')

In [22]:
# Raw-data folder of the experiment to analyse. Only one master_path line is
# active; the commented lines are the folders of other experiments.
# NOTE(review): out_path is hard-coded separately in a later cell and
# presumably has to be switched to the matching experiment by hand — confirm.
# master_path = Path(r"\\portulab.synology.me\data\Kata\Data\230307_visstim_2D")
# master_path = Path(r"\\portulab.synology.me\data\Kata\Data\22042024_visstim_2D_round")
# master_path = Path(r"\\portulab.synology.me\data\Kata\Data\22042024_visstim_2D_2")
# master_path = Path(r"\\portulab.synology.me\data\Kata\Data\13052024_visstim_2D_round")
master_path = Path(r"\\portulab.synology.me\data\Kata\Data\14052024_visstim_2D_round")

# master_path = Path(r'\\portulab.synology.me\data\Kata\testdata\Raw_Data')

# Every entry directly inside master_path whose name contains 'f' followed by
# a digit (e.g. 240514_f0); each one is treated as one fish.
fish_paths = list(master_path.glob('*f[0-9]*'))
fish_paths

[WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f0'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f1'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f2'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f3'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f4'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f5'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f6'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f7'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f8'),
 WindowsPath('//portulab.synology.me/data/Kata/Data/14052024_visstim_2D_round/240514_f9')]

In [23]:
# Index into fish_paths of the fish folder used to derive the experiment name.
fish = 0
fish_path = fish_paths[fish]
# Second-to-last path component, i.e. the name of the folder that contains
# the fish folder; used as prefix for the pooled output files.
exp_name = Path(fish_path).parts[-2] 
# exp_name = 'testfish'
exp_name

'14052024_visstim_2D_round'

In [24]:
# Folder with the per-fish processed files: the load-data loop reads
# '<fish_id>_DLC_mod.csv' and '*<fish_id>_bout_data.h5*' from it and writes
# '<fish_id>_distances.h5' into it. Only one out_path line is active.
# NOTE(review): the active line looks like it must correspond to the active
# master_path (same experiment name plus a trailing underscore) — confirm
# whenever master_path is changed.
# out_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\230307_visstim_2D_")
# out_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\22042024_visstim_2D_round_")
# out_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\22042024_visstim_2D_2_")
# out_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\13052024_visstim_2D_round_")
out_path = Path(r"\\portulab.synology.me\data\Kata\Processed_Data\14052024_visstim_2D_round_")

# out_path = Path(r'\\portulab.synology.me\data\Kata\testdata\Processed_Data')


In [25]:
# Reference pixel that is subtracted from the head position in the load-data
# loop. The values in the trailing comments are alternatives kept from before.
center_y = 512 #268  #from camera
center_x = 640 #360  #from camera
# Divisor applied to the pixel offset of the head from the reference pixel;
# presumably pixels per millimetre — TODO confirm.
px_mm = 150 #70

### Load data 

In [26]:
thr = 0.95        # minimum likelihood of the 'body' point for a frame to count as tracked
frame_rate = 200  # frames per second, forwarded to calculate_movement_frequency

# Per-fish results. Entries are appended only once a fish has been processed
# completely, so index i of every list below refers to processed_fish[i].
m_x_lists = []
m_y_lists = []
freq_list = []
distances_ = []
processed_fish = []
failed_fish = []  # (fish_id, error) for every fish that had to be skipped

for path in fish_paths:
    fish_id = path.name
    print(fish_id)

    try:
        # Tracking table with a two-row header (body part / coordinate).
        dlc_path_ = out_path / '{}_DLC_mod.csv'.format(fish_id)
        df_dlc = pd.read_csv(dlc_path_, header=[0, 1])
        mask = df_dlc.body.likelihood >= thr
        m_x_list = df_dlc.motor.x.values
        m_y_list = df_dlc.motor.y.values

        ## calculate mid head
        mid_headx, mid_heady, left_mid_eye_x, left_mid_eye_y, right_mid_eye_x, right_mid_eye_y = mid_head(df_dlc)
        head_x_ = mid_headx
        head_y_ = mid_heady
        # Motor position plus the head's offset from (center_x, center_y),
        # with the pixel offset divided by px_mm.
        new_head_x = m_x_list + (head_x_ - center_x) / px_mm
        new_head_y = m_y_list + (head_y_ - center_y) / px_mm

        # get bout starts
        bout_files = list(out_path.glob('*{}_bout_data.h5*'.format(fish_id)))
        if not bout_files:
            raise FileNotFoundError(
                'no bout data file for {} in {}'.format(fish_id, out_path)
            )
        data_path = bout_files[0]
        data = fl.load(data_path)

        ## do calculations
        ## input mm output should also be mm
        distances = np.asarray(calculate_distance_traveled(data, new_head_x, new_head_y))

        ### just distances in active tracking episodes:
        ### keep a bout only if its start frame passed the likelihood threshold
        exp_time = np.arange(0, df_dlc.shape[0], 1)
        start_frames = data['bout_times'][:, 0]  # fetch all start frames from bout_times
        masked_exp_time = exp_time[mask.values]
        start_frames_in_mask = np.isin(start_frames, masked_exp_time)
        distances_tracking = distances[start_frames_in_mask]

        frequency_hz = calculate_movement_frequency(data['bout_times'], m_x_list, frame_rate=frame_rate)

        ## save the per-fish result
        fl.save(out_path / '{}_distances.h5'.format(fish_id), distances_tracking)
    except Exception as err:
        # Best effort: one broken fish must not abort the batch, but the
        # failure is reported and recorded instead of being silently dropped.
        # print is used because warnings are globally silenced in this notebook.
        failed_fish.append((fish_id, repr(err)))
        print(f"Skipping {fish_id}: {err!r}")
        continue

    print(f"Frequency of movement initiation: {frequency_hz:.4f} Hz")

    # Everything succeeded for this fish: record all results together so the
    # lists stay aligned with each other.
    processed_fish.append(fish_id)
    m_x_lists.append(m_x_list)
    m_y_lists.append(m_y_list)
    distances_.append(distances_tracking)
    freq_list.append(frequency_hz)

if failed_fish:
    print('{} of {} fish could not be processed: {}'.format(
        len(failed_fish), len(fish_paths), ', '.join(fid for fid, _ in failed_fish)
    ))

# Experiment-level (pooled) outputs.
fl.save(save_data_path / '{}_distances_pooled.h5'.format(exp_name), distances_)
fl.save(save_data_path / '{}_bout_frequencies.h5'.format(exp_name), freq_list)


240514_f0
Frequency of movement initiation: 1.1321 Hz
240514_f1
Frequency of movement initiation: 0.8969 Hz
240514_f2
Frequency of movement initiation: 0.2917 Hz
240514_f3
Frequency of movement initiation: 1.1982 Hz
240514_f4
Frequency of movement initiation: 0.6735 Hz
240514_f5
Frequency of movement initiation: 0.6912 Hz
240514_f6
Frequency of movement initiation: 0.6863 Hz
240514_f7
Frequency of movement initiation: 0.7206 Hz
240514_f8
Frequency of movement initiation: 1.3305 Hz
240514_f9
Frequency of movement initiation: 1.4145 Hz
