In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from mpl_toolkits.mplot3d import Axes3D

from typing import List
import glob
import os
import gzip
from tqdm import tqdm
import seaborn as sns
import natsort
import hashlib
import random


In [None]:
def calc_proximity(df, proximity_threshold):
    """Add numeric positions, the real-to-virtual fish distance and a proximity flag.

    Rows containing any missing value are dropped first. The caller's frame is
    left untouched; all new columns are written to an independent copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``xs_rf``, ``ys_rf``, ``zs_rf`` (plotted elsewhere in this
        notebook as the real fish) and ``osg_fish1_x``, ``osg_fish1_y``,
        ``osg_fish1_z`` (plotted as the virtual fish).
    proximity_threshold : float
        A row is flagged as "in proximity" when the 3D Euclidean distance
        between the two positions is strictly below this value.

    Returns
    -------
    pandas.DataFrame
        Copy of the complete rows with the added columns ``x_rf``, ``y_rf``,
        ``z_rf``, ``x_vf``, ``y_vf``, ``z_vf``, ``euclidean_distance`` and the
        boolean ``proximity``.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` if a position value cannot be parsed.
    """
    # dropna() may return a frame pandas still tracks as derived from `df`;
    # the explicit copy makes the assignments below unambiguous and avoids
    # SettingWithCopyWarning on pandas versions without Copy-on-Write.
    clean = df.dropna().copy()

    x_rf = pd.to_numeric(clean['xs_rf'])
    y_rf = pd.to_numeric(clean['ys_rf'])
    z_rf = pd.to_numeric(clean['zs_rf'])
    x_vf = pd.to_numeric(clean['osg_fish1_x'])
    y_vf = pd.to_numeric(clean['osg_fish1_y'])
    z_vf = pd.to_numeric(clean['osg_fish1_z'])

    clean['x_rf'] = x_rf
    clean['y_rf'] = y_rf
    clean['z_rf'] = z_rf
    clean['x_vf'] = x_vf
    clean['y_vf'] = y_vf
    clean['z_vf'] = z_vf

    clean['euclidean_distance'] = np.sqrt(
        (x_rf - x_vf) ** 2 + (y_rf - y_vf) ** 2 + (z_rf - z_vf) ** 2
    )
    clean['proximity'] = clean['euclidean_distance'] < proximity_threshold

    return clean

In [None]:
def calc_relative_heading(df, angular_threshold_degrees, proximity_threshold):
    """Compute planar headings of both fish, their relative heading and a 'following' flag.

    The frame is first passed through ``calc_proximity``. Headings are
    ``arctan2(dy, dx)`` of the row-to-row differences of the x/y positions.
    The relative heading is the difference of the unwrapped headings or that
    difference wrapped into [-pi, pi), whichever has the smaller magnitude.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw tracking frame accepted by ``calc_proximity``.
    angular_threshold_degrees : float
        A row counts as "following" when the absolute relative heading is at
        most this angle (converted to radians internally).
    proximity_threshold : float
        Forwarded to ``calc_proximity``.

    Returns
    -------
    pandas.DataFrame
        The ``calc_proximity`` output extended with ``dxs_rf``, ``dys_rf``,
        ``dxs_vf``, ``dys_vf``, ``heading_rf``, ``heading_vf``,
        ``relative_heading`` (radians) and the boolean ``following``.

    Side effects
    ------------
    Prints the percentage of rows flagged as following.
    """
    df = calc_proximity(df, proximity_threshold)

    # Work on our own copy so the column assignments below never target a
    # frame that pandas considers derived from another one.
    df = df.dropna().copy()

    df['dxs_rf'] = df['x_rf'].diff()
    df['dys_rf'] = df['y_rf'].diff()
    df['dxs_vf'] = df['x_vf'].diff()
    df['dys_vf'] = df['y_vf'].diff()

    # diff() leaves NaN in the first row, so the first heading is NaN as well.
    # Assign the filled result back: a chained `df[col].fillna(..., inplace=True)`
    # does not reach `df` under pandas Copy-on-Write, and a surviving NaN would
    # propagate through np.unwrap's cumulative sum into every heading.
    df['heading_rf'] = np.arctan2(df['dys_rf'], df['dxs_rf']).fillna(0)
    df['heading_vf'] = np.arctan2(df['dys_vf'], df['dxs_vf']).fillna(0)

    unwrap_heading_rf = np.unwrap(df['heading_rf'].to_numpy())
    unwrap_heading_vf = np.unwrap(df['heading_vf'].to_numpy())

    relative_angle = unwrap_heading_rf - unwrap_heading_vf
    # Same angle wrapped into [-pi, pi).
    relative_angle_complement = np.mod(relative_angle + np.pi, 2 * np.pi) - np.pi

    # Per row, keep whichever of the two candidates has the smaller magnitude.
    concat_relative_angles = np.vstack((relative_angle, relative_angle_complement))
    min_indices_relative = np.argmin(np.abs(concat_relative_angles), axis=0)
    relative_heading = concat_relative_angles[
        min_indices_relative, np.arange(concat_relative_angles.shape[1])
    ]

    df['relative_heading'] = relative_heading

    angular_threshold_radians = np.radians(angular_threshold_degrees)
    following = np.abs(relative_heading) <= angular_threshold_radians

    df['following'] = following
    percent_following = np.mean(following) * 100
    print(f'Percent following: {percent_following:.2f}%')

    return df

In [None]:
def coordinate_transition(df, heading_vf):
    """Rotate the real fish's planar offset from the virtual fish by ``heading_vf``.

    The offset ``(x_rf - x_vf, y_rf - y_vf)`` is rotated by ``-heading_vf`` so
    that it is expressed in axes aligned with the supplied heading.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x_rf``, ``y_rf``, ``x_vf`` and ``y_vf``.
    heading_vf : array-like
        Rotation angle(s) in radians, one per row of ``df``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new columns
        ``x_o`` and ``y_o``.
    """
    offset_x = df['x_rf'] - df['x_vf']
    offset_y = df['y_rf'] - df['y_vf']

    cos_heading = np.cos(heading_vf)
    sin_heading = np.sin(heading_vf)

    # Standard 2D rotation of the offset vector by -heading_vf.
    df['x_o'] = offset_x * cos_heading + offset_y * sin_heading
    df['y_o'] = -offset_x * sin_heading + offset_y * cos_heading

    return df

In [None]:
def rf_behind_vf(df, angular_threshold_degrees, proximity_threshold, x_limit=0, y_limit=0):
    """Flag rows where the real fish lies behind the virtual fish.

    Runs ``calc_relative_heading`` on ``df``, rotates the real fish's offset
    by the virtual fish's heading via ``coordinate_transition`` and marks a
    row when both rotated coordinates are below their limits.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw tracking frame accepted by ``calc_relative_heading``.
    angular_threshold_degrees : float
        Forwarded to ``calc_relative_heading``.
    proximity_threshold : float
        Forwarded to ``calc_relative_heading``.
    x_limit, y_limit : float, default 0
        Strict upper bounds applied to ``x_o`` and ``y_o`` respectively.

    Returns
    -------
    pandas.DataFrame
        The processed frame with ``x_o``, ``y_o`` and the boolean
        ``rf_behind_vf`` column added.

    Side effects
    ------------
    Prints the fraction of rows flagged as behind.
    """
    processed = calc_relative_heading(df, angular_threshold_degrees, proximity_threshold)

    virtual_heading = processed['heading_vf'].values
    processed = coordinate_transition(processed, virtual_heading)

    # NOTE(review): requiring y_o < y_limit as well restricts "behind" to one
    # side of the heading axis -- confirm this is intended.
    below_x_limit = processed['x_o'] < x_limit
    below_y_limit = processed['y_o'] < y_limit
    processed['rf_behind_vf'] = below_x_limit & below_y_limit

    behind_flags = processed['rf_behind_vf'].values
    print(f'proportion of time fish is behind: {np.mean(behind_flags)}')

    return processed

In [None]:
def frames30_following(df, streak_length_threshold):
    """Return the lengths of consecutive-row runs where all three conditions hold.

    A row qualifies when ``following``, ``proximity`` and ``rf_behind_vf`` are
    all true. Only runs whose length is at least ``streak_length_threshold``
    are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the boolean columns ``following``, ``proximity`` and
        ``rf_behind_vf``.
    streak_length_threshold : int
        Minimum run length to keep.

    Returns
    -------
    numpy.ndarray
        Run lengths in order of occurrence.

    Side effects
    ------------
    Prints the kept run lengths and how many there are.
    """
    combined = df['following'] & df['proximity'] & df['rf_behind_vf']

    kept_runs = []
    run_length = 0
    # The trailing False acts as a sentinel that closes a run reaching the last row.
    for is_hit in combined.tolist() + [False]:
        if is_hit:
            run_length += 1
            continue
        if run_length >= streak_length_threshold:
            kept_runs.append(run_length)
        run_length = 0

    streaks_over_threshold = np.array(kept_runs)

    print(f'streaks over threshold: {streaks_over_threshold}')
    print(f'number of streaks over threshold: {len(streaks_over_threshold)}')

    return streaks_over_threshold


In [None]:
def cartesian_to_polar(x, y):
    """Convert Cartesian coordinates to polar form.

    Parameters
    ----------
    x, y : float or array-like
        Cartesian coordinates.

    Returns
    -------
    tuple
        ``(r, theta)`` where ``r`` is the distance from the origin and
        ``theta`` is ``np.arctan2(y, x)`` in radians.
    """
    radius = np.sqrt(x**2 + y**2)
    angle = np.arctan2(y, x)
    return radius, angle

In [None]:
def plot_data(df, streak_length_threshold=30):
    """Draw the diagnostic figures for one processed tracking frame.

    Figures, in order: 3D scatter of both fish positions, histogram of
    Euclidean distances, proximity over rows, relative heading over rows,
    the first 1000 'following' values, histogram of relative headings, and a
    histogram of the streak lengths returned by ``frames30_following``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x_rf``, ``y_rf``, ``z_rf``, ``x_vf``, ``y_vf``,
        ``z_vf``, ``euclidean_distance``, ``proximity``, ``relative_heading``,
        ``following`` and ``rf_behind_vf``.
    streak_length_threshold : int, default 30
        Minimum streak length passed to ``frames30_following``; also used in
        the label and title of the last figure.

    Returns
    -------
    None
    """
    # 3D scatter plot of positions
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(df['x_rf'], df['y_rf'], df['z_rf'], c='r', marker='o', label='Real Fish', s=0.2)
    ax.scatter(df['x_vf'], df['y_vf'], df['z_vf'], c='b', marker='^', label='Virtual Fish', s=0.2)
    ax.legend()
    # No plt.show() here: the figure is left open and is displayed by the
    # next plt.show() call.

    # Histogram of euclidean distances
    plt.figure()
    plt.hist(df['euclidean_distance'], bins=100, label='Euclidean Distance')
    plt.legend()
    plt.show()

    # Plot of proximity over time
    plt.figure()
    plt.plot(df['proximity'], label='Proximity')
    plt.legend()
    plt.show()

    # Plot of relative heading over time
    plt.figure(figsize=(15, 5))
    plt.plot(df['relative_heading'], label='Relative Heading')
    plt.legend()
    plt.show()

    # Plot of following over time (first 1000 data points)
    plt.figure(figsize=(15, 5))
    plt.plot(df['following'][:1000], label='Following')
    plt.legend()
    plt.show()

    # Histogram of relative heading
    plt.figure(figsize=(15, 5))
    plt.hist(df['relative_heading'], bins=100, label='Relative Heading')
    plt.legend()
    plt.show()

    # Histogram of streaks at or above the threshold
    streaks = frames30_following(df, streak_length_threshold)
    plt.figure(figsize=(15, 5))
    plt.hist(streaks, bins=100, label=f'Streaks Over {streak_length_threshold}')
    plt.xlabel('Streak Length')
    plt.ylabel('Frequency')
    plt.title(f'Histogram of Streak Lengths Over {streak_length_threshold}')
    plt.legend()
    plt.show()

In [None]:
def read_files(folder_path):
    """Return the naturally sorted paths of the ``*.csv`` files directly inside a folder.

    Parameters
    ----------
    folder_path : str or os.PathLike
        Directory to search (not recursive).

    Returns
    -------
    list of str
        Matching file paths in natural sort order; empty if none match.

    Side effects
    ------------
    Prints the base name of every file found, wrapped in a tqdm progress bar.
    """
    # Join directory and pattern as separate components: string concatenation
    # turned an empty folder_path into "/*.csv" (the filesystem root) and
    # failed for path-like objects.
    pattern = os.path.join(folder_path, "*.csv")
    fish_data_list = natsort.natsorted(glob.glob(pattern))

    for file_path in tqdm(fish_data_list):
        print(os.path.basename(file_path))

    return fish_data_list


In [None]:
# Column subset of interest in each processed CSV (used as `usecols` in the
# commented-out parameter sweep at the end of the notebook).
desired_cols = [
    # virtual fish position
    'osg_fish1_x', 'osg_fish1_y', 'osg_fish1_z',
    'framenumber',
    'real_fish_x', 'real_fish_y', 'real_fish_z',
    'velocity',
    'Stim_Flag', 'Stim_Flag_dir',
    'exp_uuid', 'frame_diff', 'steps',
    # real fish position columns consumed by calc_proximity
    'xs_rf', 'ys_rf', 'zs_rf',
]

# NOTE(review): machine-specific absolute paths -- adjust before running elsewhere.
folder_path = r"C:\PhD\experiments\fish_experiments\FishSizeKinematics\processed_data"
output_filtered_data_path = r'C:\PhD\experiments\fish_experiments\FishSizeKinematics\processed_filtered_data'

# Naturally sorted list of CSV paths found in folder_path.
fish_data_list = read_files(folder_path)
# fish_data_list

In [None]:
# # Define your ranges
# angles = [30, 45, 60, 90]
# proximity_thresholds = [0.01, 0.02, 0.03, 0.04, 0.05]
# streak_length_thresholds = [20, 30, 40]

# results = []

# # Iterate over each file
# for file_path in tqdm(fish_data_list):
#     df = pd.read_csv(file_path, usecols=desired_cols)
    
#     # Iterate over each angle
#     for angle in angles:
#         # Iterate over each proximity threshold
#         for proximity_threshold in proximity_thresholds:
#             # Iterate over each streak length threshold
#             for streak_length_threshold in streak_length_thresholds:
#                 # Calculate rf_behind_vf_df based on the current angle and proximity threshold
#                 rf_behind_vf_df = rf_behind_vf(df, angle, proximity_threshold, x_limit=0, y_limit=0)
                
#                 # Get the streak lengths over the threshold
#                 streaks_over_threshold = frames30_following(rf_behind_vf_df, streak_length_threshold)
                
#                 # Append the results to the list
#                 results.append({
#                     'file_path': file_path,
#                     'angle': angle,
#                     'proximity_threshold': proximity_threshold,
#                     'streak_length_threshold': streak_length_threshold,
#                     'streak_lengths': streaks_over_threshold
#                 })

# # Convert the list of results to a DataFrame
# streaks_df = pd.DataFrame(results)
# # Add a new column 'number_of_streaks' to the DataFrame
# streaks_df['number_of_streaks'] = streaks_df['streak_lengths'].apply(len)
# streaks_df['filename'] = streaks_df['file_path'].apply(lambda x: os.path.basename(x))
# streaks_df.to_csv(r"C:\PhD\experiments\fish_experiments\FishSizeKinematics\streaks_df.csv", index=False)
    