In [None]:
def cartesian_to_spherical(x, y, z):
    """Convert Cartesian coordinates to spherical coordinates.

    Operates element-wise, so scalars, NumPy arrays and pandas Series are
    all accepted (the return values have the matching type).

    Args:
        x: Cartesian x coordinate(s).
        y: Cartesian y coordinate(s).
        z: Cartesian z coordinate(s).

    Returns:
        A tuple ``(r, theta, phi)`` where ``r`` is the distance from the
        origin, ``theta`` is the azimuth ``arctan2(y, x)`` in radians and
        ``phi`` is the inclination from the +z axis in radians (0 to pi).
        At the origin all three values are 0.
    """
    r = np.sqrt(x**2 + y**2 + z**2)
    theta = np.arctan2(y, x)  # Azimuth
    # Inclination. For r > 0 this equals arccos(z / r), but the arctan2 form
    # needs no division, so the origin yields 0 instead of NaN (0 / 0).
    phi = np.arctan2(np.sqrt(x**2 + y**2), z)
    return r, theta, phi

def spherical_to_cartesian(r, theta, phi):
    """Convert spherical coordinates to Cartesian coordinates.

    Args:
        r: Radial distance(s).
        theta: Azimuth angle(s) in radians, used in the xy-plane.
        phi: Inclination angle(s) in radians, measured from the +z axis.

    Returns:
        A tuple ``(x, y, z)``.
    """
    # Length of the projection onto the xy-plane, shared by x and y.
    planar = r * np.sin(phi)
    return planar * np.cos(theta), planar * np.sin(theta), r * np.cos(phi)

In [None]:
def filter_large_jumps(df, max_stepsize=0.02, window_size=10):
    """Blank out positions around implausibly large frame-to-frame jumps.

    The step size of a row is the Euclidean distance between its
    ('fishx', 'fishy', 'fishz') position and that of the previous row. Every
    row within ``window_size`` rows of a step larger than ``max_stepsize``
    has its three position columns set to NaN.

    The window is applied by row position, not by index label, so the frame
    does not need a default integer index.

    Args:
        df: DataFrame with 'fishx', 'fishy' and 'fishz' columns. It is
            modified in place.
        max_stepsize: Steps strictly greater than this count as jumps.
        window_size: Number of rows to blank on each side of a jump
            (non-negative integer).

    Returns:
        The same DataFrame object, with the affected positions set to NaN.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    # Step size per row; the first row has no predecessor, so its step is NaN
    # and never compares as large.
    steps = np.sqrt(df['fishx'].diff()**2 + df['fishy'].diff()**2 + df['fishz'].diff()**2)
    large_steps = steps > max_stepsize

    n_large = int(large_steps.sum())
    print(f"Found {n_large} large steps")

    if n_large:
        # A centred rolling maximum widens each flagged row by window_size
        # rows on both sides; min_periods=1 lets the window shrink at the
        # start and end of the frame instead of producing NaN there.
        near_jump = (
            large_steps.astype(float)
            .rolling(2 * window_size + 1, center=True, min_periods=1)
            .max()
            .to_numpy() > 0
        )
        # A boolean array selects rows by position, whatever the index is.
        df.loc[near_jump, ['fishz', 'fishy', 'fishx']] = np.nan

    return df

In [None]:
def read_files(folder_path, max_trials=3):
    """Collect the first few trial files of every fish found in a folder.

    File names are expected to look like ``<fish_id>_<trial_id>_...csv.gz``:
    the first two underscore-separated fields of the base name are used as
    the fish and trial identifiers. Files are visited in natural sort order,
    so for each fish the first ``max_trials`` distinct trials are kept.

    Args:
        folder_path: Directory that is searched for ``*.csv.gz`` files.
        max_trials: Maximum number of trials stored per fish.

    Returns:
        A dict mapping ``fish_id`` to a dict of ``trial_id -> file path``.
    """
    all_files = glob.glob(os.path.join(folder_path, "*.csv.gz"))
    all_files = natsort.natsorted(all_files)  # Natural sort files

    fish_data = {}

    for file_path in all_files:
        filename = os.path.basename(file_path)
        parts = filename.split('_')
        if len(parts) < 2:
            # A name without an underscore has no trial field; skip it rather
            # than abort the whole scan with an IndexError.
            print(f"Skipping {filename}: expected '<fish_id>_<trial_id>_...' file name")
            continue
        fish_id, trial_id = parts[0], parts[1]

        # Only keep the first max_trials trials for each fish
        trials = fish_data.setdefault(fish_id, {})
        if trial_id not in trials and len(trials) < max_trials:
            trials[trial_id] = file_path

    # Debug: Print out the files being processed for each fish
    for fish_id, trials in fish_data.items():
        print(f"Fish ID: {fish_id}, Trials: {list(trials.keys())}")

    return fish_data

In [None]:
def filter_spherical(df, err=0.005, z_offset=0.11, r_min=0.11, r_max=0.2):
    """Blank out positions that lie outside a spherical shell.

    The radius of each row is its distance from the point
    ``(0, 0, z_offset)``. Rows whose radius is greater than ``r_max + err``
    or smaller than ``r_min - err`` have 'fishx', 'fishy' and 'fishz' set to
    NaN. Rows whose position is already NaN are left as they are.

    Args:
        df: DataFrame with 'fishx', 'fishy' and 'fishz' columns. It is
            modified in place.
        err: Tolerance added to the outer and subtracted from the inner
            radius limit.
        z_offset: Value subtracted from 'fishz' before the radius is taken.
        r_min: Inner radius of the accepted shell.
        r_max: Outer radius of the accepted shell.

    Returns:
        The same DataFrame object, with out-of-shell positions set to NaN.
    """
    # Only the radial distance is needed for this filter, so it is computed
    # directly; the thresholds must be compared against a radius, not an
    # angle.
    dz = df['fishz'] - z_offset
    radius = np.sqrt(df['fishx']**2 + df['fishy']**2 + dz**2)

    # Apply spherical filters
    outside_shell = (radius > r_max + err) | (radius < r_min - err)
    df.loc[outside_shell, ['fishz', 'fishy', 'fishx']] = np.nan

    return df

In [None]:
def process_file(file_path, fHz=100):
    """Load one gzip-compressed trial CSV and derive kinematics from it.

    Steps, in order: read the position columns, blank large jumps with
    ``filter_large_jumps``, smooth each axis with a Savitzky-Golay filter
    (window 11, polynomial order 1), take per-frame differences of the
    smoothed positions, compute a velocity column, then hand the frame to
    ``calculate_time_stamp`` and ``calculate_angles_at_peaks_efficient``
    (both defined elsewhere). The spherical filter is currently not applied.

    Args:
        file_path: Path to the ``.csv.gz`` file.
        fHz: Sampling rate; the per-frame displacement is divided by
            ``1 / fHz`` to obtain the velocity.

    Returns:
        A tuple ``(df, peaks, peak_angles, wrapped_angles, unwrapped_angles,
        cumulative_angles)``. ``wrapped_angles`` are the peak angles mapped
        into [-180, 180), ``unwrapped_angles`` is ``np.unwrap`` of those
        angles in radians, and ``cumulative_angles`` is the cumulative sum of
        the unwrapped angles converted back to degrees.
    """
    desired_cols = ['realtime', 'fishx', 'fishy', 'fishz', 'start_time']
    df = pd.read_csv(file_path, compression='gzip', usecols=desired_cols)

    # Remove tracking jumps before any smoothing is done.
    df = filter_large_jumps(df)

    # Smooth every axis first, then difference the smoothed traces; two
    # passes keep the new columns in smooth_* then d* order.
    axis_columns = (
        ('fishx', 'smooth_fishx', 'dx'),
        ('fishy', 'smooth_fishy', 'dy'),
        ('fishz', 'smooth_fishz', 'dz'),
    )
    for raw_col, smooth_col, _ in axis_columns:
        df[smooth_col] = savgol_filter(df[raw_col], 11, 1)
    for _, smooth_col, delta_col in axis_columns:
        df[delta_col] = df[smooth_col].diff().fillna(0)

    # Displacement per frame divided by the frame interval.
    frame_interval = 1/fHz
    displacement = np.sqrt(df['dx']**2 + df['dy']**2 + df['dz']**2)
    df['velocity'] = displacement / frame_interval

    # Per the original notes this adds a time_stamp column.
    df = calculate_time_stamp(df)

    # Peak detection and the per-peak angles come from the helper; per the
    # original notes it also covers interbout duration, turn bias, etc.
    df, peaks, peak_angles = calculate_angles_at_peaks_efficient(df, fHz)

    # Map angles into [-180, 180) degrees, then unwrap them in radians.
    wrapped_angles = [(angle + 180) % 360 - 180 for angle in peak_angles]
    unwrapped_angles = np.unwrap(np.radians(wrapped_angles))

    # Running total of the unwrapped angles, expressed in degrees.
    cumulative_angles = np.cumsum(np.degrees(unwrapped_angles))

    return df, peaks, peak_angles, wrapped_angles, unwrapped_angles, cumulative_angles
