In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gzip
from scipy.signal import find_peaks

In [2]:
def cart2sph(x, y, z):
    """Convert Cartesian coordinates to spherical coordinates.

    Parameters
    ----------
    x, y, z : float or numpy.ndarray
        Cartesian components. They are combined with ``**`` and NumPy
        functions, so arrays are processed element-wise.

    Returns
    -------
    tuple
        ``(azimuth, elevation, R)``: ``azimuth`` is ``arctan2(y, x)``,
        ``elevation`` is the angle above the x-y plane
        (``arctan2(z, sqrt(x**2 + y**2))``), both in radians, and ``R`` is
        the Euclidean distance from the origin.
    """
    # Squared distance from the z-axis, shared by the elevation and the radius.
    planar_sq = x ** 2 + y ** 2
    return (
        np.arctan2(y, x),
        np.arctan2(z, np.sqrt(planar_sq)),
        np.sqrt(planar_sq + z ** 2),
    )

In [3]:

# Columns requested from each CSV (passed as `usecols` when a recording is read).
desired_cols = [
    'fishz',
    'fishy',
    'fishx',
    'realtime',
]
# Value handed to preprocess_data as its `err` argument.
err = 1e-3

def preprocess_data(df, err):
    """Smooth the trajectory and derive per-sample displacements and heading.

    Each coordinate is smoothed with a centred 11-sample rolling mean and
    differenced to get the displacement between consecutive samples. The
    heading ``arctan2(dy, dx)`` is then unwrapped so that consecutive values
    never differ by more than pi.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``fishx``, ``fishy`` and ``fishz``. Any index
        is accepted; rows are processed in order.
    err : float
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(ddf, angles)`` where ``ddf`` is a copy of ``df`` with added ``dx``,
        ``dy`` and ``dz`` columns and ``angles`` is a 1-D ``numpy.ndarray`` of
        unwrapped headings in radians, one per row. Returns ``(None, None)``
        when one of the coordinate columns is missing.
    """
    if not {'fishx', 'fishy', 'fishz'}.issubset(df.columns):
        return None, None

    ddf = df.copy()

    def _smooth(column):
        # The centred window leaves 5 NaNs at each end. Forward-fill repairs
        # the trailing ones (so the last 5 samples end up with zero
        # displacement); the leading ones are handled after differencing.
        return df[column].rolling(window=11, center=True).mean().ffill()

    # NOTE(review): "x" is taken from the 'fishz' column and "z" from 'fishx'.
    # This is preserved from the original code; confirm the swap is intentional,
    # because the heading below is computed in the (dx, dy) plane.
    xs = _smooth("fishz")
    ys = _smooth("fishy")
    zs = _smooth("fishx")

    # diff() is NaN wherever either neighbour is NaN (the leading samples);
    # back-fill those with the first valid displacement.
    dx = xs.diff().bfill()
    dy = ys.diff().bfill()
    dz = zs.diff().bfill()

    ddf["dx"] = dx
    ddf["dy"] = dy
    ddf["dz"] = dz

    # Work on plain arrays so the loop is positional and does not depend on
    # the DataFrame's index labels.
    wrapped = np.arctan2(dy.to_numpy(), dx.to_numpy())

    # Sequential unwrap: shift every heading by multiples of 2*pi until it lies
    # within pi of the previous (already unwrapped) heading, starting from 0.
    two_pi = 2 * np.pi
    angles = np.empty(len(wrapped))
    last = 0
    for i, phi in enumerate(wrapped.tolist()):
        while phi < last - np.pi:
            phi += two_pi
        while phi > last + np.pi:
            phi -= two_pi
        last = phi
        angles[i] = phi

    return ddf, angles


In [4]:

def plot_velocity(velocity, file):
    """Save a line plot of ``velocity`` as a PNG next to the source file.

    Parameters
    ----------
    velocity : sequence of numbers
        Values to draw, using matplotlib's default x values for the object.
    file : str
        Path of the source recording. Its last three characters are dropped
        and ``_velocity.png`` is appended to form the output path.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(velocity)
    # NOTE(review): no explicit time axis is passed, so the x values are the
    # data's own index; the 'Time (s)' label is only accurate if that index is
    # in seconds - confirm against the caller.
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Velocity (m/s)')
    ax.set_title('Velocity')
    out_path = file[:-3] + "_velocity.png"
    fig.savefig(out_path)
    plt.close(fig)
    

def plot_velocity_peaks(velocity, peaks, file):
    """Save a plot of ``velocity`` with the detected peaks marked as dots.

    Parameters
    ----------
    velocity : sequence of numbers
        Velocity trace; converted to a NumPy array and drawn against sample
        position.
    peaks : array of int
        Positions (0-based offsets into ``velocity``) of the peaks to mark,
        as produced by the caller's ``scipy.signal.find_peaks`` call.
    file : str
        Path of the source recording. Its last three characters are dropped
        and ``_velocity_peaks.png`` is appended to form the output path.
    """
    # `peaks` are positions, not index labels. Indexing a pandas Series with
    # them would do a label lookup, so go through an ndarray to keep the curve
    # and the markers on the same positional axis.
    values = np.asarray(velocity)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(values)
    ax.plot(peaks, values[peaks], ".")
    # NOTE(review): x values are sample positions; the 'Time (s)' label is only
    # accurate if one sample corresponds to one second - confirm.
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Velocity (m/s)')
    ax.set_title('Velocity')
    fig.savefig(file[:-3] + "_velocity_peaks.png")
    plt.close(fig)

def plot_angle_peaks(angles_at_peaks, file):
    """Save a line plot of the heading angle sampled at each peak.

    Parameters
    ----------
    angles_at_peaks : sequence of numbers
        One angle (radians, per the axis label) per peak, drawn in order.
    file : str
        Path of the source recording. Its last three characters are dropped
        and ``_angle_peaks.png`` is appended to form the output path.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(angles_at_peaks)
    # NOTE(review): the x values are simply the position of each entry in
    # `angles_at_peaks`, not a time stamp - the 'Time (s)' label may be
    # misleading; confirm.
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Angle (rad)')
    ax.set_title('Angle at the peak')
    out_path = file[:-3] + "_angle_peaks.png"
    fig.savefig(out_path)
    plt.close(fig)

def plot_angle_diff(angles_at_peaks_diff, file):
    """Save a line plot of the angle change between consecutive peaks.

    Parameters
    ----------
    angles_at_peaks_diff : sequence of numbers
        Differences between consecutive peak angles (radians, per the axis
        label), drawn in order.
    file : str
        Path of the source recording. Its last three characters are dropped
        and ``_angle_diff.png`` is appended to form the output path.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(angles_at_peaks_diff)
    # NOTE(review): the x values are the position of each difference in the
    # sequence, not a time stamp - the 'Time (s)' label may be misleading.
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Angle (rad)')
    # The title used to be 'Angle at the peak', copied from plot_angle_peaks,
    # which made the two saved figures indistinguishable by title.
    ax.set_title('Differences between consecutive angles at the peaks')
    fig.savefig(file[:-3] + "_angle_diff.png")
    plt.close(fig)

def plot_angle_diff_histogram(angles_at_peaks_diff, file):
    """Save a 50-bin histogram of the angle changes between consecutive peaks.

    Parameters
    ----------
    angles_at_peaks_diff : sequence of numbers
        Differences between consecutive peak angles (radians, per the axis
        label).
    file : str
        Path of the source recording. Its last three characters are dropped
        and ``_angle_diff_histogram.png`` is appended to form the output path.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(angles_at_peaks_diff, bins=50)
    ax.set_xlabel('Angle (rad)')
    ax.set_ylabel('Count')
    ax.set_title('Histogram of the differences between consecutive angles')
    out_path = file[:-3] + "_angle_diff_histogram.png"
    fig.savefig(out_path)
    plt.close(fig)
    


In [5]:
def process_file(file):
    """Analyse one gzip-compressed CSV recording and save its figures.

    Reads the columns listed in the module-level ``desired_cols``, runs
    ``preprocess_data``, detects velocity peaks, and writes five PNG files
    (velocity, velocity with peaks, angle at peaks, angle differences and
    their histogram) via the ``plot_*`` helpers. Prints one status line.

    Parameters
    ----------
    file : str
        Path to the ``.csv.gz`` recording.

    Returns
    -------
    None
    """
    # Read data from compressed CSV file
    try:
        with gzip.open(file, 'rb') as f:
            df = pd.read_csv(f, usecols=desired_cols)
    except pd.errors.EmptyDataError:
        # No header row at all: treat it like a header-only file instead of
        # letting one bad recording abort the whole batch.
        print(f"Skipping empty file: {file}")
        return

    if df.empty:
        print(f"Skipping empty file: {file}")
        return

    ddf, angles = preprocess_data(df, err)
    if ddf is None:
        # preprocess_data signals missing coordinate columns with (None, None).
        print(f"Skipping file without position columns: {file}")
        return

    fHz = 100  # sampling rate assumed for every recording (samples per second)
    dt = 1 / fHz
    # Per-sample displacement magnitude divided by the sample period.
    velocity = np.sqrt(ddf["dx"] ** 2 + ddf["dy"] ** 2 + ddf["dz"] ** 2) / dt

    plot_velocity(velocity, file)

    # Peak-detection settings passed to scipy.signal.find_peaks:
    height = (0.1, 0.5)                  # accepted peak height range (min, max)
    frames_btw_2bouts = round(fHz / 10)  # minimum distance between peaks, in samples
    bout_width = round(fHz / 100)        # minimum peak width, in samples
    prominence = 0.05                    # minimum peak prominence
    peaks, _ = find_peaks(
        velocity,
        height=height,
        distance=frames_btw_2bouts,
        width=bout_width,
        prominence=prominence,
    )

    plot_velocity_peaks(velocity, peaks, file)

    angles_at_peaks = angles[peaks]

    plot_angle_peaks(angles_at_peaks, file)

    # Map the angles into [-pi, pi). This form is offset by pi from the input,
    # but the constant offset cancels in the difference taken below.
    angles_at_peaks_normalized = np.mod(angles_at_peaks, 2 * np.pi) - np.pi
    angles_at_peaks_unwrapped = np.unwrap(angles_at_peaks_normalized)
    angles_at_peaks_diff = np.diff(angles_at_peaks_unwrapped)
    # Wrap each peak-to-peak change back into [-pi, pi).
    angles_at_peaks_diff = np.mod(angles_at_peaks_diff + np.pi, 2 * np.pi) - np.pi

    plot_angle_diff(angles_at_peaks_diff, file)
    plot_angle_diff_histogram(angles_at_peaks_diff, file)

    print(f"Graphs saved for file: {file}")


In [6]:

# Folder that holds the gzip-compressed CSV recordings.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
path = "/home/kkumari/PhD/fish-data/long-term-free-swim/"

# Collect every *.csv.gz directly inside that folder, ordered by file name.
all_files = glob.glob(os.path.join(path, "*.csv.gz"))
all_files.sort()


In [7]:

# Run the analysis on every recording in `all_files`, one at a time; each call
# saves its figures alongside the input file and prints one status line.
for file in all_files:
    process_file(file)


Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/01_T1_1b8cd8200e6211edb285003053fc6914_VR03.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/01_T2_667418920e7311edb1af003053fc6914_VR03.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/01_T3_cf108f460e8411edb1af003053fc6914_VR03.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/01_T4_52e331780e9611edb1af003053fc6914_VR03.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/02_T1_1ce42cc80e6211ed9279003053fc8758_VR04.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/02_T2_683acd880e7311edb6c7003053fc8758_VR04.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/02_T3_d07165360e8411edb6c7003053fc8758_VR04.csv.gz
Graphs saved for file: /home/kkumari/PhD/fish-data/long-term-free-swim/02_T4_540e5cb20e9611edb6c7003053fc8758_VR04.csv.gz
Graphs saved for file: /