In [None]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
from scipy.stats import norm
import seaborn as sns
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d

In [None]:
def omr_preprocess(data, n_points=20000):
    """Clean one raw OMR tracking table and return it as a 2-D float array.

    Processing steps, in order:

    1. Label the ten raw columns and keep position, heading and cumulative
       direction (the recorded timestamp is replaced in step 6).
    2. Suppress heading jumps: whenever a heading differs from the (already
       cleaned) previous heading by at least pi, it is overwritten with that
       previous value.
    3. Sample every kept column at the integer row positions
       ``0 .. n_points - 1`` using linear interpolation.
    4. Shift ``cumulative_direction`` so that its first sample is zero.
    5. Compute the Euclidean distance between consecutive (X, Y) samples;
       the first sample gets distance 0.
    6. Build a synthetic timestamp ``1/1000, 2/1000, ..., n_points/1000``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table with exactly ten columns, in this order: X, Y, heading,
        cumulative direction, beat frequency, beat amplitude, tail-move flag,
        timestamp, contrast level, flow direction. The frame passed in is
        not modified.
    n_points : int, default 20000
        Number of samples in the returned array. The recording must contain
        at least this many rows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_points, 6)`` whose columns are, in order:
        timestamp, X_coord, Y_coord, heading_direction,
        cumulative_direction, distance_pts.

    Raises
    ------
    ValueError
        If ``data`` does not have ten columns, if ``n_points`` is smaller
        than 1, or if the recording has fewer than ``n_points`` rows.
    """
    raw_columns = ['X_coord', 'Y_coord', 'heading_direction',
                   'cumulative_direction', 'beat_freq', 'beat_amp',
                   'tail_move?', 'timestamp', 'contrast_level', 'flow_direction']
    kept_columns = ['X_coord', 'Y_coord', 'heading_direction', 'cumulative_direction']

    if data.shape[1] != len(raw_columns):
        raise ValueError('expected {} columns, got {}'.format(len(raw_columns), data.shape[1]))
    if n_points < 1:
        raise ValueError('n_points must be at least 1')

    # Relabel a copy so the caller's DataFrame keeps its own column names.
    tracked = data.copy()
    tracked.columns = raw_columns
    tracked = tracked[kept_columns].reset_index(drop=True)

    n_rows = len(tracked)
    if n_rows < n_points:
        raise ValueError(
            'recording has {} rows but n_points={} samples were requested'.format(n_rows, n_points)
        )

    # Heading clean-up is inherently sequential: each comparison uses the
    # previous value *after* it may itself have been replaced.
    heading = tracked['heading_direction'].to_numpy(dtype=float, copy=True)
    for i in range(1, n_rows):
        if np.abs(heading[i] - heading[i - 1]) >= np.pi:
            heading[i] = heading[i - 1]
    tracked['heading_direction'] = heading

    # Linear interpolation over row positions, evaluated on a fixed grid.
    row_positions = np.arange(0, n_rows)
    sample_positions = np.arange(0, n_points, 1)
    sampled = {
        name: interpolate.interp1d(row_positions, tracked[name].to_numpy(dtype=float))(sample_positions)
        for name in kept_columns
    }

    x_coord = sampled['X_coord']
    y_coord = sampled['Y_coord']

    # First cumulative angle becomes zero; all others are shifted accordingly.
    cumulative = sampled['cumulative_direction'] - sampled['cumulative_direction'][0]

    # distance = sqrt((x2-x1)**2 + (y2-y1)**2); no predecessor for sample 0.
    step_lengths = np.sqrt(np.diff(x_coord) ** 2 + np.diff(y_coord) ** 2)
    distance_pts = np.concatenate(([0.0], step_lengths))

    # Synthetic, evenly spaced timestamp replacing the recorded one.
    timestamp = np.arange(1, n_points + 1) / 1000

    return np.column_stack([
        timestamp,
        x_coord,
        y_coord,
        sampled['heading_direction'],
        cumulative,
        distance_pts,
    ])

In [None]:
# Load the raw tracking CSV for fish Me_1 from the AM/Met raw-data folder.
raw_me1 = pd.read_csv('../raw_data/AM/Met/xy_hc_fai_tstp_Me_1_AM.csv')

In [None]:
# Display the raw table exactly as it was read from the CSV.
raw_me1

In [None]:
# Preprocess fish Me_1 and wrap the returned array in a DataFrame whose
# columns are labelled in the order omr_preprocess emits them.
preproc_df_me1 = pd.DataFrame(
    omr_preprocess(raw_me1),
    columns=[
        'timestamp',
        'X_coord',
        'Y_coord',
        'heading_direction',
        'cumulative_direction',
        'distance_pts',
    ],
)

In [None]:
# Display the preprocessed table for fish Me_1.
preproc_df_me1

In [None]:
# Load the raw tracking CSVs for fish Me_1 .. Me_17. The file names differ
# only in the fish number, so they are generated from one template instead of
# seventeen copy-pasted lines; the variables bound are the same as before.
(me1, me2, me3, me4, me5, me6, me7, me8, me9,
 me10, me11, me12, me13, me14, me15, me16, me17) = (
    pd.read_csv('../raw_data/AM/Met/xy_hc_fai_tstp_Me_{}_AM.csv'.format(fish_number))
    for fish_number in range(1, 18)
)

In [None]:
# Replace each raw table with its preprocessed array (same variable names).
# NOTE(review): because the names are rebound from DataFrame to array, this
# cell should only be run once after the loading cell.
(me1, me2, me3, me4, me5, me6, me7, me8, me9,
 me10, me11, me12, me13, me14, me15, me16, me17) = (
    omr_preprocess(raw_table)
    for raw_table in (me1, me2, me3, me4, me5, me6, me7, me8, me9,
                      me10, me11, me12, me13, me14, me15, me16, me17)
)

In [None]:
def combine_fish_data(*args):
    '''Stack the per-fish arrays along a new leading axis.

    Each positional argument is one fish's preprocessed array; all of them
    must share the same shape. For 2-D inputs the result is 3-D, indexed as
    (fish, row, column).
    '''
    return np.stack(args, axis=0)

In [None]:
# Stack all seventeen preprocessed fish into one array; the first axis follows
# the argument order, so me_data[0] is me1 and me_data[16] is me17.
me_data = combine_fish_data(me1,me2,me3,me4,me5,me6,me7,me8,me9,me10,me11,me12,me13,me14,me15,me16,me17)

In [None]:
# Preview the first rows of the preprocessed table for fish Me_1.
preproc_df_me1.head()

In [None]:
# For every fish: Gaussian-smooth the distance column (index 5, sigma=10) and
# set smoothed samples that are <= 0.1 to zero.
# NOTE(review): the result is overwritten on every pass, so after the loop
# only the values for the last fish remain - confirm this is intended.
for fish in me_data:
    fish_distance = gaussian_filter1d(fish[:, 5], sigma=10)
    fish_distance_2 = [0 if sample <= 0.1 else sample for sample in fish_distance]
    fish_distance = fish_distance_2

In [None]:
# Compare the raw per-sample distance of one fish with two Gaussian-smoothed
# versions of that same trace. Previously the raw trace came from fish index 7
# while both smoothed traces came from fish index 0, so the curves did not
# describe the same animal; all three now derive from the same trace.
# NOTE(review): index 7 was kept as the fish of interest - confirm.
original = me_data[7,:,5]
smooth1 = gaussian_filter1d(original,sigma=10)
smooth2 = gaussian_filter1d(original,sigma=20)

fig, ax = plt.subplots(1,1,figsize=(18,10))
ax.plot(original, 'gray', lw=0.5, label='original data')
ax.plot(smooth1, '--', color='r',label='filtered, sigma=10')
ax.plot(smooth2, ':', color='b',label='filtered, sigma=20')
# Zoom in on samples 5000-10000 so individual bouts are visible.
ax.set_xlim(5000,10000)
ax.legend()
ax.grid()
plt.show()

In [None]:
# Fish Me_1 only: Gaussian-smooth the distance column (index 5, sigma=10) and
# set smoothed samples that are <= 0.1 to zero.
fish = np.array(preproc_df_me1)
fish_distance = gaussian_filter1d(fish[:, 5], sigma=10)
fish_distance_2 = [0 if sample <= 0.1 else sample for sample in fish_distance]
fish_distance = fish_distance_2

In [None]:
# Plot the raw distance trace of the current fish against its Gaussian-smoothed
# version. The legend previously claimed sigma=20 although the filter is run
# with sigma=10; the label now matches the value actually used.
original = fish[:,5]
smooth2 = gaussian_filter1d(fish[:,5],sigma=10)

fig, ax = plt.subplots(1,1,figsize=(18,10))
ax.plot(original, 'gray', lw=0.5, label='original data')
ax.plot(smooth2, ':', color='b',label='filtered, sigma=10')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Fish Me_1: smooth the distance trace, zero out small values, differentiate,
# and inspect the distribution of the (lightly smoothed) derivative.
fish = np.array(preproc_df_me1)
fish_distance = gaussian_filter1d(fish[:,5],sigma=10)
# Smoothed samples at or below 0.1 are set to zero.
fish_distance = np.where(fish_distance <= 0.1, 0, fish_distance)

# Differentiating the smoothed distance
derivative = np.gradient(fish_distance,edge_order=1)

# Smoothing the derivative
smooth_derivative = gaussian_filter1d(derivative,sigma=1)

# Creating a histogram of smooth derivative
hist, bin_edges = np.histogram(smooth_derivative,bins='scott')

# Full width of the histogram. Computed as last edge minus first edge so it
# is correct whatever the sign of the first edge (the earlier
# `last + abs(first)` form was only right when the first edge was <= 0).
print(bin_edges[-1]-bin_edges[0])
fig, ax = plt.subplots(1,1,figsize=(20,10))
# Draw on the axes created above rather than on the implicit current axes.
sns.histplot(smooth_derivative,bins='scott',kde=True,ax=ax)
plt.show()

In [None]:
# Show the bin edges that the 'scott' rule produces for the smoothed derivative.
np.histogram(smooth_derivative,bins='scott')[1]

In [None]:
# Fish Me_1: derive a peak-height threshold from the spread of the derivative
# of the smoothed distance trace, plot it, and count peaks above it.
fish = np.array(preproc_df_me1)
fish_distance = fish[:,5]
# Replace NaNs with 0 (in place) so they do not spread through the filter.
if np.isnan(fish_distance).any():
    np.nan_to_num(fish_distance,copy=False,nan=0)

# Single source of truth for the smoothing width, so the legend cannot drift
# from the value actually used (it previously read "sigma=5").
smoothing_sigma = 15
fish_distance_gauss = gaussian_filter1d(fish_distance,sigma=smoothing_sigma)

# Smoothed samples at or below 0.1 are set to zero.
fish_distance_gauss = np.where(fish_distance_gauss <= 0.1, 0, fish_distance_gauss)

# Differentiating the smoothed distance
derivative = np.gradient(fish_distance_gauss,edge_order=1)

# Smoothing the derivative
smooth_derivative = gaussian_filter1d(derivative,sigma=1)

# Creating a histogram of smooth derivative
hist, bin_edges = np.histogram(smooth_derivative,bins='scott')

# Threshold = twice the full width of the derivative histogram.
threshold = (bin_edges[-1]-(bin_edges[0]))*2

fig, ax = plt.subplots(1,1,figsize=(18,10))
ax.plot(fish_distance, 'gray', lw=0.5, label='original data')
ax.plot(fish_distance_gauss, ':', color='b',label='filtered, sigma={}'.format(smoothing_sigma))
ax.axhline(y=threshold, color='r', linestyle='-',lw=0.5,label='threshold line')
ax.legend()
ax.grid()
plt.show()

# find_peaks returns (indices, properties); peaks[0] holds the peak indices.
peaks = find_peaks(fish_distance_gauss,height=threshold)

print('Threshold: ',threshold)
print('Number of Peaks: ',len(peaks[0]))

## All fish thresholds

In [None]:
# Repeat the threshold / peak-count analysis for every fish in me_data.
# Single source of truth for the smoothing width, so the legend cannot drift
# from the value actually used (it previously read "sigma=5").
smoothing_sigma = 15

for fish_index, fish in enumerate(me_data):
    fish_distance = fish[:,5]
    # NOTE(review): fish_distance is a slice of me_data and copy=False writes
    # into it, so NaNs in me_data's distance column are replaced with 0 as a
    # side effect of this cell. Kept as-is in case later cells rely on it.
    if np.isnan(fish_distance).any():
        np.nan_to_num(fish_distance,copy=False,nan=0)
    fish_distance_gauss = gaussian_filter1d(fish_distance,sigma=smoothing_sigma)

    # Smoothed samples at or below 0.1 are set to zero.
    fish_distance_gauss = np.where(fish_distance_gauss <= 0.1, 0, fish_distance_gauss)

    # Differentiating the smoothed distance
    derivative = np.gradient(fish_distance_gauss,edge_order=1)

    # Smoothing the derivative
    smooth_derivative = gaussian_filter1d(derivative,sigma=1)

    # Creating a histogram of smooth derivative
    hist, bin_edges = np.histogram(smooth_derivative,bins='scott')

    # Threshold = twice the full width of the derivative histogram.
    threshold = (bin_edges[-1]-(bin_edges[0]))*2

    fig, ax = plt.subplots(1,1,figsize=(18,10))
    ax.plot(fish_distance, 'gray', lw=0.5, label='original data')
    ax.plot(fish_distance_gauss, ':', color='b',label='filtered, sigma={}'.format(smoothing_sigma))
    ax.axhline(y=threshold, color='r', linestyle='-',lw=0.5,label='threshold line')
    # Identify which entry of me_data this figure belongs to.
    ax.set_title('me_data[{}]'.format(fish_index))
    ax.legend()
    ax.grid()
    plt.show()
    # Release the figure so one-figure-per-fish does not accumulate in memory.
    plt.close(fig)

    # find_peaks returns (indices, properties); peaks[0] holds the peak indices.
    peaks = find_peaks(fish_distance_gauss,height=threshold)

    print('Threshold: ',threshold)
    print('Number of Peaks: ',len(peaks[0]))