### Fish tested twice in the morning and twice in the afternoon (once with E2, once with Met)

- Paired t-test

In [1]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
from scipy import stats
import seaborn as sns
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d

In [10]:
def divide_data_by_flow_direction(data):
    '''Dividing raw data from one fish into 2 dataframes with either left or right OMR flow'''
    data.columns = ['X_coord', 'Y_coord', 'heading_direction', \
                'cumulative_direction','beat_freq', 'beat_amp', \
                'tail_move?', 'timestamp', 'contrast_level', 'flow_direction']
    right = pd.DataFrame(data[data.flow_direction == 1])
    left = pd.DataFrame(data[data.flow_direction == 2])

    return right, left

def divide_data_by_contrast(data):
    '''Dividing raw data from one fish and one flow direction into contrast levels'''
    C_0 = pd.DataFrame(data[data.contrast_level == 0])
    C_01 = pd.DataFrame(data[data.contrast_level == 0.01])
    C_1 = pd.DataFrame(data[data.contrast_level == 0.1])
    C_2 = pd.DataFrame(data[data.contrast_level == 0.2])
    C_3 = pd.DataFrame(data[data.contrast_level == 0.3])
    C_5 = pd.DataFrame(data[data.contrast_level == 0.5])
    C_7 = pd.DataFrame(data[data.contrast_level == 0.7])
    C_10 = pd.DataFrame(data[data.contrast_level == 1])
    return C_0, C_01, C_1, C_2, C_3, C_5, C_7, C_10

def omr_preprocess(data):
    # remove timestamp and extras
    # keep x, y, heading, cumulative, timestamp
    new = data.drop(columns = ['beat_freq', 'beat_amp','tail_move?','contrast_level','flow_direction'])


    # resetting index
    new = pd.DataFrame(new)
    new = new.set_index('timestamp').reset_index()


    # remove time points where there was an angle change of more than pi from one frame to another
    for row in range(len(new)-1):
        heading = new.iloc[row,3]
        next_heading = new.iloc[row+1,3]
        if np.abs(next_heading-heading) >= np.pi:
            new.iloc[row+1,3] = new.iloc[row,3]


    # interpolating and normalising data to a fixed set of points
    interp = pd.DataFrame(columns=['timestamp','X_coord','Y_coord','heading_direction','cumulative_direction'])
    for column in new.columns:
        x = np.arange(0,len(new))
        y = new[column]
        f = interpolate.interp1d(x,y)

        x_new = np.arange(0,3000,1)
        y_new = f(x_new)
        interp[column] = y_new


    # setting first cumulative_angle to zero and ajdusting all others
    interp.iloc[:,4] -= interp.iloc[0,4]


    # calculating distance traveled between each timeframe
    # distance = sqrt((x2-x1)**2 + (y2-y1)**2)
    interp['distance_pts'] = 0
    for row in range(1,len(interp),1):
        distance = np.sqrt((interp['X_coord'][row]-interp['X_coord'][row-1])**2\
                            +(interp['Y_coord'][row]-interp['Y_coord'][row-1])**2)
        interp.iloc[row,5] = distance


    # cleaning the timestamps
    interp.insert(0, 'new_timestamp', range(1, 1 + len(interp)))
    interp = interp.drop(columns=['timestamp']).rename(columns={'new_timestamp':'timestamp'})
    interp['timestamp'] = interp['timestamp']/1000

    return np.array(interp)

def combine_fish_data(*args):
    '''Combines preprocessed data from all fish into a 3D numpy array'''
    combined_fish = np.stack((args),axis=0)
    return combined_fish

In [3]:
fish1_E2_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish1_E2_AM.csv')
fish2_E2_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish2_E2_AM.csv')
fish3_E2_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish3_E2_AM.csv')

fish1_Met_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish1_Me_AM.csv')
fish2_Met_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish2_Me_AM.csv')
fish3_Met_AM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish3_Me_AM.csv')

fish1_E2_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish1_E2_PM.csv')
fish2_E2_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish2_E2_PM.csv')
fish3_E2_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish3_E2_PM.csv')
fish4_E2_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish4_E2_PM.csv')

fish1_Met_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish1_Me_PM.csv')
fish2_Met_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish2_Me_PM.csv')
fish3_Met_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish3_Me_PM.csv')
fish4_Met_PM = pd.read_csv('../raw_data/Paired_fish/xy_hc_fai_tstp_fish4_Me_PM.csv')

In [4]:
for e in range(1,4):
    exec(f'LE2_fish{e}_AM,RE2_fish{e}_AM = divide_data_by_flow_direction(fish{e}_Met_AM)')
    exec(f'LMe_fish{e}_AM,RMe_fish{e}_AM = divide_data_by_flow_direction(fish{e}_Met_AM)')
for e in range(1,5):
    exec(f'LE2_fish{e}_PM,RE2_fish{e}_PM = divide_data_by_flow_direction(fish{e}_Met_PM)')
    exec(f'LMe_fish{e}_PM,RMe_fish{e}_PM = divide_data_by_flow_direction(fish{e}_Met_PM)')

In [5]:
# LE2_fish1_AM --> LE2_fish1_AM_C01

for medium in ['E2','Me']:
    for di in ['R','L']:
        for i in range(1,4):
            exec(f'L{medium}_fish{i}_AM_C_0, L{medium}_fish{i}_AM_C_1, L{medium}_fish{i}_AM_C_10, \
                L{medium}_fish{i}_AM_C_20, L{medium}_fish{i}_AM_C_30, L{medium}_fish{i}_AM_C_50, \
                L{medium}_fish{i}_AM_C_70, L{medium}_fish{i}_AM_C_100 \
                = divide_data_by_contrast(L{medium}_fish{i}_AM)')
            exec(f'R{medium}_fish{i}_AM_C_0, R{medium}_fish{i}_AM_C_1, R{medium}_fish{i}_AM_C_10, \
                R{medium}_fish{i}_AM_C_20, R{medium}_fish{i}_AM_C_30, R{medium}_fish{i}_AM_C_50, \
                R{medium}_fish{i}_AM_C_70, R{medium}_fish{i}_AM_C_100 \
                = divide_data_by_contrast(L{medium}_fish{i}_AM)')
        for i in range(1,5):
            exec(f'L{medium}_fish{i}_PM_C_0, L{medium}_fish{i}_PM_C_1, L{medium}_fish{i}_PM_C_10, \
                L{medium}_fish{i}_PM_C_20, L{medium}_fish{i}_PM_C_30, L{medium}_fish{i}_PM_C_50, \
                L{medium}_fish{i}_PM_C_70, L{medium}_fish{i}_PM_C_100 \
                = divide_data_by_contrast(L{medium}_fish{i}_PM)')
            exec(f'R{medium}_fish{i}_PM_C_0, R{medium}_fish{i}_PM_C_1, R{medium}_fish{i}_PM_C_10, \
                R{medium}_fish{i}_PM_C_20, R{medium}_fish{i}_PM_C_30, R{medium}_fish{i}_PM_C_50, \
                R{medium}_fish{i}_PM_C_70, R{medium}_fish{i}_PM_C_100 \
                = divide_data_by_contrast(L{medium}_fish{i}_PM)')

In [6]:
for medium in ['E2','Me']:
    for flow in ['R','L']:
        for contrast in ['C_1','C_10','C_20','C_30','C_50','C_70','C_100']:
            for i in range(1,4):
                exec(f'{flow}{medium}_fish{i}_AM_{contrast} = omr_preprocess({flow}{medium}_fish{i}_AM_{contrast})')
            for i in range(1,5):
                exec(f'{flow}{medium}_fish{i}_PM_{contrast} = omr_preprocess({flow}{medium}_fish{i}_PM_{contrast})')

In [9]:
pd.DataFrame(LE2_fish1_PM_C_1)

Unnamed: 0,0,1,2,3,4,5
0,0.001,519.2766,266.9534,2.853629,0.00000,0.000000
1,0.002,519.2481,266.8655,2.858926,-0.00531,0.092405
2,0.003,519.1266,266.9127,2.913736,-0.06012,0.130346
3,0.004,519.2237,266.7898,2.919420,-0.06580,0.156630
4,0.005,519.0538,266.8356,3.095066,-0.24144,0.175965
...,...,...,...,...,...,...
2995,2.996,486.3149,591.1649,2.296716,-5.72628,0.024122
2996,2.997,486.2776,591.2691,2.168903,-5.59847,0.110675
2997,2.998,486.4947,591.1838,2.161209,-5.59077,0.233256
2998,2.999,486.3735,591.1667,2.087941,-5.51751,0.122400


In [23]:
for medium in ['E2','Me']:
    for flow in ['R','L']:
        for contrast in ['C_1','C_10','C_20','C_30','C_50','C_70','C_100']:
            exec(f'combined_{flow}_{medium}_{contrast}_AM =\
                combine_fish_data({flow}{medium}_fish1_AM_{contrast},{flow}{medium}_fish2_AM_{contrast},\
                {flow}{medium}_fish3_AM_{contrast})')
            exec(f'combined_{flow}_{medium}_{contrast}_PM =\
                combine_fish_data({flow}{medium}_fish1_PM_{contrast},{flow}{medium}_fish2_PM_{contrast},\
                {flow}{medium}_fish3_PM_{contrast},{flow}{medium}_fish4_PM_{contrast})')

In [24]:
combined_L_E2_C_1_PM.shape

(4, 3000, 6)

In [None]:
stats.ttest_rel()