# This script shows what the usual process should be
### 1) Load in a raw tracktor csv file
### 2) Implement the key values algorithm to get each shrimp's important info
### 3) Implement the distances algorithm to get each pair's distance at each moment
### 4) Implement the timestamp algorithm on the distance dataframe to get important timestamps
### 5) Save the important timestamps as a csv file to your computer for further use

#### Step 1: Load in the tracked shrimp

In [1]:
# Load packages

import numpy as np
import pandas as pd
import tracktor as tr
import seaborn as sns
import plotly.express as px
import cv2
import sys
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
from itertools import combinations
pd.options.mode.chained_assignment = None

# This is the dataframe that has already been passed through the distance function
# INPUT YOUR OWN FILE
df = pd.read_csv('/Users/lukefields/Desktop/Shrimp-Data/tracktor-data/BlackWhiteResult2full_tracked2.csv')

In [2]:
df

Unnamed: 0.1,Unnamed: 0,frame,pos_x,pos_y,id
0,0,3.0,44.241490,56.520231,A
1,1,3.0,95.951938,58.567442,B
2,2,4.0,44.267095,56.517042,A
3,3,4.0,95.951938,58.567442,B
4,4,5.0,44.254877,56.493248,A
...,...,...,...,...,...
32115,32115,16060.0,185.596863,72.416471,B
32116,32116,16061.0,45.146692,56.564711,A
32117,32117,16061.0,185.596863,72.416471,B
32118,32118,16062.0,45.128125,56.581042,A


#### Step 2) Key Info

In [3]:
def key_values(dataframe):
    df_storage = []


    for identity in dataframe["id"].unique():
        df_id = dataframe[dataframe["id"] == identity]

        dx = df_id['pos_x'] - df_id['pos_x'].shift(2)
        dy = df_id['pos_y'] - df_id['pos_y'].shift(2)
        df_id['speed'] = np.sqrt(dx**2 + dy**2)
        df_id = df_id.fillna(0)
        df_id['cum_dist'] = df_id['speed'].cumsum()
        df_id = df_id.sort_values(by=['frame'])

        important_vals = pd.DataFrame()
        important_vals["ID"] = [df_id["id"].iloc[0]]
        important_vals["Distance Traveled"] = [max(df_id["cum_dist"])]
        important_vals["Average Speed"] = [(df_id["speed"]).mean()]
        important_vals["Max Speed"] = [max(df_id["speed"])]
        important_vals["% Stationary"] = [(len(np.where(df_id["speed"] < 1)[0]) / len(df_id["speed"])) * 100]
        df_storage.append(important_vals) 
        
    combined_df = pd.concat(df_storage, axis = 0, ignore_index = True)
    return combined_df

In [4]:
key_values(df)

Unnamed: 0,ID,Distance Traveled,Average Speed,Max Speed,% Stationary
0,A,1521.054656,0.094711,23.086793,98.542964
1,B,3999.350659,0.249026,62.481997,96.998755


#### Step 3) Distances

In [5]:
def distances(dataframe, fps):
    df_storage = []
    
    # These lines below compute all of the different pairs of shrimp 
    diff_ids = dataframe["id"].unique()
    shrimp_combo = []
    for combo in combinations(diff_ids, 2):  # 2 for pairs, 3 for triplets, etc
        shrimp_combo.append(combo)
    
    # Loop through each pair of shrimp
    for pair in shrimp_combo:
        shrimpX = pair[0]
        shrimpY = pair[1]

        # This will subset our dataframe into one with just the two animals we care about
        dataframe2 = dataframe[(dataframe["id"] == shrimpX) | (dataframe["id"] == shrimpY)]

        for idx, ID in enumerate(np.unique(dataframe2['id'])):
                dataframe2['id'][dataframe2['id'] == ID] = idx
                #print(dataframe.shape)
        
        # Gets the distance between two shrimp for each frame
        distances = []
        for fr in np.unique(dataframe['frame']):
                tmp = dataframe2[dataframe2['frame'] == fr]
                x = tmp[tmp['id'] == 0]['pos_x'].values[0] - tmp[tmp['id'] == 1]['pos_x'].values[0]
                y = tmp[tmp['id'] == 0]['pos_y'].values[0] - tmp[tmp['id'] == 1]['pos_y'].values[0]
                distances.append(np.sqrt(x**2 + y**2))

        # Creates a timestamp column in seconds 
        timestamp = np.unique(dataframe2['frame'])/fps
        
        # Manipulates the timestamp column for easier-to-read results
        def convert_time(seconds):
            seconds = seconds % (24 * 3600)
            hour = seconds // 3600
            seconds %= 3600
            minutes = seconds // 60
            seconds %= 60     
            milliseconds = (seconds % 1) * 100
            milliseconds
            return "%d:%02d:%02d:%2d" % (hour, minutes, seconds, milliseconds)
        
        convert_time = np.vectorize(convert_time)
        timestamp = convert_time(timestamp)
        frames = np.unique(dataframe2['frame'])
        
        # Getting the list of the different pairs of shrimp
        pair_list = []
        for i in range(len(distances)):
                pair_list.append(pair)

        # Creates our dataframe of shrimp and their respective distance at individual moments
        dist_df = pd.DataFrame([pair_list, frames, timestamp, distances]).transpose()
        dist_df.columns = ["Pair", "Frame", "Timestamp", "Distance"]
        
        df_storage.append(dist_df)
    
    combined_df = pd.concat(df_storage, axis = 0, ignore_index = True)

    return combined_df

In [6]:
dist_df = distances(df, 60)
dist_df

Unnamed: 0,Pair,Frame,Timestamp,Distance
0,"(A, B)",3.0,0:00:00: 5,51.750956
1,"(A, B)",4.0,0:00:00: 6,51.725498
2,"(A, B)",5.0,0:00:00: 8,51.742474
3,"(A, B)",6.0,0:00:00:10,51.804516
4,"(A, B)",7.0,0:00:00:11,51.776083
...,...,...,...,...
16055,"(A, B)",16058.0,0:04:27:63,141.318775
16056,"(A, B)",16059.0,0:04:27:64,141.398789
16057,"(A, B)",16060.0,0:04:27:66,141.35307
16058,"(A, B)",16061.0,0:04:27:68,141.341886


#### Step 4) Timestamps

In [7]:
def timestamp_from_dist(dataframe, cutoff):
    df_storage = []
    
    for pair in dataframe["Pair"].unique():
        pair_df = dataframe[dataframe["Pair"] == pair]
        
        # Creates a "Previous" column which defines where the shrimp were in the previous frame
        pair_df["Previous"] = (pair_df["Distance"].shift(1)).fillna(pair_df["Distance"])
        enter = []
        exit = []
        pair = pair_df["Pair"].iloc[0]

        # Case 2: The two shrimp are never within the distance threshold
        if (pair_df.loc[pair_df["Distance"] <= cutoff]).empty == True:
            enter.append(0)
            exit.append(0)
            label = [pair]

        # Case 2: The two shrimp are always within the distance threshold    
        elif (len(pair_df.loc[pair_df["Distance"] <= cutoff]) == len(pair_df)) == True:
            enter.append((pair_df["Timestamp"].iloc[0]))
            exit.append((pair_df["Timestamp"].iloc[-1]))
            label = [pair]

        # Case 3: The two shrimp move between being close and not close (most often) 
        else:
            if ((pair_df["Distance"].iloc[0]) <= cutoff) == True:
                enter.append((pair_df["Timestamp"].iloc[0]))
            for index, row in pair_df.iterrows():
                if ((row["Distance"] <= cutoff) & (row["Previous"] > cutoff)) == True:
                    start = row["Timestamp"]
                    enter.append(start)
                elif ((row["Distance"] > cutoff) & (row["Previous"] <= cutoff)) == True:
                    end = row["Timestamp"]
                    exit.append(end)
            if ((pair_df["Distance"].iloc[-1]) <= cutoff) == True:
                exit.append((pair_df["Timestamp"].iloc[-1]))
            label = [pair] * (len(enter))

        important_times = pd.DataFrame()
        important_times["Pair"] = label
        important_times["Start"] = enter
        important_times["End"] = exit
        df_storage.append(important_times)
    
    combined_df = pd.concat(df_storage, axis = 0, ignore_index = True)
    return combined_df
    

In [10]:
timestamp_df = timestamp_from_dist(dist_df, 70)
timestamp_df

Unnamed: 0,Pair,Start,End
0,"(A, B)",0:00:00: 5,0:00:17:23
1,"(A, B)",0:03:07:94,0:03:08:41


#### Step 5) Save it to your computer

In [275]:
# Then, simply define an output filepath and write it to your computer, call the csv file whatver you want

zebra_filepath = "/Users/lukefields/Desktop/Shrimp-Data/timestamps/" + "zebra-timestamped-12.csv" 

timestamp_df.to_csv(zebra_filepath, sep = ",")