# This script contains individual functions that can be helpful in timestamping

## 1) A function that calculates distance between two shrimp
## 2) A function that graphs the distance between two shrimp over time
## 3) A function that generates important frame ranges when shrimp are close
## 4) A function that generates important timestamps when shrimp are close
## 5) A function that generates a new dataframe with only important frames

#### Before anything, load necessary packages and necessary data

In [16]:
# Load packages

import numpy as np
import pandas as pd
import tracktor as tr
import seaborn as sns
import plotly.express as px
import cv2
import sys
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

In [None]:
# Load dataframe
# Replace the placeholder string with the path to the tracking CSV. The
# distance function below reads its 'frame', 'id', 'pos_x' and 'pos_y' columns.

df = pd.read_csv("Insert CSV File here")

### 1) A function that calculates distance between two shrimp

In [2]:
# Define distance function

def dist_between(dataframe, fps):
    """Return the per-frame distance between the two tracked shrimp.

    The two shrimp are the rows carrying the two smallest distinct values of
    the 'id' column. When a shrimp has several rows for the same frame, the
    first one is used.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Tracking output with 'frame', 'id', 'pos_x' and 'pos_y' columns.
        It is only read, never modified.
    fps : number
        Frames per second of the video; used to convert frame numbers to
        timestamps in seconds.

    Returns
    -------
    pandas.DataFrame
        One row per distinct frame (ascending) with float columns "Frame",
        "Timestamp" (frame / fps) and "Distance". "Distance" is NaN for a
        frame in which either shrimp has no row.

    Raises
    ------
    IndexError
        If the data is non-empty but holds fewer than two distinct ids.
    """
    frames = np.unique(dataframe['frame'])
    if frames.size == 0:
        return pd.DataFrame(
            {"Frame": [], "Timestamp": [], "Distance": []}, dtype=float
        )

    # Pick the ids directly instead of relabelling the 'id' column in place:
    # the relabel mutated the caller's data and relied on chained assignment.
    shrimp_ids = np.unique(dataframe['id'])
    if shrimp_ids.size < 2:
        raise IndexError(
            "dist_between needs at least two distinct values in the 'id' column"
        )

    def first_position_per_frame(shrimp_id):
        # One (pos_x, pos_y) row per frame for this shrimp, aligned to
        # `frames`; frames where the shrimp is missing become NaN.
        rows = dataframe.loc[dataframe['id'] == shrimp_id,
                             ['frame', 'pos_x', 'pos_y']]
        rows = rows.drop_duplicates(subset='frame', keep='first')
        return rows.set_index('frame').reindex(frames)

    first = first_position_per_frame(shrimp_ids[0])
    second = first_position_per_frame(shrimp_ids[1])

    dx = first['pos_x'].to_numpy(dtype=float) - second['pos_x'].to_numpy(dtype=float)
    dy = first['pos_y'].to_numpy(dtype=float) - second['pos_y'].to_numpy(dtype=float)
    distances = np.sqrt(dx**2 + dy**2)

    # Timestamps make it easier to locate a frame in the video.
    return pd.DataFrame({
        "Frame": frames.astype(float),
        "Timestamp": frames / fps,
        "Distance": distances,
    })

In [5]:
# Test on your dataframe to see results, frames per second will usually be 60
# The second argument is the video's frames per second; it is used to convert
# frame numbers into timestamps in seconds.
dist_between(df, 60)

In [None]:
# To save the results for further use
# df_dist has the columns "Frame", "Timestamp" and "Distance"; the functions in
# the following sections take it as their input.
df_dist = dist_between(df, 60)

### 2) A function that graphs the distance between two shrimp over time

In [6]:
# Define graphing function, uses the dataframe of the distances we just created

def graph_dist(df_dist):
    """Show a scatter plot of shrimp distance against time.

    Parameters
    ----------
    df_dist : pandas.DataFrame
        Frame with "Timestamp" and "Distance" columns.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()`` and not returned.
    """
    times = df_dist["Timestamp"]
    gaps = df_dist["Distance"]

    # Small, semi-transparent coral markers keep dense recordings readable.
    plt.scatter(times, gaps, s=5, alpha=0.5, c='#FF7F50')

    for set_label, text in ((plt.xlabel, 'Time (s)'), (plt.ylabel, 'Distance')):
        set_label(text, fontsize=16)

    plt.tight_layout()
    plt.show()

In [None]:
# Test on your dataframe to see visualized results, helpful to get an idea of the shrimp's movement
# Draws the "Distance" column against the "Timestamp" column as a scatter plot.
graph_dist(df_dist)

In [None]:
# To save the results for further use
# NOTE(review): graph_dist has no return statement, so dist_graph is None after
# this line; the plot is only displayed, not stored.
dist_graph = graph_dist(df_dist)

### 3) A function that generates important frame ranges when shrimp are close

In [10]:
# Define important frame generation function

def important_frame_range(df, dist_cutoff):
    """Return the frame ranges during which the shrimp stay within the cutoff.

    A range is a maximal run of consecutive rows whose "Distance" is
    ``<= dist_cutoff``. Rows with a NaN distance count as not close.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Frame" and "Distance" columns, ordered by frame.
        It is only read; no helper columns are added to it.
    dist_cutoff : number
        Largest distance that still counts as "close".

    Returns
    -------
    pandas.DataFrame
        One row per range with columns "Frame" (first close frame) and
        "Frame_end" (last close frame of the same run). The index is the
        row position at which the range starts. Empty when the shrimp are
        never within the cutoff.
    """
    close = (df["Distance"] <= dist_cutoff).to_numpy(dtype=bool)

    # Pad with False on both sides so a run touching the first or last row
    # still has a well-defined start and end.
    padded = np.concatenate(([False], close, [False]))
    prev_close = padded[:-2]
    next_close = padded[2:]

    starts = np.flatnonzero(close & ~prev_close)   # entering "importance"
    ends = np.flatnonzero(close & ~next_close)     # last row before exiting

    frames = df["Frame"].to_numpy()
    return pd.DataFrame(
        {"Frame": frames[starts], "Frame_end": frames[ends]},
        index=starts,
    )

In [None]:
# Test on your dataframe to see results, helpful to see the frames
# NOTE(review): dist_cutoff is not assigned in any cell above; set it to the
# largest "Distance" value that should count as close before running this.
important_frame_range(df_dist, dist_cutoff)

In [None]:
# To save the results for further use
# frames_imp holds one row per range, with "Frame" and "Frame_end" columns.
# NOTE(review): dist_cutoff must already be defined; no cell above assigns it.
frames_imp = important_frame_range(df_dist, dist_cutoff)

### 4) A function that generates important timestamps when shrimp are close

In [None]:
# Define important timestamp generation function

def important_timestamps(df, dist_cutoff):
    """Return the time ranges during which the shrimp stay within the cutoff.

    A range is a maximal run of consecutive rows whose "Distance" is
    ``<= dist_cutoff``. Rows with a NaN distance count as not close.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Timestamp" and "Distance" columns, ordered by time.
        It is only read; no helper columns are added to it.
    dist_cutoff : number
        Largest distance that still counts as "close".

    Returns
    -------
    pandas.DataFrame
        One row per range with columns "Timestamp" (first close timestamp)
        and "Timestamp_end" (last close timestamp of the same run). The
        index is the row position at which the range starts. Empty when the
        shrimp are never within the cutoff.
    """
    close = (df["Distance"] <= dist_cutoff).to_numpy(dtype=bool)

    # Pad with False on both sides so a run touching the first or last row
    # still has a well-defined start and end.
    padded = np.concatenate(([False], close, [False]))
    prev_close = padded[:-2]
    next_close = padded[2:]

    starts = np.flatnonzero(close & ~prev_close)   # entering "importance"
    ends = np.flatnonzero(close & ~next_close)     # last row before exiting

    stamps = df["Timestamp"].to_numpy()
    return pd.DataFrame(
        {"Timestamp": stamps[starts], "Timestamp_end": stamps[ends]},
        index=starts,
    )

In [None]:
# Test on your dataframe to see results, helpful to see the seconds of video to go to
# NOTE(review): dist_cutoff is not assigned in any cell above; set it to the
# largest "Distance" value that should count as close before running this.
important_timestamps(df_dist, dist_cutoff)

In [None]:
# To save the results for further use
# seconds_imp holds one row per range, with "Timestamp" and "Timestamp_end" columns.
# NOTE(review): dist_cutoff must already be defined; no cell above assigns it.
seconds_imp = important_timestamps(df_dist, dist_cutoff)

### 5) A function that generates a new dataframe with only important frames

In [9]:
# Define function that generates a new dataframe with all "important frames"

def new_important_df(df, dist_cutoff):
    """Return only the rows in which the shrimp are within the cutoff.

    Uses the same inclusive test (``Distance <= dist_cutoff``) as the range
    functions, so a frame lying exactly on the cutoff is treated the same
    way everywhere.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "Distance" column.
    dist_cutoff : number
        Largest distance that still counts as "close".

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, with all columns of ``df``
        and the original index labels. Editing it does not touch ``df``.
    """
    is_close = df["Distance"] <= dist_cutoff
    return df.loc[is_close].copy()

In [None]:
# Test on your dataframe to see results; returns only the frames that pass the distance cutoff
# df_dist is rebuilt first so that it holds just the "Frame", "Timestamp" and
# "Distance" columns produced by dist_between.
# NOTE(review): dist_cutoff must already be defined; no cell above assigns it.
df_dist = dist_between(df, 60)

new_important_df(df_dist, dist_cutoff)

In [None]:
# To save the results for further use
# df_imp keeps the columns of df_dist, restricted to the close frames.
df_imp = new_important_df(df_dist, dist_cutoff)

### Saving these dataframes to your computer

In [11]:
# First, establish what you want your file to be called
file_name = "Insert File Name Here"

# Then build the output file path, i.e. the folder where the CSV should be
# stored, followed by the file name and a suffix describing the dataframe
output_filepath = "Insert Folder Path Here/" + file_name + "_Whatever your dataframe is.csv"

In [12]:
##### To further exemplify the process above, here is an example:
# Let's say I was working with a video of shrimp interacting on March 2nd, and the dataframe was called time_imp
# in python, and I wanted to store this data of important timestamps in my shrimp data folder

# First, I would define the file name as something that is easily understandable, like Shrimp 0302
file_name = "shrimp_0302"

# Then, I would define the output filepath as
output_filepath = "/Users/lukefields/Desktop/Shrimp-Capstone/shrimp_data_out/" + file_name + "_timestamps.csv"

# Finally, I would write our python dataframe (name above) to a csv file with the path directly above as follows
# NOTE: time_imp is only an example name and is not created anywhere above, so
# running this cell unchanged raises NameError. Replace it with the dataframe
# you actually want to save (for example seconds_imp from section 4).
time_imp.to_csv(output_filepath, sep=',')

# And you should see your file in your data folder

NameError: name 'time_imp' is not defined