In [1]:
import numpy as np
import pandas as pd
import tracktor as tr
import seaborn as sns
import plotly.express as px
import cv2
import sys
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
from itertools import combinations
pd.options.mode.chained_assignment = None
import tkinter as tk
from tkinter import *
from tkinter import ttk, filedialog
from tkinter.filedialog import askopenfile
from PIL import ImageTk, Image



# ---------- Main window and tab layout ----------
# Create the top-level Tk window that hosts the whole application.
root = tk.Tk()
root.title("EEMB DS Capstone")
# NOTE(review): this hands a .png file to iconbitmap via an absolute,
# machine-specific path — confirm it loads on every platform this GUI targets.
root.iconbitmap("/Users/lukefields/Desktop/Shrimp-Capstone/pictures/logo-creation/Capstone-logo-w-graphs.png")
root.geometry("800x800")

# Notebook widget that holds one tab (frame) per feature.
tabControl = ttk.Notebook(root)

tab1 = ttk.Frame(tabControl)
tab2 = ttk.Frame(tabControl)
tab3 = ttk.Frame(tabControl)
tab4 = ttk.Frame(tabControl)


# Register the frames with the notebook under their visible tab titles.
tabControl.add(tab1, text="Home")
tabControl.add(tab2, text="Movement Statistics")
tabControl.add(tab3, text="Distances Between Shrimp")
tabControl.add(tab4, text="Important Timestamps")
tabControl.pack(expand=1, fill="both")

# Grid sizing for the Home tab: row 0 and columns 0-2 get a minimum size and stretch weight.
tab1.rowconfigure(0, minsize=50, weight=1)
tab1.columnconfigure([0, 1, 2], minsize=50, weight=1)

########### TAB 2: KEY VALUES ##########

def key_values(dataframe, lag=2, stationary_threshold=1):
    """Summarise the movement of every tracked individual.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Tracking data with the columns ``id``, ``frame``, ``pos_x`` and
        ``pos_y`` (one row per individual per frame).
    lag : int, optional
        Number of rows (of the same individual, ordered by frame) between the
        two positions whose displacement is reported as "speed". Defaults to
        2, the value that was previously hard-coded.
        NOTE(review): with per-individual tracks a lag of 1 would measure
        consecutive-frame displacement — confirm that 2 is intended.
    stationary_threshold : float, optional
        A row counts as stationary when its speed is strictly below this
        value. Defaults to 1, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per individual with the columns ``ID``,
        ``Distance Traveled`` (sum of the per-row speeds), ``Average Speed``,
        ``Max Speed`` and ``% Stationary``. An input without any rows yields
        an empty frame with those columns.
    """
    columns = ["ID", "Distance Traveled", "Average Speed", "Max Speed", "% Stationary"]
    summaries = []

    for identity in dataframe["id"].unique():
        # Order by frame *before* differencing so that shuffled input rows do
        # not produce displacements between unrelated frames.
        track = dataframe[dataframe["id"] == identity].sort_values(by=["frame"])
        if track.empty:
            # A missing (NaN) id never compares equal to itself; nothing to summarise.
            continue

        dx = track["pos_x"] - track["pos_x"].shift(lag)
        dy = track["pos_y"] - track["pos_y"].shift(lag)
        # The first `lag` rows have no earlier position; treat them as not moving.
        speed = np.sqrt(dx**2 + dy**2).fillna(0)

        summaries.append({
            "ID": identity,
            "Distance Traveled": speed.cumsum().max(),
            "Average Speed": speed.mean(),
            "Max Speed": speed.max(),
            "% Stationary": (speed < stationary_threshold).mean() * 100,
        })

    return pd.DataFrame(summaries, columns=columns)


# ---------- Tab 2 controls ----------
# Instruction label; upload_file_tab2 later replaces its text with the chosen file path.
inst_tab2 = ttk.Label(tab2, text = "Calculate information like shrimp distance, speed, etc. below!")
inst_tab2.grid(row = 1, column = 0)
# The lambda defers the lookup of upload_file_tab2, which is defined further down.
browse_tab2 = ttk.Button(master = tab2, text= "Browse and Calculate Key Values", command = lambda:upload_file_tab2())
browse_tab2.grid(row = 2, column=0, sticky="nsew", padx=50, pady=50)
# Status label that shows the row/column count of the computed table.
dim_tab2 = tk.Label(tab2, width = 40, text = "", bg = "orange")
dim_tab2.grid(row = 3, column = 0)
# Column names of the table currently shown on this tab (filled on upload).
tab2_df = []

def upload_file_tab2():
    """Ask for a tracking CSV, compute the movement statistics and show them.

    Updates the module-level ``df`` (the computed table) and ``tab2_df`` (its
    column names), writes the chosen path and the table dimensions into the
    tab's labels and finally renders the table via ``df_treeview_tab2``.
    Does nothing when the file dialog is dismissed without a selection.
    """
    global df, tab2_df
    file_types = [("CSV files", "*.csv"), ("All", "*.*")]
    file = filedialog.askopenfilename(filetypes = file_types)
    if not file:
        # Dialog cancelled: keep the current state instead of failing in
        # pd.read_csv on an empty path.
        return

    inst_tab2.config(text = file)
    df = key_values(pd.read_csv(file))
    tab2_df = list(df)
    str1 = "Rows:" + str(df.shape[0]) + " , Columns:" + str(df.shape[1])
    dim_tab2.config(text = str1)
    df_treeview_tab2()
    
    
def df_treeview_tab2():
    """Show the module-level ``df`` as a table on the Movement Statistics tab.

    A new Treeview is created in grid row 4 of ``tab2`` with one column per
    name in ``tab2_df`` and one item per row of ``df``; the widget is stored
    in the module-level ``trv``.
    """
    global df, trv, tab2_df
    table_rows = df.to_numpy().tolist()

    trv = ttk.Treeview(
        tab2,
        selectmode = "browse",
        height = 10,
        show = "headings",
        columns = tab2_df,
    )
    trv.grid(row = 4, column = 0, columnspan = 3, padx = 15, pady = 25)

    # Configure every column and give it a header that matches its name.
    for heading in tab2_df:
        trv.column(heading, width = 130, anchor = "c")
        trv.heading(heading, text = str(heading))

    # Append the records in their original order.
    for record in table_rows:
        trv.insert("", "end", values = list(record))

# Legend below the table explaining each column produced by key_values.
ttk.Label(tab2, text = "ID: The identification of the shrimp").grid(row = 5, column = 0)
ttk.Label(tab2, text = "Distance Traveled: Total area covered throughout the video by the shrimp").grid(row = 6, column = 0)
ttk.Label(tab2, text = "Average Speed: How fast the shrimp was traveling on average").grid(row = 7, column = 0)
ttk.Label(tab2, text = "Max Speed: The maximum speed reached by the shrimp").grid(row = 8, column = 0)
ttk.Label(tab2, text = "% Stationary: Percentage of time the shrimp spent not moving").grid(row = 9, column = 0)
    

########### TAB 3: DISTANCES BETWEEN SHRIMP ##########

def _format_timestamp(seconds):
    """Format a time in seconds as ``H:MM:SS:CC`` (CC = hundredths of a second).

    The value wraps around after 24 hours. Hundredths are truncated rather
    than rounded, but binary floating-point noise is rounded away first so
    that e.g. 1.15 s is rendered as ``0:00:01:15`` instead of ``0:00:01:14``.
    """
    total_hundredths = int(round((seconds % (24 * 3600)) * 100, 6))
    whole_seconds, hundredths = divmod(total_hundredths, 100)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return "%d:%02d:%02d:%02d" % (hours, minutes, secs, hundredths)


def distances(dataframe, fps):
    """Compute the frame-by-frame distance between every pair of individuals.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Tracking data with the columns ``id``, ``frame``, ``pos_x`` and
        ``pos_y``.
    fps : number
        Frames per second; a frame number is converted to a time as
        ``frame / fps``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pair`` (tuple of the two ids), ``Frame``, ``Timestamp``
        (``H:MM:SS:CC`` string, hundredths zero-padded) and ``Distance``
        (Euclidean distance between the two positions). Pairs appear in the
        order produced by ``itertools.combinations`` over the ids in order of
        first appearance; rows within a pair are ordered by frame. Only
        frames in which *both* individuals of a pair were recorded are
        reported. With fewer than two ids, or no shared frames at all, an
        empty frame with these columns is returned.
    """
    columns = ["Pair", "Frame", "Timestamp", "Distance"]
    ids = dataframe["id"].unique()

    # Build one frame-indexed position table per individual once, instead of
    # re-filtering the whole dataframe for every frame of every pair.
    tracks = []
    for identity in ids:
        track = dataframe.loc[dataframe["id"] == identity, ["frame", "pos_x", "pos_y"]]
        # If a frame was recorded more than once for an individual, keep the first row.
        tracks.append(track.drop_duplicates(subset="frame").set_index("frame").sort_index())

    per_pair = []
    for (id_a, track_a), (id_b, track_b) in combinations(zip(ids, tracks), 2):
        # Inner join on the frame index: frames where one of the two is missing drop out.
        shared = track_a.join(track_b, how="inner", lsuffix="_a", rsuffix="_b").sort_index()
        if shared.empty:
            continue

        frames = shared.index.to_numpy()
        dx = shared["pos_x_a"] - shared["pos_x_b"]
        dy = shared["pos_y_a"] - shared["pos_y_b"]

        per_pair.append(pd.DataFrame({
            "Pair": [(id_a, id_b)] * len(shared),
            "Frame": frames,
            "Timestamp": [_format_timestamp(frame / fps) for frame in frames],
            "Distance": np.sqrt(dx**2 + dy**2).to_numpy(),
        }))

    if not per_pair:
        return pd.DataFrame(columns=columns)
    return pd.concat(per_pair, axis = 0, ignore_index = True)


# ---------- Tab 3 controls ----------
# Instruction label; upload_file_tab3 later replaces its text with the chosen file path.
inst_tab3 = ttk.Label(tab3, text = "Obtain shrimps' distance from one another frame by frame below!")
inst_tab3.grid(row = 1, column = 0)
# The lambda defers the lookup of upload_file_tab3, which is defined further down.
browse_tab3 = ttk.Button(master = tab3, text= "Browse and Calculate Shrimp Distances over Time",
                         command = lambda:upload_file_tab3())
browse_tab3.grid(row = 2, column=0, sticky="nsew", padx=50, pady=50)
# Status label that shows the row/column count of the computed table.
dim_tab3 = tk.Label(tab3, width = 40, text = "", bg = "orange")
dim_tab3.grid(row = 3, column = 0)
# Column names of the table currently shown on this tab (filled on upload).
tab3_df = []

def upload_file_tab3():
    """Ask for a tracking CSV, compute pairwise distances and show them.

    Updates the module-level ``df`` (the computed table) and ``tab3_df`` (its
    column names), writes the chosen path and the table dimensions into the
    tab's labels and finally renders the table via ``df_treeview_tab3``.
    The frame rate passed to ``distances`` is fixed at 60.
    Does nothing when the file dialog is dismissed without a selection.
    """
    global df, tab3_df
    file_types = [("CSV files", "*.csv"), ("All", "*.*")]
    file = filedialog.askopenfilename(filetypes = file_types)
    if not file:
        # Dialog cancelled: keep the current state instead of failing in
        # pd.read_csv on an empty path.
        return

    inst_tab3.config(text = file)
    df = distances(pd.read_csv(file), 60)
    tab3_df = list(df)
    str1 = "Rows:" + str(df.shape[0]) + " , Columns:" + str(df.shape[1])
    dim_tab3.config(text = str1)
    df_treeview_tab3()
    
    
def df_treeview_tab3():
    """Show the module-level ``df`` as a table on the Distances tab.

    A new Treeview is created in grid row 4 of ``tab3`` with one column per
    name in ``tab3_df`` and one item per row of ``df``; the widget is stored
    in the module-level ``trv``.
    """
    global df, trv, tab3_df
    r_set = df.to_numpy().tolist()
    trv = ttk.Treeview(tab3, selectmode = "browse", height = 10, show = "headings", columns = tab3_df)
    trv.grid(row = 4, column = 0, columnspan = 3, padx = 15, pady = 25)

    for i in tab3_df:
        trv.column(i, width = 130, anchor = "c")
        trv.heading(i, text = str(i))

    for dt in r_set:
        # No explicit iid: the first column (the pair) repeats on every row of
        # that pair, and Treeview rejects a second item with an existing id.
        # Letting Tk generate the item ids keeps every row insertable.
        trv.insert("", "end", values = list(dt))
    
# Legend below the table explaining each column produced by distances().
# NOTE(review): the Timestamp text says "in seconds", while distances() emits a
# formatted hours:minutes:seconds:fraction string — confirm the wording.
ttk.Label(tab3, text = "Pair: The two shrimp we are measuring distance between").grid(row = 5, column = 0)
ttk.Label(tab3, text = "Frame: The frame these shrimp are at that distance").grid(row = 6, column = 0)
ttk.Label(tab3, text = "Timestamp: The time (in seconds) these shrimp are at that distance").grid(row = 7, column = 0)
ttk.Label(tab3, text = "Distance: How far the two shrimp are from each other").grid(row = 8, column = 0)
    

########### TAB 4: IMPORTANT TIMESTAMPS ##########

def timestamp_from_dist(dataframe, cutoff):
    """Find the time ranges during which each pair stays within ``cutoff``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Per-frame distances with the columns ``Pair``, ``Timestamp`` and
        ``Distance``; the rows of a pair are expected in chronological order.
    cutoff : number
        A pair counts as "close" in a row when ``Distance <= cutoff``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pair``, ``Start`` and ``End`` with one row per interval,
        pairs in order of first appearance:

        * never close: a single row with ``Start`` and ``End`` both ``0``;
        * always close: a single row spanning the first to the last timestamp;
        * otherwise: one row per close interval. ``Start`` is the timestamp of
          the first close row, ``End`` the timestamp of the first row that is
          far again (or the last timestamp if the pair is still close there).

        An input without rows yields an empty frame with these columns.
    """
    columns = ["Pair", "Start", "End"]
    intervals = []

    for _, pair_df in dataframe.groupby("Pair", sort=False):
        pair = pair_df["Pair"].iloc[0]
        stamps = pair_df["Timestamp"].tolist()

        dist = pair_df["Distance"]
        # Distance in the preceding row; the first row is compared with itself
        # so that it can never register as a transition.
        prev = dist.shift(1).fillna(dist)
        close = (dist <= cutoff).to_numpy()

        if not close.any():
            # Case 1: the two shrimp are never within the distance threshold.
            starts, ends = [0], [0]
        elif close.all():
            # Case 2: the two shrimp are always within the distance threshold.
            starts, ends = [stamps[0]], [stamps[-1]]
        else:
            # Case 3: the two shrimp move between being close and not close.
            entered = np.flatnonzero(close & (prev > cutoff).to_numpy())
            left = np.flatnonzero((dist > cutoff).to_numpy() & (prev <= cutoff).to_numpy())
            starts = [stamps[i] for i in entered]
            ends = [stamps[i] for i in left]
            if close[0]:
                # Already close in the first row: the interval opens there.
                starts.insert(0, stamps[0])
            if close[-1]:
                # Still close in the last row: close the open interval there.
                ends.append(stamps[-1])

        intervals.append(pd.DataFrame({
            "Pair": [pair] * len(starts),
            "Start": starts,
            "End": ends,
        }))

    if not intervals:
        return pd.DataFrame(columns=columns)
    return pd.concat(intervals, axis = 0, ignore_index = True)
    


# ---------- Tab 4 controls ----------
# Instruction label; upload_file_tab4 later replaces its text with the chosen file path.
inst_tab4 = ttk.Label(tab4, text = "Find the timestamps of the video where important moments happen below!")
inst_tab4.grid(row = 1, column = 0)
# The lambda defers the lookup of upload_file_tab4, which is defined further down.
browse_tab4 = ttk.Button(master = tab4, text= "Browse and Find Important Timestamps", 
                         command = lambda:upload_file_tab4())
browse_tab4.grid(row = 2, column=0, sticky="nsew", padx=50, pady=50)
# Status label that shows the row/column count of the computed table.
dim_tab4 = tk.Label(tab4, width = 40, text = "", bg = "orange")
dim_tab4.grid(row = 3, column = 0)
# Column names of the table currently shown on this tab (filled on upload).
tab4_df = []

def upload_file_tab4():
    """Ask for a distances CSV, extract the close-contact intervals and show them.

    Updates the module-level ``df`` (the computed table) and ``tab4_df`` (its
    column names), writes the chosen path and the table dimensions into the
    tab's labels and finally renders the table via ``df_treeview_tab4``.
    The distance cutoff passed to ``timestamp_from_dist`` is fixed at 100.
    Does nothing when the file dialog is dismissed without a selection.
    """
    global df, tab4_df
    file_types = [("CSV files", "*.csv"), ("All", "*.*")]
    file = filedialog.askopenfilename(filetypes = file_types)
    if not file:
        # Dialog cancelled: keep the current state instead of failing in
        # pd.read_csv on an empty path.
        return

    inst_tab4.config(text = file)
    df = timestamp_from_dist(pd.read_csv(file), 100)
    tab4_df = list(df)
    str1 = "Rows:" + str(df.shape[0]) + " , Columns:" + str(df.shape[1])
    dim_tab4.config(text = str1)
    df_treeview_tab4()
    
    
def df_treeview_tab4():
    """Show the module-level ``df`` as a table on the Important Timestamps tab.

    A new Treeview is created in grid row 4 of ``tab4`` with one column per
    name in ``tab4_df`` and one item per row of ``df``; the widget is stored
    in the module-level ``trv``.
    """
    global df, trv, tab4_df
    r_set = df.to_numpy().tolist()
    trv = ttk.Treeview(tab4, selectmode = "browse", height = 10, show = "headings", columns = tab4_df)
    trv.grid(row = 4, column = 0, columnspan = 3, padx = 15, pady = 25)

    for i in tab4_df:
        trv.column(i, width = 130, anchor = "c")
        trv.heading(i, text = str(i))

    for dt in r_set:
        # No explicit iid: a pair with several intervals occupies several rows,
        # and Treeview rejects a second item with an existing id. Letting Tk
        # generate the item ids keeps every row insertable.
        trv.insert("", "end", values = list(dt))
        
    
# Legend below the table explaining each column produced by timestamp_from_dist().
ttk.Label(tab4, text = "Pair: The two shrimp that are interacting").grid(row = 5, column = 0)
ttk.Label(tab4, text = "Start: The time the two shrimp start having an important interaction").grid(row = 6, column = 0)
ttk.Label(tab4, text = "End: The time the two shrimp end this important interaction").grid(row = 7, column = 0)
    
    
##################
    

# ---------- TAB 1: HOME ----------
# Load the logo and scale it for display at the top of the Home tab.
# NOTE(review): absolute, machine-specific path — confirm how the image should
# be located on other machines.
img = Image.open("/Users/lukefields/Desktop/Shrimp-Capstone/pictures/logo-creation/Capstone-logo-w-graphs.png")
my_img = img.resize((500, 380))
# Bound to a module-level name so the image object stays referenced while the label shows it.
orig_img = ImageTk.PhotoImage(my_img)
ttk.Label(tab1, image=orig_img).grid(column=0, row=0, sticky="n")

# Welcome text followed by a one-line description of each tab.
ttk.Label(tab1, text="Welcome to the EEMB Data Science Capstone GUI!").grid(column=0, row=1)

ttk.Label(tab1, text="Navigate throughout the tabs to obtain key information about the shrimp interactions!").grid(column=0, row=2, pady=20)

ttk.Label(tab1, text="Movement Statistics: Important info like shrimp speed, distance traveled, etc.").grid(column=0, row=3)

ttk.Label(tab1, text="Distances Between Shrimp: The distance between each pair of shrimp across each frame").grid(column=0, row=4)

ttk.Label(tab1, text="Important Timestamps: Ranges of time when interactions are occurring between shrimp").grid(column=0, row=5)

# Hand control to the Tk event loop.
root.mainloop()

2023-04-27 21:08:32.885 Python[22893:922037] +[CATransaction synchronize] called within transaction
2023-04-27 21:08:41.659 Python[22893:922037] +[CATransaction synchronize] called within transaction


In [2]:
import numpy as np
import pandas as pd
import tracktor as tr
import seaborn as sns
import plotly.express as px
import cv2
import sys
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
from itertools import combinations
pd.options.mode.chained_assignment = None

# Load one tracking CSV to try distances() outside the GUI.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
df = pd.read_csv("/Users/lukefields/Desktop/Shrimp-Data/tracktor-data/march-1-4m20s.csv")

def _format_timestamp(seconds):
    """Format a time in seconds as ``H:MM:SS:CC`` (CC = hundredths of a second).

    The value wraps around after 24 hours. Hundredths are truncated rather
    than rounded, but binary floating-point noise is rounded away first so
    that e.g. 1.15 s is rendered as ``0:00:01:15`` instead of ``0:00:01:14``.
    """
    total_hundredths = int(round((seconds % (24 * 3600)) * 100, 6))
    whole_seconds, hundredths = divmod(total_hundredths, 100)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return "%d:%02d:%02d:%02d" % (hours, minutes, secs, hundredths)


def distances(dataframe, fps):
    """Compute the frame-by-frame distance between every pair of individuals.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Tracking data with the columns ``id``, ``frame``, ``pos_x`` and
        ``pos_y``.
    fps : number
        Frames per second; a frame number is converted to a time as
        ``frame / fps``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pair`` (tuple of the two ids), ``Frame``, ``Timestamp``
        (``H:MM:SS:CC`` string, hundredths zero-padded) and ``Distance``
        (Euclidean distance between the two positions). Pairs appear in the
        order produced by ``itertools.combinations`` over the ids in order of
        first appearance; rows within a pair are ordered by frame. Only
        frames in which *both* individuals of a pair were recorded are
        reported. With fewer than two ids, or no shared frames at all, an
        empty frame with these columns is returned.
    """
    columns = ["Pair", "Frame", "Timestamp", "Distance"]
    ids = dataframe["id"].unique()

    # Build one frame-indexed position table per individual once, instead of
    # re-filtering the whole dataframe for every frame of every pair.
    tracks = []
    for identity in ids:
        track = dataframe.loc[dataframe["id"] == identity, ["frame", "pos_x", "pos_y"]]
        # If a frame was recorded more than once for an individual, keep the first row.
        tracks.append(track.drop_duplicates(subset="frame").set_index("frame").sort_index())

    per_pair = []
    for (id_a, track_a), (id_b, track_b) in combinations(zip(ids, tracks), 2):
        # Inner join on the frame index: frames where one of the two is missing drop out.
        shared = track_a.join(track_b, how="inner", lsuffix="_a", rsuffix="_b").sort_index()
        if shared.empty:
            continue

        frames = shared.index.to_numpy()
        dx = shared["pos_x_a"] - shared["pos_x_b"]
        dy = shared["pos_y_a"] - shared["pos_y_b"]

        per_pair.append(pd.DataFrame({
            "Pair": [(id_a, id_b)] * len(shared),
            "Frame": frames,
            "Timestamp": [_format_timestamp(frame / fps) for frame in frames],
            "Distance": np.sqrt(dx**2 + dy**2).to_numpy(),
        }))

    if not per_pair:
        return pd.DataFrame(columns=columns)
    return pd.concat(per_pair, axis = 0, ignore_index = True)

# Run the pairwise-distance computation on the loaded data at 60 frames per
# second; the result is not assigned. The output recorded below this cell
# shows the run ending in a KeyboardInterrupt.
distances(df, 60)


KeyboardInterrupt

