<a href="https://colab.research.google.com/github/healthonrails/annolid/blob/main/docs/tutorials/Annolid_post_processing_distances.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Calculate distances for a pair of instances in the same frame or the same instance across frames

In [None]:
import pandas as pd
import numpy as np
from google.colab import data_table

# Mount your Google Drive

In [None]:
from google.colab import drive
drive.mount('/gdrive')

In [None]:
!pip install gradio

In [None]:
import gradio as gr

In [None]:
data_table.enable_dataframe_formatter()

# Extract video metadata

In [None]:
import cv2
VIDEO_FILE_PATH = '/gdrive/MyDrive/annolid/Chens_tracked_videos/Nov5_post_Tank7_Tank8/Nov5_post_Tank7_Tank8.mp4'
cap = cv2.VideoCapture(VIDEO_FILE_PATH)
# cv2.VideoCapture does not raise when the file is missing or unreadable; it
# just reports zeros for every property. Fail here instead of letting a zero
# FPS turn into a division by zero when the video length is computed below.
if not cap.isOpened():
    raise FileNotFoundError(f'Could not open video file: {VIDEO_FILE_PATH}')

In [None]:
def extract_video_metadata(cap):
    """Collect basic properties of a video capture.

    Args:
        cap (VideoCapture): cv2 VideoCapture object to query.
    Returns:
        dict: maps each metadata name to the value ``cap.get`` returns for
            the matching ``cv2.CAP_PROP_*`` property id.
    """
    # (metadata key, OpenCV property id) pairs, queried in this order.
    property_ids = (
        ('frame_width', cv2.CAP_PROP_FRAME_WIDTH),
        ('frame_height', cv2.CAP_PROP_FRAME_HEIGHT),
        ('fps', cv2.CAP_PROP_FPS),
        ('format', cv2.CAP_PROP_FORMAT),
        ('frame_count', cv2.CAP_PROP_FRAME_COUNT),
        ('fourcc', cv2.CAP_PROP_FOURCC),
        # NOTE(review): the key is spelled 'model' but stores CAP_PROP_MODE;
        # left unchanged because callers may already read this key.
        ('model', cv2.CAP_PROP_MODE),
    )
    return {key: cap.get(prop_id) for key, prop_id in property_ids}

video_meta_data = extract_video_metadata(cap)

In [None]:
RATIO_PIX_TO_CM = 1 # with box size
FPS = video_meta_data['fps']
FPS

In [None]:
total_number_frames = video_meta_data['frame_count']
total_number_frames

In [None]:
video_length_in_seconds = total_number_frames / FPS
video_length_in_seconds

# Tracking CSV file location

In [None]:
CSV_FILE = '/gdrive/MyDrive/annolid/Chens_tracked_videos/Nov5_post_Tank7_Tank8/results_trained_on_corrected_keyframes/Nov5_post_Tank7_Tank8_predicted_keyframes_coco_dataset_Nov5_post_Tank7_Tank8_mask_rcnn_tracking_results_with_segmenation.csv'

In [None]:
df = pd.read_csv(CSV_FILE)

In [None]:
df.head()

In [None]:
df_male_92 = df[df.instance_name == 'Male_92']

In [None]:
df_male_92.head()

# Find the instance's last detected location before the given frame

In [None]:
def find_last_show_position(instance_name='Female_52', frame_number=0):
    """Return the latest row of ``instance_name`` strictly before ``frame_number``.

    Reads the notebook-level ``df``. The result is a DataFrame with at most
    one row; it is empty when the instance has no row in an earlier frame.
    """
    is_instance = df.instance_name == instance_name
    is_earlier = df.frame_number < frame_number
    earlier_rows = df[is_instance & is_earlier]
    # Newest frame first, then keep only the top row.
    newest_first = earlier_rows.sort_values(by='frame_number', ascending=False)
    return newest_first.iloc[:1]

# E.g., frame number 387 is missing one instance, Female_95

In [None]:
df[df.frame_number == 387]

# Female_95 was detected in the previous frame 386

In [None]:
find_last_show_position('Female_95',387)

# Find the instances missing from the current frame and fill them in with their last detected location

In [None]:
expected_instances = set([name for name in df.instance_name.unique().tolist() if 'ale_' in name])
#please uncomment the following line if you want to use and input your own list
#expected_instances = ['Male_92', 'Female_95', 'Male_105', 'Female_52']
def fill_missing_instance(frame_number):
    """Add a row for every expected instance that is absent from a frame.

    For each name in ``expected_instances`` with no row at ``frame_number``,
    the row returned by ``find_last_show_position`` is copied, its
    ``frame_number`` is set to ``frame_number``, and the copy is appended to
    the notebook-level ``df`` (which is rebound). Instances with no earlier
    detection are skipped because there is nothing to copy.

    Args:
        frame_number: frame to check and, if needed, fill.
    Returns:
        None. Prints ``frame_number`` and the instance name for each filled row.
    """
    global df
    present = set(df.loc[df.frame_number == frame_number, 'instance_name'])
    # set(...) so that a user-supplied *list* of expected names also works;
    # sorting makes the order of the appended rows deterministic.
    missing = sorted(set(expected_instances) - present)
    filled_rows = []
    for name in missing:
        last_seen = find_last_show_position(name, frame_number)
        if last_seen.empty:
            continue
        # assign() returns a new frame, so no slice of df is modified in place.
        filled_rows.append(last_seen.assign(frame_number=frame_number))
        print(frame_number, name)
    if filled_rows:
        # One concat per call; appended rows keep their original index labels.
        df = pd.concat([df, *filled_rows])

# Fill the frame's missing instances: Female_95 was found to be missing and is filled in with its previous location

In [None]:
fill_missing_instance(387)

# We can verify that Female_95 is now present in frame 387 as follows.

In [None]:
df[df.frame_number == 387]

# Fill missing instances with the last detection location values for all the frames

In [None]:
# df has several rows per frame, so applying over the column would call
# fill_missing_instance once per row and display a Series of None.
# Visit each frame once, in ascending order; the frame numbers are collected
# up front because fill_missing_instance rebinds df while the loop runs.
for frame_number in sorted(df.frame_number.unique()):
    fill_missing_instance(frame_number)

## Calculate the distance of a pair of instances in a given frame

---

In [None]:
def paired_distance(frame_number,
                    this_instance='Female_95',
                    other_instance='Male_105'):
    """Euclidean distance between the centers of two instances in one frame.

    Reads ``cx``/``cy`` from the notebook-level ``df``.

    Args:
        frame_number: frame in which to measure.
        this_instance: name of the first instance.
        other_instance: name of the second instance.
    Returns:
        The distance in the units of ``cx``/``cy``, or ``None`` when either
        instance has no row in the frame or the coordinates are not numeric.
        If an instance has several rows in the frame, its first row is used.
    """
    in_frame = df.loc[df["frame_number"] == frame_number, ['cx', 'cy', 'instance_name']]
    this_xy = in_frame.loc[in_frame.instance_name == this_instance, ['cx', 'cy']].values
    other_xy = in_frame.loc[in_frame.instance_name == other_instance, ['cx', 'cy']].values
    # An empty (0, 2) array would broadcast against the other operand and give
    # a norm of 0.0, which reads as "same position" instead of "missing".
    if len(this_xy) == 0 or len(other_xy) == 0:
        return None
    try:
        # Subtract single points so the result is one point-to-point distance,
        # not a Frobenius norm over several rows.
        return np.linalg.norm(this_xy[0] - other_xy[0])
    except (TypeError, ValueError):
        return None

In [None]:
paired_distance(0,'Female_95','Male_105')

In [None]:
instance_names = list(df.instance_name.unique())

In [None]:
# Use the top-level Gradio components: the gr.inputs / gr.outputs namespaces
# were deprecated in Gradio 3 and are no longer available in current
# releases, which is what the unpinned install above pulls in.
iface = gr.Interface(paired_distance,
                     [
                         gr.Number(),
                         gr.Dropdown(instance_names),
                         gr.Dropdown(instance_names),

                     ],
                     [
                         gr.Label(label="Paired Distance"),
                     ]
                     )
iface.launch()

## Calculate the distance of the instance from the previous frame to the current frame

In [None]:
def instance_distance_between_frame(frame_number,
                                    instance_name='Female_95'):
    """Distance an instance moved between the previous frame and this one.

    Reads ``cx``/``cy`` from the notebook-level ``df`` and compares the
    instance's center at ``frame_number - 1`` and ``frame_number``.

    Args:
        frame_number: current frame.
        instance_name: instance to measure.
    Returns:
        ``0`` when ``frame_number < 1`` (there is no previous frame);
        otherwise the Euclidean distance in the units of ``cx``/``cy``, or
        ``None`` when the instance has no row in either frame or the
        coordinates are not numeric. If the instance has several rows in a
        frame, its first row is used.
    """
    if frame_number < 1:
        return 0
    track = df.loc[df.instance_name == instance_name, ['frame_number', 'cx', 'cy']]
    current_xy = track.loc[track.frame_number == frame_number, ['cx', 'cy']].values
    previous_xy = track.loc[track.frame_number == frame_number - 1, ['cx', 'cy']].values
    # An empty (0, 2) array would broadcast and give a norm of 0.0, which
    # would report "did not move" for a frame where the instance is missing.
    if len(current_xy) == 0 or len(previous_xy) == 0:
        return None
    try:
        return np.linalg.norm(current_xy[0] - previous_xy[0])
    except (TypeError, ValueError):
        return None
    

In [None]:
df['dist_from_previous_frame_female_95'] = df.frame_number.apply(instance_distance_between_frame,instance_name='Female_95')

In [None]:
df['dist_from_previous_frame_female_95'].describe()

## The total distance traveled by instance female_95, in pixels

In [None]:
# df has one row per (frame, instance) and the per-frame distance is repeated
# on every row of that frame, so count each frame once before summing.
df.drop_duplicates(subset='frame_number')['dist_from_previous_frame_female_95'].sum()

# Another example: instance Male_105

In [None]:
df['dist_from_previous_frame_male_105']= df.frame_number.apply(instance_distance_between_frame, instance_name='Male_105')

In [None]:
# Total distance traveled by Male_105 across all the frames.
# The per-frame distance is repeated on every row of a frame, so keep one row
# per frame before summing.
df.drop_duplicates(subset='frame_number')['dist_from_previous_frame_male_105'].sum()

# Distance traveled and average speed

In [None]:
df['dist_from_previous_frame_female_52']= df.frame_number.apply(instance_distance_between_frame, instance_name='Female_52')

In [None]:
# The per-frame distance is repeated on every row of a frame, so keep one row
# per frame before summing.
total_distance_traveled_by_female_52 = df.drop_duplicates(subset='frame_number')['dist_from_previous_frame_female_52'].sum()
print("The total distance traveled by female_52 is: ", total_distance_traveled_by_female_52, "in pixels")

In [None]:
# calculate average speed: total distance (one value per frame, since the
# per-frame distance is repeated on every row of a frame) over video duration
average_speed_of_female_52 = df.drop_duplicates(subset='frame_number')['dist_from_previous_frame_female_52'].sum()/video_length_in_seconds
print('The average speed of female_52 is: ',average_speed_of_female_52, 'in pixels per second')

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:

# Plot one point per frame in frame order: df repeats each frame's distance on
# every row of the frame, and rows appended by the fill step sit at the end
# of df, which would make the line double back on itself.
female_95_steps = df.drop_duplicates(subset='frame_number').sort_values('frame_number')
fig = px.line(x=female_95_steps.frame_number, y=female_95_steps.dist_from_previous_frame_female_95, labels={'x':'frame_number', 'y':'dist from previous frame female_95'})
fig.show()

In [None]:
# Accumulate over one row per frame, in frame order; summing over every row
# would add each frame's distance once per row of that frame.
female_95_path = df.drop_duplicates(subset='frame_number').sort_values('frame_number')
fig1 = px.line(x=female_95_path.frame_number, y=female_95_path.dist_from_previous_frame_female_95.cumsum(), labels={'x':'frame_number', 'y':'cumulative sum of dist from previous frame female_95'})
fig1.show()

## Distance between two instances e.g. female_95 and male_105 in pixels

In [None]:
df['dist_frog__female_95_male_105'] = df.frame_number.apply(paired_distance,this_instance='Female_95',other_instance='Male_105')

In [None]:

# Plot one point per frame in frame order; df has several rows per frame and
# rows appended by the fill step are not in frame order.
pair_distance_per_frame = df.drop_duplicates(subset='frame_number').sort_values('frame_number')
fig = px.line(x=pair_distance_per_frame.frame_number, y=pair_distance_per_frame.dist_frog__female_95_male_105, labels={'x':'frame_number', 'y':'distance between frog male in tank 8 and frog female in tank 8'})
fig.show()

In [None]:
df.to_csv('Final_with_distances_Nov5_post_Tank7_Tank8.csv')

In [None]:
df.head()

In [None]:
#https://stackoverflow.com/questions/23199796/detect-and-exclude-outliers-in-a-pandas-dataframe
#------------------------------------------------------------------------------
# accept a dataframe, remove outliers, return cleaned data in a new dataframe
# see http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm
#------------------------------------------------------------------------------
def remove_outlier(df_in, col_name):
    """Drop rows whose ``col_name`` value lies outside the 1.5 * IQR fences.

    Args:
        df_in: input DataFrame; it is not modified.
        col_name: numeric column used to compute the fences.
    Returns:
        DataFrame with the rows of ``df_in`` whose value is within
        ``[q1 - 1.5 * iqr, q3 + 1.5 * iqr]``. Rows where the value is NaN are
        dropped because the comparisons are False for NaN.
    """
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3 - q1  # Interquartile range
    fence_low = q1 - 1.5 * iqr
    fence_high = q3 + 1.5 * iqr
    # Inclusive bounds: a value exactly on a fence is not an outlier, and a
    # constant column (iqr == 0) keeps its rows instead of losing all of them.
    within_fences = (df_in[col_name] >= fence_low) & (df_in[col_name] <= fence_high)
    return df_in.loc[within_fences]

# Average distance between the male and female throughout the video

In [None]:
# Average over one value per frame; df has several rows per frame, so a mean
# over all rows would weight each frame by how many rows it has.
print("The average distance between male and female throughout the video is ",df.drop_duplicates(subset='frame_number').dist_frog__female_95_male_105.mean())

# Histogram of the distance between male and female throughout the video

In [None]:
df.dist_frog__female_95_male_105.hist()

# Boxplot of the distance between male and female throughout the video

In [None]:
df.boxplot(column='dist_frog__female_95_male_105')

In [None]:
# calculate the area covered by an instance's center points using their convex hull polygon
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
def area_used(instance_name='Female_95',vis=True):
    """Area of the convex hull around an instance's center points.

    Takes the ``cx``/``cy`` rows of ``instance_name`` from the notebook-level
    ``df``, removes outliers on ``cx`` and then on ``cy`` with
    ``remove_outlier``, and builds the convex hull of the remaining points.

    Args:
        instance_name: instance whose center points are used.
        vis: when True, plot the points and the hull edges with matplotlib.
    Returns:
        The hull area, in squared units of ``cx``/``cy``.
    """
    df_center = df[df.instance_name == instance_name][['cx','cy']]
    df_center = remove_outlier(df_center,'cx')
    df_center = remove_outlier(df_center,'cy')
    points = df_center.values
    hull = ConvexHull(points)
    if vis:
        plt.plot(points[:,0], points[:,1], 'o')
        for simplex in hull.simplices:
            plt.plot(points[simplex, 0], points[simplex, 1], 'k-')
    # For 2-D input scipy reports the enclosed area as `volume`;
    # `hull.area` would be the perimeter of the polygon.
    return hull.volume
  

In [None]:
area_used('Male_105')

In [None]:
area_used('Male_92')

# Scatter plot for all the instances center point for all the frames

Remove outliers of cx

In [None]:
df_remove_outlier = df[df.groupby("instance_name").cx.transform(lambda x : (x<x.quantile(0.95))&(x>(x.quantile(0.05)))).eq(1)]

Remove outliers of cy

In [None]:
df_remove_outlier = df_remove_outlier[df_remove_outlier.groupby("instance_name").cy.transform(lambda x : (x<x.quantile(0.95))&(x>(x.quantile(0.05)))).eq(1)]

In [None]:

px.scatter(x=df_remove_outlier.cx,y=df_remove_outlier.cy,color=df_remove_outlier.instance_name)

# Remove outliers of the distance between female and male throughout the video

In [None]:
df_remove_outlier = remove_outlier(df_remove_outlier,'dist_frog__female_95_male_105')

In [None]:
print("The average distance between female and male after removing the outliers is ", df_remove_outlier.dist_frog__female_95_male_105.mean())

In [None]:
df_remove_outlier.dist_frog__female_95_male_105.hist()