<a href="https://colab.research.google.com/github/healthonrails/annolid/blob/main/docs/tutorials/Annolid_batch_post_processinng_tracking_csv_for_masks_ious_and_areas.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Batch post-processing of the tracking results CSV files based on mask areas and IoUs

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import glob
from pycocotools import mask as mask_util
import ast
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
# Folder that is searched for tracking-result CSV files; it is the default
# ``results_folder`` of ``process_all_results`` below.
TRACKING_CSVS_FOLDER = '/content/tracking_results' 

In [None]:
def mask_perimeter(mask):
    """Calculate the perimeter of the first contour of an RLE-encoded mask.

    Args:
        mask: A run-length-encoded mask, either in the form accepted by
            ``mask_util.decode`` or as the string representation of one
            (the form in which the ``segmentation`` column is read back
            from a CSV file).

    Returns:
        float: Closed arc length of the first contour reported by
        ``cv2.findContours``, or ``0.0`` when the decoded mask contains no
        contour at all (for example an empty mask).
    """
    try:
        binary_mask = mask_util.decode(mask)
    except TypeError:
        # A string cannot be decoded directly; parse it back into an RLE
        # object first.
        rle = ast.literal_eval(mask)
        binary_mask = mask_util.decode([rle])
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 4.x returns
    # (contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(binary_mask, cv2.RETR_CCOMP,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing to measure; indexing contours[0] would raise IndexError.
        return 0.0
    return cv2.arcLength(contours[0], True)

In [None]:
def mask_iou(this_mask, other_mask):
    """Calculate intersection over union (IoU) between two RLE masks.

    Args:
        this_mask: RLE mask, or the string representation of one.
        other_mask: RLE mask, or the string representation of one.

    Returns:
        The first element of the flattened result of ``mask_util.iou`` for
        the pair, i.e. the IoU of ``this_mask`` and ``other_mask``.
    """
    # Parse each argument independently so that a mix of an already-parsed
    # RLE and a string (as read from CSV) is handled as well.
    if isinstance(this_mask, str):
        this_mask = ast.literal_eval(this_mask)
    if isinstance(other_mask, str):
        other_mask = ast.literal_eval(other_mask)
    # ``iscrowd`` holds one flag per mask in the second list, and there is
    # exactly one mask here.
    _iou = mask_util.iou([this_mask], [other_mask], [False])
    return _iou.flatten()[0]

In [None]:
def mask_area(mask):
    """Return the area of an RLE mask.

    The mask is first handed to ``mask_util.area`` as given. If that raises
    a ``TypeError`` the mask is parsed with ``ast.literal_eval`` and the
    area is computed from the parsed object instead.
    """
    rle = mask
    try:
        return mask_util.area(rle)
    except TypeError:
        # Not directly usable (e.g. a string); parse it and retry below.
        rle = ast.literal_eval(mask)
    return mask_util.area(rle)

In [None]:
def iou_values(row, df):
    """Pairwise IoU matrix of every mask that shares ``row``'s frame.

    The rows of ``df`` whose ``frame_number`` equals ``row.frame_number`` are
    ordered by their ``'Unnamed: 0'`` value, their ``segmentation`` strings
    are parsed, and ``mask_util.iou`` is evaluated for all pairs. The
    diagonal (each mask against itself) is set to zero before returning.
    """
    same_frame = df["frame_number"] == row.frame_number
    frame_rows = df[same_frame].sort_values(by=['Unnamed: 0'])
    rles = list(map(ast.literal_eval, frame_rows['segmentation'].values))
    not_crowd = [False] * len(rles)
    pairwise = mask_util.iou(rles, rles, not_crowd)
    # Self-overlap carries no information; zero it out.
    pairwise[np.diag_indices_from(pairwise)] = 0
    return pairwise

## Merge overlapping masks 

In [None]:
def merge_overlapped_masks(row, df_overlap, instance_name_prefix=None):
    """Merge the overlapping masks of ``row``'s frame and flag redundant rows.

    All masks of the frame that take part in an overlap (a non-zero entry in
    the lower triangle of the frame's ``iou_values`` matrix) are merged with
    ``mask_util.merge``. The merged mask is stored on the group's
    ``<prefix>_Huddle`` row; if the group has no such row, its first
    ``<prefix>_Lone`` row receives the merged mask and is renamed to
    ``<prefix>_Huddle``.

    ``df_overlap`` is modified in place. Rows are addressed through
    ``df_overlap.loc`` using their ``'Unnamed: 0'`` value, so the index
    labels of ``df_overlap`` are expected to equal that column.

    Args:
        row: Row (with ``frame_number`` and ``'Unnamed: 0'``) being tested.
        df_overlap: DataFrame with the columns ``frame_number``,
            ``instance_name``, ``segmentation``, ``iou_values`` and
            ``'Unnamed: 0'``.
        instance_name_prefix: Prefix of the instance names, e.g. ``'P6'`` for
            ``P6_Huddle`` / ``P6_Lone``. When ``None`` it is taken from the
            part before the first underscore of the first instance name in
            ``df_overlap``.

    Returns:
        bool: ``True`` when ``row`` is one of the overlapping masks that was
        folded into another row and should therefore be removed, ``False``
        otherwise (including when the group has neither a Huddle nor a Lone
        row to merge into).
    """
    if instance_name_prefix is None:
        # Assumption: instance names look like P6_Huddle, P4_Lone, P4_Huddle,
        # etc., so the prefix (P4, P6, ...) is the part before the first
        # underscore. Pass instance_name_prefix explicitly to override.
        instance_name_prefix = df_overlap.instance_name.unique()[0].split('_')[0]
    huddle_name = f'{instance_name_prefix}_Huddle'
    lone_name = f'{instance_name_prefix}_Lone'

    # Boolean filtering and reset_index() both return new frames, so
    # ``df_overlap`` itself is only touched by the explicit .loc writes below.
    frame_rows = df_overlap[df_overlap.frame_number == row.frame_number].reset_index()

    # Positions (within the frame) of the masks that overlap with another
    # mask; the first non-empty set found is used.
    overlapped_positions = []
    for iou_matrix in frame_rows.iou_values:
        row_idx, col_idx = np.nonzero(np.tril(iou_matrix))
        overlapped_positions = sorted(set(row_idx.tolist()) | set(col_idx.tolist()))
        if overlapped_positions:
            break
    if not overlapped_positions:
        return False

    group = frame_rows.iloc[overlapped_positions]
    is_huddle = group.instance_name == huddle_name
    is_lone = group.instance_name == lone_name
    if is_huddle.any():
        keeper_id = group.loc[is_huddle, 'Unnamed: 0'].values[0]
        promote_to_huddle = False
    elif is_lone.any():
        keeper_id = group.loc[is_lone, 'Unnamed: 0'].values[0]
        promote_to_huddle = True
    else:
        # No Huddle or Lone row to carry the merged mask: leave the frame
        # untouched instead of failing on an empty selection.
        return False

    rles = [ast.literal_eval(segmentation)
            for segmentation in group['segmentation'].values]
    merged_mask = mask_util.merge(rles)
    df_overlap.loc[keeper_id, 'segmentation'] = str(merged_mask)
    if promote_to_huddle:
        df_overlap.loc[keeper_id, 'instance_name'] = huddle_name

    # Every overlapping row except the keeper is now redundant.
    redundant_ids = set(group['Unnamed: 0'].to_list()) - {keeper_id}
    return row['Unnamed: 0'] in redundant_ids


In [None]:
def paired_distance(frame_number, df,
                    this_instance='Female_95',
                    other_instance='Male_105'):
    """Euclidean distance between the centers of two instances in one frame.

    Args:
        frame_number: Frame to look at.
        df: DataFrame with the columns ``frame_number``, ``cx``, ``cy`` and
            ``instance_name``.
        this_instance: Name of the first instance.
        other_instance: Name of the second instance.

    Returns:
        The distance between the ``(cx, cy)`` centers of the two instances,
        or ``None`` when either instance is missing from the frame or the
        coordinates cannot be subtracted. If an instance occurs more than
        once in the frame, its first row is used.
    """
    in_frame = df[df["frame_number"] == frame_number][['cx', 'cy', 'instance_name']]
    this_xy = in_frame.loc[in_frame.instance_name == this_instance, ['cx', 'cy']].values
    other_xy = in_frame.loc[in_frame.instance_name == other_instance, ['cx', 'cy']].values
    if len(this_xy) == 0 or len(other_xy) == 0:
        # Subtracting against an empty array broadcasts to an empty result
        # whose norm is 0.0, which would look like "touching" instead of
        # "not present"; report the missing pair explicitly.
        return None
    try:
        return np.linalg.norm(this_xy[0] - other_xy[0])
    except (TypeError, ValueError):
        # Non-numeric coordinates: keep the best-effort "no distance" result.
        return None

In [None]:
def process_all_results(results_folder=TRACKING_CSVS_FOLDER,
                        csv_file_pattern='*tracking_results*tion.csv',
                        class_score_threshold=0.5,
                        output_dir=None,
                        paired_instances_for_distances=(('Female_95','Male_105'),)
                        ):
    """Post-process every tracking CSV in a folder and save the results.

    For each matching CSV file the function
      1. keeps the rows whose ``class_score`` exceeds the threshold,
      2. adds ``cx``/``cy`` (from ``x1``/``x2``/``y1``/``y2`` when missing),
         ``mask_perimeter`` and ``mask_area`` columns,
      3. computes the per-frame pairwise IoU matrix (``iou_values``) and
         merges overlapping masks with ``merge_overlapped_masks``, dropping
         the rows that were merged into another row,
      4. drops rows containing missing values,
      5. adds one ``dist_<this>_<other>`` column per requested instance pair,
      6. writes the result as ``<name>_remove_overlapped.csv``.

    Args:
        results_folder: Folder that is searched for tracking CSV files.
        csv_file_pattern: Glob pattern, relative to ``results_folder``.
        class_score_threshold: Rows with ``class_score`` at or below this
            value are discarded.
        output_dir: Folder for the processed files. When ``None`` the files
            go to ``<folder of the input file>_processed``. The folder is
            created if it does not exist.
        paired_instances_for_distances: Iterable of ``(this, other)``
            instance-name pairs for which a distance column is added.
            ``None`` or an empty iterable adds no distance columns.

    Returns:
        list: Paths of the processed CSV files that were written.
    """
    # Sorted so the order of the returned list is reproducible across runs.
    tracking_csv_files = sorted(glob.glob(os.path.join(results_folder, csv_file_pattern)))
    processed_csvs = []
    for tcf in tracking_csv_files:
        print("Start Processing: ", tcf)
        df = pd.read_csv(tcf)
        instance_names = df['instance_name'].unique()
        print(f'{len(instance_names)} instances with names: {instance_names}')
        # .copy() so the column assignments below operate on an independent
        # frame rather than on a slice of the one returned by read_csv.
        df = df[df.class_score > class_score_threshold].copy()
        if df.empty:
            print(f"No predictions with class_score > {class_score_threshold} in {tcf}, skipping.")
            continue
        if 'cx' not in df.columns:
            df['cx'] = (df.x1 + df.x2) / 2
        if 'cy' not in df.columns:
            df['cy'] = (df.y1 + df.y2) / 2
        df['mask_perimeter'] = df.segmentation.apply(mask_perimeter)
        df['mask_area'] = df.segmentation.apply(mask_area)

        ## Overlapping masks --- IoU based mask merging and removing
        # All rows of a frame share the same IoU matrix, so compute it once
        # per frame instead of once per row.
        frame_ious = {
            frame_number: iou_values(frame_rows.iloc[0], frame_rows)
            for frame_number, frame_rows in df.groupby('frame_number')
        }
        df['iou_values'] = df.frame_number.apply(lambda frame_number: frame_ious[frame_number])
        df['overlap_indxs'] = df.iou_values.apply(lambda iou: np.argwhere(np.tril(iou) > 0))
        # Sum of the pairwise IoUs of the frame (the lower triangle counts
        # each pair once); it is positive exactly when some masks overlap.
        df['ious'] = df.iou_values.apply(lambda iou_arr: np.sum(np.tril(iou_arr)))

        # Keep all the non-overlapping masks.
        df_no_overlap = df[df.ious <= 0].copy()
        df_no_overlap['is_removed'] = False
        df_overlap = df[df.ious > 0].copy()
        if df_overlap.empty:
            # Row-wise apply on an empty frame does not yield a column that
            # can be assigned, and there is nothing to merge anyway.
            df_final = df_no_overlap
        else:
            df_overlap['is_removed'] = df_overlap.apply(
                lambda row: merge_overlapped_masks(row, df_overlap), axis=1)
            df_final = pd.concat([df_no_overlap, df_overlap])
        df_final = df_final[~df_final.is_removed.astype(bool)].dropna()

        ## Distances between the requested pairs of instances
        for this_instance, other_instance in (paired_instances_for_distances or ()):
            if this_instance in instance_names and other_instance in instance_names:
                # The distance only depends on the frame, so compute it once
                # per frame and broadcast it to the frame's rows.
                frame_distances = {
                    frame_number: paired_distance(frame_number, frame_rows,
                                                  this_instance, other_instance)
                    for frame_number, frame_rows in df_final.groupby('frame_number')
                }
                df_final[f'dist_{this_instance}_{other_instance}'] = (
                    df_final.frame_number.map(frame_distances))
            else:
                print(this_instance, other_instance, " not in instance names.")

        print(f"Removed {len(df)-len(df_final)} overlapping predictions")
        # Save the tracking results with overlapped masks merged and removed.
        base_name, extension = os.path.splitext(os.path.basename(tcf))
        res_file_name = f'{base_name}_remove_overlapped{extension}'

        # Save results to a folder with suffix _processed unless the caller
        # chose one; create the folder in either case.
        target_dir = output_dir if output_dir is not None else os.path.dirname(tcf) + '_processed'
        os.makedirs(target_dir, exist_ok=True)
        processed_csv = os.path.join(target_dir, res_file_name)
        df_final.to_csv(processed_csv)
        processed_csvs.append(processed_csv)
    print(f'Finished processing all the {len(tracking_csv_files)} tracking csv files')
    return processed_csvs


In [None]:
# Process every tracking CSV in the configured folder and add the distance
# between the P6_Huddle and P6_Lone instances.
processed_csv_files = process_all_results(
    results_folder=TRACKING_CSVS_FOLDER,
    paired_instances_for_distances=(('P6_Huddle', 'P6_Lone'),),
)

In [None]:
# Load the first processed CSV back in for a quick look at the result.
df_o = pd.read_csv(processed_csv_files[0])

In [None]:
# Summary statistics of the distance column added for the
# ('P6_Huddle', 'P6_Lone') pair requested above.
df_o['dist_P6_Huddle_P6_Lone'].describe()

## Downloading files to your local file system

In [None]:
# Imported here rather than at the top because the module is only available
# when the notebook runs on Google Colab.
from google.colab import files

# Download the post-processed CSV files written by process_all_results
# (they live in the "_processed" folder, not in TRACKING_CSVS_FOLDER).
for processed_csv in processed_csv_files:
    files.download(processed_csv)