### Analyze outliers in detection/prediction results

In [2]:
import pandas as pd
import numpy as np
import os

In [74]:
def read_prediction_dataframe_from_dir(folder: str, is_tracking=False) -> pd.DataFrame:
    file_list = sorted(os.listdir(folder))
    dfs = []
    for i, file_name in enumerate(file_list):
        #print(f)
        columns=["cls", "x", "y", "w", "h", "conf", "id"] if is_tracking else ["cls", "x", "y", "w", "h", "conf"]

        df = pd.read_csv(os.path.join(folder, file_name), sep=" ", header=None, names=columns)
        df["frame"] = i
        df["filename"] = file_name
        keypoints_df = df[df["cls"].isin([31, 32])]
        #print(keypoints_df)
        dfs.append(keypoints_df)

    return pd.concat(dfs)

In [130]:
#def convert_single_tracking_result_from_txt(df: pd.DataFrame):
print(os.getcwd())
keypoints_df = read_prediction_dataframe_from_dir(folder="../../runs/detect/predict6/labels", is_tracking=False)
print(len(keypoints_df))
keypoints_df.head()

/home/adam/Projects/DSR/ultimate_analytics/ultimate-pipeline/notebooks/keypoints
33517


Unnamed: 0,cls,x,y,w,h,conf,frame,filename
0,31,0.699563,0.102965,0.010986,0.019179,0.914857,0,pony_vs_the_killjoys_pool_013_1.txt
9,31,0.342411,0.104897,0.009255,0.016351,0.699574,0,pony_vs_the_killjoys_pool_013_1.txt
12,31,0.731543,0.135071,0.010216,0.017926,0.637236,0,pony_vs_the_killjoys_pool_013_1.txt
13,32,0.519936,0.182404,0.009739,0.016709,0.625701,0,pony_vs_the_killjoys_pool_013_1.txt
16,31,0.310968,0.135658,0.00959,0.016319,0.448016,0,pony_vs_the_killjoys_pool_013_1.txt


In [81]:
conf_threshold = 0.5
cones_threshold = 4

In [83]:
def get_ratio_of_frames_with_less_than_n_cones(df: pd.DataFrame, n: int=4, conf_threshold=conf_threshold) -> float:
    df_filtered = df.groupby('frame').filter(lambda row: len(row[((row["cls"]==31) & (row["conf"] > conf_threshold))]) < n)
    #print(df_filtered['frame'].head())
    return len(df_filtered['frame'].unique())/len(df['frame'].unique())

In [84]:
ratio = get_ratio_of_frames_with_less_than_n_cones(keypoints_df, n=cones_threshold, conf_threshold=0.5)
print(f"Percent of frames with less than {cones_threshold} keypoints with confidence threshold {conf_threshold*100} % --> {ratio*100} %")

Percent of frames with less than 4 keypoints with confidence threshold 50.0 % --> 79.51151753675194 %


In [145]:
ponys_keypoints_df = keypoints_df.copy()
if "filename" in ponys_keypoints_df.columns:
    del ponys_keypoints_df["filename"]
ponys_counts_by_frame = ponys_keypoints_df[ponys_keypoints_df["cls"]==31].groupby("frame")["x"].aggregate('count')
# ponys_keypoints_df = ponys_keypoints_df.rolling(window=10).max()
# ponys_keypoints_df[ponys_keypoints_df < 4]
#ponys_counts_by_frame
ponys_counts_by_frame_series = ponys_counts_by_frame.rolling(window=30).max()
ponys_counts_by_frame_series[ponys_counts_by_frame_series < 4]

frame
3838    3.0
3839    3.0
3840    3.0
3841    3.0
3842    3.0
       ... 
7853    3.0
7854    3.0
8286    3.0
8287    3.0
8288    3.0
Name: x, Length: 1991, dtype: float64

In [134]:
keypoints_df.head()

Unnamed: 0,cls,x,y,w,h,conf,frame,filename
0,31,0.699563,0.102965,0.010986,0.019179,0.914857,0,pony_vs_the_killjoys_pool_013_1.txt
9,31,0.342411,0.104897,0.009255,0.016351,0.699574,0,pony_vs_the_killjoys_pool_013_1.txt
12,31,0.731543,0.135071,0.010216,0.017926,0.637236,0,pony_vs_the_killjoys_pool_013_1.txt
13,32,0.519936,0.182404,0.009739,0.016709,0.625701,0,pony_vs_the_killjoys_pool_013_1.txt
16,31,0.310968,0.135658,0.00959,0.016319,0.448016,0,pony_vs_the_killjoys_pool_013_1.txt


In [76]:
condors_keypoints_df = read_prediction_dataframe_from_dir(folder="../../runs/detect/track2/labels", is_tracking=True)
print(len(condors_keypoints_df))
condors_keypoints_df.head()

19090


Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename
3,32,0.50028,0.276708,0.010053,0.017431,0.955173,4,0,machine_vs_condors_pool_006_1.txt
8,32,0.501809,0.32358,0.010426,0.018345,0.94198,9,0,machine_vs_condors_pool_006_1.txt
13,31,0.299682,0.233273,0.00949,0.017113,0.90437,14,0,machine_vs_condors_pool_006_1.txt
14,31,0.698093,0.22901,0.010256,0.017395,0.889027,15,0,machine_vs_condors_pool_006_1.txt
16,31,0.327827,0.200379,0.009752,0.016872,0.867725,17,0,machine_vs_condors_pool_006_1.txt


In [85]:
ratio = get_ratio_of_frames_with_less_than_n_cones(condors_keypoints_df, n=4, conf_threshold=0.5)
print(f"Percent of frames with less than {cones_threshold} keypoints with confidence threshold {conf_threshold*100} % --> {ratio*100} %")

Percent of frames with less than 4 keypoints with confidence threshold 50.0 % --> 32.686868686868685 %


In [116]:
condors_counts_by_frame = condors_keypoints_df[condors_keypoints_df["cls"]==31].groupby("frame")["id"].aggregate('count')
condors_smoothed_series = condors_counts_by_frame.rolling(window=10).max()
condors_smoothed_series[condors_smoothed_series < 4]

frame
12      3.0
23      3.0
34      3.0
45      3.0
56      3.0
       ... 
2431    3.0
2442    3.0
2453    3.0
2464    3.0
2475    3.0
Name: id, Length: 68, dtype: float64