### Analyze outliers in detection/prediction results on test data

In [4]:
import pandas as pd
import numpy as np
import os

In [5]:
def read_prediction_dataframe_from_dir(folder: str, is_tracking=False) -> pd.DataFrame:
    file_list = sorted(os.listdir(folder))
    dfs = []
    for i, file_name in enumerate(file_list):
        #print(f)
        columns=["cls", "x", "y", "w", "h", "conf", "id"] if is_tracking else ["cls", "x", "y", "w", "h", "conf"]

        df = pd.read_csv(os.path.join(folder, file_name), sep=" ", header=None, names=columns)
        df["frame"] = i
        df["filename"] = file_name
        keypoints_df = df[df["cls"] >= 31]
        dfs.append(keypoints_df)

    return pd.concat(dfs)

# 13-classes keypoints model

### Pony vs The Killjoy - Pool 013
#### 2 keypoints classes variant: cone (31), midpoint(32)
Loading prediction/tracking data

In [6]:
#print(os.getcwd())
keypoints_df = read_prediction_dataframe_from_dir(folder="../../runs/detect/track3/labels", is_tracking=True)
print(len(keypoints_df))
keypoints_df.head(5)

55810


Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename
3,31,0.69934,0.103258,0.009634,0.017423,0.943481,4,0,pony_vs_the_killjoys_pool_013_1.txt
7,32,0.518308,0.306945,0.014788,0.026235,0.909432,8,0,pony_vs_the_killjoys_pool_013_1.txt
9,31,0.342385,0.105246,0.009379,0.017071,0.889964,10,0,pony_vs_the_killjoys_pool_013_1.txt
10,32,0.519433,0.232265,0.011592,0.020488,0.88718,11,0,pony_vs_the_killjoys_pool_013_1.txt
11,31,0.311012,0.13641,0.008984,0.016713,0.880367,12,0,pony_vs_the_killjoys_pool_013_1.txt


In [7]:
conf_threshold = 0.5
cones_threshold = 4

In [8]:
def get_ratio_of_frames_with_less_than_n_cones(df: pd.DataFrame, n: int=4, conf_threshold=conf_threshold) -> float:
    df_filtered = df.groupby('frame').filter(lambda group: len(group[((group["cls"]==31) & (group["conf"] > conf_threshold))]) < n)
    #print(df_filtered['frame'].head())
    return len(df_filtered['frame'].unique())/len(df['frame'].unique())

In [9]:
ratio = get_ratio_of_frames_with_less_than_n_cones(keypoints_df, n=cones_threshold, conf_threshold=0.6)
print(f"Percent of frames with less than {cones_threshold} keypoints with confidence threshold {conf_threshold*100} % --> {ratio*100} %")

Percent of frames with less than 4 keypoints with confidence threshold 50.0 % --> 18.90265076976502 %


In [10]:
ponys_keypoints_df = keypoints_df.copy()
if "filename" in ponys_keypoints_df.columns:
    del ponys_keypoints_df["filename"]
ponys_counts_by_frame = ponys_keypoints_df[ponys_keypoints_df["cls"]==31].groupby("frame")["id"].aggregate('count')
# ponys_keypoints_df = ponys_keypoints_df.rolling(window=10).max()
# ponys_keypoints_df[ponys_keypoints_df < 4]
#ponys_counts_by_frame
ponys_counts_by_frame_series = ponys_counts_by_frame.rolling(window=15).max()
ponys_counts_by_frame_series[ponys_counts_by_frame_series < 4]

Series([], Name: id, dtype: float64)

In [11]:
keypoints_df.head()

Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename
3,31,0.69934,0.103258,0.009634,0.017423,0.943481,4,0,pony_vs_the_killjoys_pool_013_1.txt
7,32,0.518308,0.306945,0.014788,0.026235,0.909432,8,0,pony_vs_the_killjoys_pool_013_1.txt
9,31,0.342385,0.105246,0.009379,0.017071,0.889964,10,0,pony_vs_the_killjoys_pool_013_1.txt
10,32,0.519433,0.232265,0.011592,0.020488,0.88718,11,0,pony_vs_the_killjoys_pool_013_1.txt
11,31,0.311012,0.13641,0.008984,0.016713,0.880367,12,0,pony_vs_the_killjoys_pool_013_1.txt


### Machine vs Condors - Pool 006
#### 2 keypoints classes variant: cone (31), midpoint(32)
Loading prediction/tracking data

In [12]:
condors_keypoints_df = read_prediction_dataframe_from_dir(folder="../../runs/detect/track2/labels", is_tracking=True)
print(len(condors_keypoints_df))
condors_keypoints_df.head()

19090


Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename
3,32,0.50028,0.276708,0.010053,0.017431,0.955173,4,0,machine_vs_condors_pool_006_1.txt
8,32,0.501809,0.32358,0.010426,0.018345,0.94198,9,0,machine_vs_condors_pool_006_1.txt
13,31,0.299682,0.233273,0.00949,0.017113,0.90437,14,0,machine_vs_condors_pool_006_1.txt
14,31,0.698093,0.22901,0.010256,0.017395,0.889027,15,0,machine_vs_condors_pool_006_1.txt
16,31,0.327827,0.200379,0.009752,0.016872,0.867725,17,0,machine_vs_condors_pool_006_1.txt


In [13]:
ratio = get_ratio_of_frames_with_less_than_n_cones(condors_keypoints_df, n=4, conf_threshold=0.5)
print(f"Percent of frames with less than {cones_threshold} keypoints with confidence threshold {conf_threshold*100} % --> {ratio*100} %")

Percent of frames with less than 4 keypoints with confidence threshold 50.0 % --> 32.686868686868685 %


In [14]:
condors_counts_by_frame = condors_keypoints_df[condors_keypoints_df["cls"]==31].groupby("frame")["id"].aggregate('count')
window_len = 10
cones_threshold = 4
condors_smoothed_series = condors_counts_by_frame.rolling(window=window_len).max()
print(f"Number of images with fewer than {4} cones inside rolling window of length={window_len}: {len(condors_smoothed_series[condors_smoothed_series < cones_threshold])}")

Number of images with fewer than 4 cones inside rolling window of length=10: 68


# 13-classes keypoints model

### Pony vs The Killjoy - Pool 013
#### 13 keypoints classes variant
Loading prediction/tracking data

In [15]:
ponys_13cls_df = read_prediction_dataframe_from_dir(folder="../../runs/detect/track4/labels", is_tracking=True)
print(len(ponys_13cls_df))
ponys_13cls_df.head(5)

22643


Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename
2,36,0.518324,0.271519,0.00946,0.01647,0.969942,3,0,pony_vs_the_killjoys_pool_004_1.txt
4,34,0.321885,0.224127,0.008479,0.01569,0.96504,5,0,pony_vs_the_killjoys_pool_004_1.txt
11,33,0.522141,0.206899,0.008848,0.015378,0.927504,12,0,pony_vs_the_killjoys_pool_004_1.txt
12,31,0.352514,0.190828,0.008629,0.014922,0.900049,13,0,pony_vs_the_killjoys_pool_004_1.txt
14,32,0.694432,0.191096,0.008541,0.015429,0.889788,15,0,pony_vs_the_killjoys_pool_004_1.txt


Eligible pairs of keypoints that run on parallel lines:
- 31, 32 (TLC,TRC)
- 34, 35 (TLF, TRF)
- 39, 40 (BLF, BRF)
- 41, 42 (BLC, BRC; practically never visible)

In [16]:
candidate_clsid_pairs = np.array([[31,32],[34,35],[39,40],[41,42]])
keypoint_lines_map = {31: "TC", 32: "TC", 34: "TF", 35: "TF", 39: "BF", 40: "BF", 41: "BC", 42: "BC"}
single_frame_df = ponys_13cls_df[ponys_13cls_df["frame"]==0]
single_frame_df = single_frame_df[(single_frame_df["cls"].isin(candidate_clsid_pairs.flatten())) & (single_frame_df["conf"] > conf_threshold)]
single_frame_df["keypoint_line"] = single_frame_df["cls"].apply(lambda c: keypoint_lines_map[c])
keypoint_counts = single_frame_df.groupby("keypoint_line")["cls"].aggregate("count")
print(len(keypoint_counts[keypoint_counts > 1]))
keypoint_counts[keypoint_counts > 1]


2


keypoint_line
TC    2
TF    2
Name: cls, dtype: int64

In [17]:
def get_number_of_keypoint_pairs(group: pd.DataFrame) -> int:
    mask = (group["cls"].isin(candidate_clsid_pairs.flatten())) & (group["conf"] > conf_threshold)
    candidate_cls_df = group[mask].copy(deep=False)
    #df2 = group.loc[mask, :]
    candidate_cls_df["keypoint_line"] = candidate_cls_df["cls"].apply(lambda c: keypoint_lines_map[c])
    keypoint_counts = candidate_cls_df.groupby("keypoint_line")["cls"].aggregate("count")
    return len(keypoint_counts[keypoint_counts >= 2])

def get_ratio_of_frames_with_fewer_than_2x2_keypoints(df: pd.DataFrame, n: int=2, conf_threshold=conf_threshold) -> float:
    df_filtered = df.groupby('frame').filter(lambda g: get_number_of_keypoint_pairs(g) < n)
    return len(df_filtered['frame'].unique())/len(df['frame'].unique())

In [18]:
ratio = get_ratio_of_frames_with_fewer_than_2x2_keypoints(ponys_13cls_df, n=2, conf_threshold=0.6)
print(f"Percent of frames with fewer than 2 pairs of keypoints with confidence threshold {conf_threshold*100} % --> {ratio*100} %")

Percent of frames with fewer than 2 pairs of keypoints with confidence threshold 50.0 % --> 4.192229038854806 %


In [19]:
insufficient_pairs = ponys_13cls_df.groupby('frame').apply(get_number_of_keypoint_pairs, include_groups=True)
print("Frames with fewer than 2 pairs of keypoints with confidence threshold")
insufficient_pairs[insufficient_pairs < 2].head(5)

Frames with fewer than 2 pairs of keypoints with confidence threshold


  insufficient_pairs = ponys_13cls_df.groupby('frame').apply(get_number_of_keypoint_pairs, include_groups=True)


frame
122    1
132    1
137    1
152    1
172    1
dtype: int64

In [20]:
# See https://stackoverflow.com/questions/63737187/how-to-apply-a-custom-rolling-function-to-pandas-groupby
#ponys_13cls_df.groupby("frame").rolling(window=5).apply(lambda g: 1.0)
#ponys_13cls_df.groupby("frame").apply(lambda g: g.rolling(2).apply(get_number_of_keypoint_pairs, kwargs={}))

In [21]:
# def filter_and_augment_with_keypoint_columns(df: pd.DataFrame) -> pd.DataFrame:
#     mask = (df["cls"].isin(candidate_clsid_pairs.flatten())) & (df["conf"] > conf_threshold)
#     candidate_cls_df = df[mask].copy(deep=False)
#     candidate_cls_df["keypoint_line"] = candidate_cls_df["cls"].apply(lambda c: keypoints_to_lines_map[c]["line"])
#     candidate_cls_df["keypoint_line_boe"] = candidate_cls_df["cls"].apply(lambda c: keypoints_to_lines_map[c]["boe"])
#     candidate_cls_df["keypoint_line_pref"] = candidate_cls_df["cls"].apply(lambda c: keypoints_to_lines_map[c]["pref"])
#     return candidate_cls_df

In [22]:
keypoints_to_lines_map = {
    31: dict(name="TLC", line="TC", boe=0, pref=3),
    32: dict(name="TRC", line="TC", boe=1, pref=3),
    34: dict(name="TLF", line="TF", boe=0, pref=2),
    35: dict(name="TRF", line="TF", boe=1, pref=2),
    39: dict(name="BLF", line="BF", boe=0, pref=1),
    40: dict(name="BRF", line="BF", boe=1, pref=1),
    41: dict(name="BLC", line="BC", boe=0, pref=0),
    42: dict(name="BRC", line="BC", boe=1, pref=0),
}

In [30]:
# import keypoints_helper
first_frame = ponys_13cls_df[(ponys_13cls_df["frame"]==122) & (ponys_13cls_df["cls"].isin(keypoints_to_lines_map.keys()))].copy()
#df = keypoints_helper._augment_detection_dataframe_with_keypoint_columns(first_frame)
first_frame["keypoint_line"] = first_frame["cls"].apply(lambda c: keypoints_to_lines_map[c]["line"])
first_frame["keypoint_line_lr"] = first_frame["cls"].apply(lambda c: c % 2) # simplification
df_sorted = first_frame.sort_values(by=["cls"], ascending=False)
df_sorted


Unnamed: 0,cls,x,y,w,h,conf,id,frame,filename,keypoint_line,keypoint_line_lr
6,35,0.826798,0.374655,0.010504,0.018855,0.931251,19,122,pony_vs_the_killjoys_pool_004_1108.txt,TF,1
1,34,0.15088,0.377311,0.009848,0.017698,0.471962,5,122,pony_vs_the_killjoys_pool_004_1108.txt,TF,0
4,32,0.754572,0.28755,0.01091,0.019457,0.867787,15,122,pony_vs_the_killjoys_pool_004_1108.txt,TC,0
3,31,0.229559,0.290498,0.010561,0.018556,0.929126,13,122,pony_vs_the_killjoys_pool_004_1108.txt,TC,1


In [44]:
#df_agg = df_sorted.groupby("keypoint_line").aggregate("count").asof()
#df_agg = df_sorted.groupby("keypoint_line").agg({'count':('keypoint_line', 'count')})
df_agg = df_sorted.groupby("keypoint_line").agg(count=('keypoint_line', 'count')).reset_index()
#df_agg["extra"]=
df_agg

Unnamed: 0,keypoint_line,count
0,TC,2
1,TF,2


In [24]:
index_mask = df_sorted["cls"]==32
np.float32(df_sorted.loc[index_mask.index[0], "x":"y"])

array([0.826798, 0.374655], dtype=float32)

In [25]:
s_keypoints_by_count = df_sorted.groupby("keypoint_line")["keypoint_line"].aggregate("count").sort_values()

s_at_least_2 = s_keypoints_by_count[s_keypoints_by_count == 2]
keypoint_line_keys = s_at_least_2.keys()
keypoint_line_keys

Index(['TC', 'TF'], dtype='object', name='keypoint_line')