In [1]:
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from data_utils import results2df
from torchmetrics.functional.classification import multilabel_f1_score
import os

plt.style.use("science")

In [2]:
train_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_feats=train_feats.pkl"
val_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_e=200_feats=val_feats.pkl"
metadata_file = "../dataset/metadata/new_metadata.csv"
behaviours_file = "../dataset/metadata/behaviours.txt"
segments_file = "../dataset/metadata/segments.txt"

camera_loc_df = pd.read_csv("../dataset/metadata/ordered_locations.txt", header=None)

# convert to list
camera_loc_list = camera_loc_df.values.tolist()
camera_loc_list = [loc[0] for loc in camera_loc_list]

with open(train_results, "rb") as f:
    train_data = pkl.load(f)

with open(
    val_results,
    "rb",
) as f:
    val_data = pkl.load(f)

metadata_df = pd.read_csv(metadata_file)

with open(behaviours_file, "rb") as f:
    behaviours = [beh.decode("utf-8").strip() for beh in f.readlines()]

with open(segments_file, "rb") as f:
    segments = [seg.decode("utf-8").strip() for seg in f.readlines()]

In [3]:
def count_videos_per_camera_behaviour(
    df: pd.DataFrame, camera_loc_list: list, num_labels: int = 14
):
    camera_loc_df = pd.DataFrame(camera_loc_list, columns=["utm"])
    df_count = pd.concat(
        [df.drop(columns="label"), df["label"].apply(pd.Series)],
        axis=1,
    )
    df_count = (
        df_count.groupby("utm")[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]]
        .sum()
        .reset_index()
    )
    df_count.columns = ["utm"] + [str(i) for i in range(num_labels)]
    df_count = df_count.merge(camera_loc_df, on="utm", how="right").fillna(0)
    df_count.iloc[:, 1:] = df_count.iloc[:, 1:].astype(int)
    return df_count

In [4]:
train_df, val_df = results2df(train_data, val_data, metadata_df)

print(train_df.shape, val_df.shape)
diff = set(val_df.utm.unique()) - set(train_df.utm.unique())
exclude_utm = list(diff)
# remove utms from new_val_df that are not in new_train_df
val_df = val_df[~val_df["utm"].isin(exclude_utm)]

print(train_df.columns)

train_camera_loc = train_df["utm"].unique().tolist()
val_camera_loc = val_df["utm"].unique().tolist()


# count number of videos per camera location and add to dataframe
train_video_count = (
    train_df.groupby("utm")["name"]
    .count()
    .reset_index()
    .rename(columns={"name": "video_count"})
)
val_video_count = (
    val_df.groupby("utm")["name"]
    .count()
    .reset_index()
    .rename(columns={"name": "video_count"})
)

train_beh_count = count_videos_per_camera_behaviour(train_df, train_camera_loc)
val_beh_count = count_videos_per_camera_behaviour(val_df, val_camera_loc)


  df["pred"] = df.pred.apply(lambda x: torch.sigmoid(torch.tensor(x)))


(2848, 24) (1003, 24)
Index(['name', 'split', 'pred', 'feat', 'negative_x', 'subject_id',
       'subject_id_bg', 'label', 'negative_y', 'country', 'research_site',
       'location_metadata', 'habitat', 'day', 'month', 'year', 'time_hr',
       'time_min', 'age_sex_group', 'site', 'value', 'utm', 'negative',
       'location_count'],
      dtype='object')


In [5]:
train_video_count

Unnamed: 0,utm,video_count
0,0214665_0056272,14
1,0214762_0051383,6
2,0215546_0053266,5
3,0215578_0053293,7
4,0215958_0050905,50
...,...,...
384,0809321_9882304,4
385,0809842_9882780,18
386,0809865_9882811,40
387,0810284_9882952,7


In [6]:
train_count = train_beh_count.merge(train_video_count, on="utm")
val_count = val_beh_count.merge(val_video_count, on="utm")

In [7]:
train_count

Unnamed: 0,utm,0,1,2,3,4,5,6,7,8,9,10,11,12,13,video_count
0,0381601_0967121,1,0,0,2,1,0,0,2,0,0,2,0,0,1,4
1,0337403_0191785,0,0,0,3,0,1,0,0,0,0,1,0,3,0,11
2,0381606_0967128,9,0,11,7,18,3,4,12,0,0,42,0,11,22,59
3,0340963_1432403,0,0,0,3,0,1,0,0,0,0,13,0,3,0,18
4,0337088_0191451,0,0,0,3,0,5,1,2,0,0,10,0,11,0,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,0744557_9558875,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
385,0343048_1434623,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
386,0744657_9564054,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
387,0522272_0587476,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [8]:
def count_unique_behaviours_per_row(row):
    # iterate over each row and count the number of unique behaviours
    beh_counter = 0
    for i in range(0, 13):
        if int(row.iloc[i]) > 0:
            beh_counter += 1
        
    return beh_counter

train_count["behaviour_count"] = train_count.apply(count_unique_behaviours_per_row, axis=1)
# remove columns 0, 1 ... 13
train_count = train_count.drop(columns=[str(i) for i in range(0, 14)])
val_count["behaviour_count"] = val_count.apply(count_unique_behaviours_per_row, axis=1)
val_count = val_count.drop(columns=[str(i) for i in range(0, 14)])

In [9]:
train_count

Unnamed: 0,utm,video_count,behaviour_count
0,0381601_0967121,4,6
1,0337403_0191785,11,4
2,0381606_0967128,59,9
3,0340963_1432403,18,4
4,0337088_0191451,28,6
...,...,...,...
384,0744557_9558875,1,1
385,0343048_1434623,1,1
386,0744657_9564054,1,1
387,0522272_0587476,1,1


In [10]:
val_count

Unnamed: 0,utm,video_count,behaviour_count
0,0381601_0967121,3,4
1,0337403_0191785,5,5
2,0381606_0967128,39,10
3,0340963_1432403,13,7
4,0231560_9396216,8,4
...,...,...,...
259,0686477_0649112,1,3
260,0256165_1293907,1,2
261,0486023_0569733,2,4
262,0687357_0648673,2,5


In [11]:
# sort by unique behaviours and then by total count
train_count = train_count.sort_values(
    by=[ "video_count", "behaviour_count",], ascending=[False, False]
)

val_count = val_count.sort_values(
    by=["video_count", "behaviour_count", ], ascending=[False, False]
)

In [12]:
val_count

Unnamed: 0,utm,video_count,behaviour_count
96,0216558_0056315,99,10
2,0381606_0967128,39,10
15,0336939_0191998,31,8
21,0336939_0191979,28,10
13,0381564_0967028,22,9
...,...,...,...
234,0782167_0812105,1,1
242,0660653_0606290,1,1
248,0696798_0596071,1,1
254,0744661_9561056,1,1


In [13]:
def return_segments(df_count, head=50, tail=10, return_cumsum_df=True, total_videos = 10):
    df_count["beh_coverage"] = (df_count["behaviour_count"] / total_videos) * 100

    # Select locations that make up 50% of the data
    head_df = df_count.query(f"beh_coverage >= {head}")

    # Calculate locations outside the top 50% with more than 10 samples
    tail_df = df_count.query(f"beh_coverage < {head} & behaviour_count >= {tail}")

    # Calculate locations with fewer than 10 samples
    few_shot_df = df_count.query(f"behaviour_count < {tail}")
    if return_cumsum_df:
        return head_df, tail_df, few_shot_df
    return (
        head_df["key"].values.tolist(),
        tail_df["key"].values.tolist(),
        few_shot_df["key"].values.tolist(),
    )


def return_segments_new(
    df_count, head=50, head_beh=50, tail=10, beh_tail=10, return_cumsum_df=True
):
    # rename columns
    total_videos = df_count["video_count"].sum()
    df_count["cumulative_count"] = df_count["video_count"].cumsum()

    total_beh = 14
    df_count["beh_coverage"] = (df_count["behaviour_count"] / total_beh) * 100

    df_count["cumulative_percentage"] = (
        df_count["cumulative_count"] / total_videos
    ) * 100

    # Select locations that make up 50% of the data
    head_df = df_count.query(
        f"beh_coverage > {head_beh} & cumulative_percentage <= {head}"
    )


    tail_df = df_count.query(
        f"beh_coverage < {head_beh} & cumulative_percentage > {head} & behaviour_count > {beh_tail} & video_count > {tail}"
    )
    
    few_shot_df = df_count.query(
        f"behaviour_count < {beh_tail}  & video_count < {tail}"
    )
    if return_cumsum_df:
        return head_df, tail_df, few_shot_df
    return (
        head_df["key"].values.tolist(),
        tail_df["key"].values.tolist(),
        few_shot_df["key"].values.tolist(),
    )

In [14]:
train_count

Unnamed: 0,utm,video_count,behaviour_count
89,0216558_0056315,156,10
2,0381606_0967128,59,9
16,0336939_0191998,55,8
22,0336939_0191979,54,9
178,0215958_0050905,50,7
...,...,...,...
384,0744557_9558875,1,1
385,0343048_1434623,1,1
386,0744657_9564054,1,1
387,0522272_0587476,1,1


In [15]:
#head_df, tail_df, few_shot_df = return_segments_new(
#    train_count, head=50, tail=10, head_beh=40, beh_tail=3
#)

head_df, tail_df, few_shot_df = return_segments(
    train_count, head=50, tail=3, total_videos=14
)

In [16]:
head_df.shape[0], tail_df.shape[0], few_shot_df.shape[0]


(35, 157, 197)

In [17]:
head_df

Unnamed: 0,utm,video_count,behaviour_count,beh_coverage
89,0216558_0056315,156,10,71.428571
2,0381606_0967128,59,9,64.285714
16,0336939_0191998,55,8,57.142857
22,0336939_0191979,54,9,64.285714
178,0215958_0050905,50,7,50.0
31,0809865_9882811,40,9,64.285714
72,0522043_0588350,35,8,57.142857
139,0807877_9881958,35,7,50.0
305,0687357_0648673,34,8,57.142857
13,0381564_0967028,33,10,71.428571


In [18]:
tail_df

Unnamed: 0,utm,video_count,behaviour_count,beh_coverage
68,0216572_0056318,45,6,42.857143
71,0745656_9557749,33,6,42.857143
219,0744684_9561569,31,6,42.857143
27,0257175_1291832,30,6,42.857143
4,0337088_0191451,28,6,42.857143
...,...,...,...,...
245,0701824_0597013,2,3,21.428571
268,0337015_0191426,2,3,21.428571
272,0699945_0599434,2,3,21.428571
319,0689398_0650122,2,3,21.428571


In [19]:
few_shot_df

Unnamed: 0,utm,video_count,behaviour_count,beh_coverage
83,0545171_0819699,12,2,14.285714
106,0231921_9394324,12,2,14.285714
174,0544176_0818106,12,2,14.285714
180,0548309_0822019,12,2,14.285714
133,0336676_0190754,11,1,7.142857
...,...,...,...,...
384,0744557_9558875,1,1,7.142857
385,0343048_1434623,1,1,7.142857
386,0744657_9564054,1,1,7.142857
387,0522272_0587476,1,1,7.142857


In [20]:
head_train_list = head_df["utm"].values.tolist()
tail_train_list = tail_df["utm"].values.tolist()
few_shot_train_list = few_shot_df["utm"].values.tolist()



In [21]:
def calculate_metrics(df, round_to=3, show_per_class=False):
    f1_values = multilabel_f1_score(
        torch.tensor(np.stack(df["pred"])),
        torch.tensor(np.stack(df["label"])),
        num_labels=14,
        average="none",
    )
    # avg_map = round(map_values.mean().item(), round_to)
    avg_f1 = round(f1_values.mean().item(), round_to)
    if show_per_class:
        val_list = []

        for v in f1_values:
            val_list.append(round(v.item(), round_to))

        # return map_values
        return val_list
    return avg_f1

In [22]:
th_df = train_df[train_df["utm"].isin(head_train_list)]
tt_df = train_df[train_df["utm"].isin(tail_train_list)]
tf_df = train_df[train_df["utm"].isin(few_shot_train_list)]

vh_df = val_df[val_df["utm"].isin(head_train_list)]
vt_df = val_df[val_df["utm"].isin(tail_train_list)]
vf_df = val_df[val_df["utm"].isin(few_shot_train_list)]

In [23]:
avg_map_th = calculate_metrics(th_df)
avg_map_tt = calculate_metrics(tt_df)
avg_map_tf = calculate_metrics(tf_df)


avg_map_vh = calculate_metrics(vh_df)
avg_map_vt = calculate_metrics(vt_df)
avg_map_vf = calculate_metrics(vf_df)


map_values_th = calculate_metrics(th_df, show_per_class=True)
map_values_tt = calculate_metrics(tt_df, show_per_class=True)
map_values_tf = calculate_metrics(tf_df, show_per_class=True)

map_values_vh = calculate_metrics(vh_df, show_per_class=True)
map_values_vt = calculate_metrics(vt_df, show_per_class=True)
map_values_vf = calculate_metrics(vf_df, show_per_class=True)

# show as dataframe
result_avg = pd.DataFrame(
    {
        "head": [avg_map_th, avg_map_vh],
        "tail": [avg_map_tt, avg_map_vt],
        "few_shot": [avg_map_tf, avg_map_vf],
    },
    index=["train", "val"],
)

result_avg

Unnamed: 0,head,tail,few_shot
train,0.942,0.948,0.457
val,0.508,0.452,0.226


In [24]:
# create result df and combine map_values_th
results_map = pd.DataFrame(
    {
        "head": map_values_th,
        "tail": map_values_tt,
        "few_shot": map_values_tf,
        "head_val": map_values_vh,
        "tail_val": map_values_vt,
        "few_shot_val": map_values_vf,
        "behaviours": behaviours,
        "segments": segments,
    }
    
)


In [25]:
# the more behaviours types each camera location has, the better its performance
results_map

Unnamed: 0,head,tail,few_shot,head_val,tail_val,few_shot_val,behaviours,segments
0,0.982,0.95,0.0,0.4,0.286,0.0,aggression,few_shot
1,0.946,0.914,0.8,0.457,0.091,0.0,bipedal,tail
2,0.933,0.975,0.958,0.353,0.596,0.5,camera_reaction,tail
3,0.976,0.927,0.9,0.717,0.6,0.286,climbing,tail
4,0.954,0.914,0.0,0.554,0.308,0.0,display,few_shot
5,0.975,0.962,1.0,0.877,0.697,0.125,feeding,tail
6,0.887,0.914,0.0,0.262,0.4,0.0,grooming,tail
7,0.945,0.954,0.0,0.465,0.442,0.222,object_carrying,tail
8,0.923,0.957,0.0,0.308,0.182,0.0,piloerection,few_shot
9,0.846,0.941,0.0,0.0,0.0,0.0,playing,few_shot
