In [1]:
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from data_utils import results2df
from torchmetrics.functional.classification import multilabel_f1_score
import os
import torch

import pickle as pkl
import pandas as pd
import numpy as np

from torchmetrics.functional.classification import (
    multilabel_f1_score,
    multilabel_precision,
    multilabel_recall,
)

import pandas as pd

from data_utils import results2df

pd.options.mode.copy_on_write = True

plt.style.use("science")

In [2]:
def calculate_metrics(preds, labels, behaviour_idices:list ):
    # Convert preds and labels to tensors
    preds, labels = np.stack(preds), np.stack(labels)
    preds, labels = torch.tensor(preds, dtype=torch.float32), torch.tensor(labels)
    # Calculate metrics
    f1 = multilabel_f1_score(preds, labels, num_labels=14, average="none")
    #precision = multilabel_precision(preds, labels, num_labels=14, average="none")
    #recall = multilabel_recall(preds, labels, num_labels=14, average="none")

    f1_store = []
    for i in behaviour_idices:
        f1_store.append(f1[i].item())
    return f1_store#, precision, recall

def measure_domain_shift(train_df, val_df, pred_column, behaviours):

    store = []
    mutual_store = []
    exclusive_store = []

    overall_store = []

    for idx in range(len(behaviours)):
        val_agg_df = val_df[val_df.label.apply(lambda x: x[idx] == 1)]
        train_agg_df = train_df[train_df.label.apply(lambda x: x[idx] == 1)]

    

        overall_f1, overall_precision, overall_recall = calculate_metrics(
            val_agg_df[pred_column].values, val_agg_df["label"].values
        )



    

        mutual_df = val_agg_df[val_agg_df["utm"].isin(train_agg_df["utm"])]
        mutual_videos = len(mutual_df["utm"].unique())

        mutual_f1, mutual_precision, mutual_recall = calculate_metrics(
            mutual_df[pred_column].values, mutual_df["label"].values
        )

        exclusive_df = val_agg_df[~val_agg_df["utm"].isin(train_agg_df["utm"])]
        exclusive_videos = len(exclusive_df["utm"].unique())

        exclusive_f1, exclusive_precision, exclusive_recall = calculate_metrics(
            exclusive_df[pred_column].values, exclusive_df["label"].values
        )

        store.append(
            {
                "behaviour": behaviours[idx],
                "mutual_ct_loc": mutual_videos,
                "exclusive_ct_loc": exclusive_videos,
                "mutual_loc_prop": round(
                    mutual_videos / (exclusive_videos + mutual_videos), 2
                ),
                "overall_recall": overall_recall[idx].item(),
                "overall_precision": overall_precision[idx].item(),
                "overall_f1": overall_f1[idx].item(),
                "mutual_recall": mutual_recall[idx].item(),
                "mutual_precision": mutual_precision[idx].item(),
                "mutual_f1": mutual_f1[idx].item(),
                "exclusive_recall": exclusive_recall[idx].item(),
                "exclusive_precision": exclusive_precision[idx].item(),
                "exclusive_f1": exclusive_f1[idx].item(),
            }
        )

        val_agg_df.loc[:, "behaviour"] = behaviours[idx]
        overall_store.append(val_agg_df)
        mutual_df.loc [:, "behaviour"] = behaviours[idx]
        mutual_store.append(mutual_df)
        exclusive_df.loc[:, "behaviour"] = behaviours[idx]
        exclusive_store.append(exclusive_df)

        


    df = pd.DataFrame(store)

    # Round all numerical columns to 2 decimal places
    df = df.round(4)

    mutual_df = pd.concat(mutual_store)
    exclusive_df = pd.concat(exclusive_store)
    overall_df = pd.concat(overall_store)


    

    return mutual_df, exclusive_df, overall_df, df

In [3]:
train_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_feats=train_feats.pkl"
val_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_e=200_feats=val_feats.pkl"
metadata_file = "../dataset/metadata/new_metadata.csv"
behaviours_file = "../dataset/metadata/behaviours.txt"
segments_file = "../dataset/metadata/segments.txt"

camera_loc_df = pd.read_csv("../dataset/metadata/ordered_locations.txt", header=None)

# convert to list
camera_loc_list = camera_loc_df.values.tolist()
camera_loc_list = [loc[0] for loc in camera_loc_list]

with open(train_results, "rb") as f:
    train_data = pkl.load(f)

with open(
    val_results,
    "rb",
) as f:
    val_data = pkl.load(f)

metadata_df = pd.read_csv(metadata_file)

with open(behaviours_file, "rb") as f:
    behaviours = [beh.decode("utf-8").strip() for beh in f.readlines()]

with open(segments_file, "rb") as f:
    segments = [seg.decode("utf-8").strip() for seg in f.readlines()]

In [4]:



def calculate_num_sample_per_behaviour(df, behaviours):

    overall_store = []

    for idx in range(len(behaviours)):
        agg_df = df[df.label.apply(lambda x: x[idx] == 1)]
        agg_df.loc[:, "behaviour"] = behaviours[idx]
        overall_store.append(agg_df)
    
    overall_df = pd.concat(overall_store)

    return overall_df


def return_cam_loc_segments(df_count, head=50, tail=20, return_cumsum_df=True):
    # rename columns
    total_videos = df_count["cam_loc_count"].sum()

    df_count["cumulative_count"] = df_count["cam_loc_count"].cumsum()
    df_count["cumulative_percentage"] = (
        df_count["cumulative_count"] / total_videos
    ) * 100

    # Select locations that make up 50% of the data
    head_df = df_count[df_count["cumulative_percentage"] <= head]

    # Calculate locations outside the top 50% with more than 10 samples
    tail_df = df_count[df_count["cumulative_percentage"] > head]
    tail_df = tail_df[tail_df["cam_loc_count"] >= tail]

    # Calculate locations with fewer than 10 samples
    few_shot_df = df_count[df_count["cam_loc_count"] < tail]
    if return_cumsum_df:
        return head_df, tail_df, few_shot_df
    return (
        head_df["behaviour"].values.tolist(),
        tail_df["behaviour"].values.tolist(),
        few_shot_df["behaviour"].values.tolist(),
    )


def count_cam_loc(df, behaviours):
    store = []

    for behaviour in behaviours:
        subset_df = df[df["behaviour"]==behaviour]

        # count the number of samples per location
        cam_loc_count = subset_df["utm"].nunique()

        store.append({
            "behaviour": behaviour,
            "cam_loc_count": cam_loc_count
        })
        
    return pd.DataFrame(store)

def return_beh_segments(df, head_beh, tail_beh, few_shot_beh):

    head_df = df[df["behaviour"].isin(head_beh)]
    tail_df = df[df["behaviour"].isin(tail_beh)]
    few_shot_df = df[df["behaviour"].isin(few_shot_beh)]

    return head_df, tail_df, few_shot_df


In [5]:
train_df, val_df = results2df(train_data, val_data, metadata_df)


train_df_count = calculate_num_sample_per_behaviour(train_df, behaviours)
val_df_count = calculate_num_sample_per_behaviour(val_df, behaviours)

  df["pred"] = df.pred.apply(lambda x: torch.sigmoid(torch.tensor(x)))


In [6]:
train_df_loc_count = count_cam_loc(train_df_count, behaviours)
train_df_loc_count.sort_values(by="cam_loc_count", ascending=False, inplace=True)

In [7]:
train_df_loc_count

Unnamed: 0,behaviour,cam_loc_count
12,travel,324
10,resting,206
2,camera_reaction,128
5,feeding,93
3,climbing,78
7,object_carrying,78
6,grooming,66
13,vocalisation,51
11,tool_use,42
1,bipedal,35


In [8]:
head_train_beh, tail_train_beh, few_shot_train_beh = return_cam_loc_segments(
    train_df_loc_count, tail=20, head=50, return_cumsum_df=False
)



In [9]:
head_train_beh

['travel', 'resting']

In [10]:
head_train_df, tail_train_df, few_shot_train_df = return_beh_segments(train_df_count, head_train_beh, tail_train_beh, few_shot_train_beh)
head_val_df, tail_val_df, few_shot_val_df = return_beh_segments(val_df_count, head_train_beh, tail_train_beh, few_shot_train_beh)

In [11]:
head_train_df


Unnamed: 0,name,split,pred,feat,negative_x,subject_id,subject_id_bg,label,negative_y,country,...,year,time_hr,time_min,age_sex_group,site,value,utm,negative,location_count,behaviour
2,36070466.mp4,train,"[tensor(0.0110), tensor(0.0004), tensor(0.0100...","[tensor(0.0224, device='cuda:0'), tensor(0.219...",False,36070466.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,2.0,51.0,adult male,geprenaf,"travel,resting",0381606_0967128,False,98,resting
3,36070483.mp4,train,"[tensor(0.0045), tensor(0.0003), tensor(0.1025...","[tensor(0.0350, device='cuda:0'), tensor(0.350...",False,36070483.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,58.0,adult unclear,geprenaf,resting,0381606_0967128,False,98,resting
4,36070475.mp4,train,"[tensor(0.0005), tensor(3.1627e-06), tensor(0....","[tensor(0.2311, device='cuda:0'), tensor(0.032...",False,36070475.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1]",False,cotedivoire,...,2014.0,2.0,54.0,adult male,geprenaf,"resting,object_carrying,resting,vocalisation",0381606_0967128,False,98,resting
6,36070471.mp4,train,"[tensor(0.0396), tensor(0.0022), tensor(0.3375...","[tensor(0.0250, device='cuda:0'), tensor(0.078...",False,36070471.mp4,36070480.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,53.0,adult male,geprenaf,"camera_reaction,resting",0381606_0967128,False,98,resting
7,36070467.mp4,train,"[tensor(0.0264), tensor(0.0014), tensor(0.2416...","[tensor(0.0362, device='cuda:0'), tensor(0.109...",False,36070467.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,51.0,adult male,geprenaf,resting,0381606_0967128,False,98,resting
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1871,36070454.mp4,train,"[tensor(0.0002), tensor(9.2608e-05), tensor(0....","[tensor(0.1199, device='cuda:0'), tensor(0.282...",False,36070454.mp4,36070401.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,21.0,30.0,adult male,geprenaf,"camera_reaction,resting,travel",0381606_0967128,False,98,travel
1874,36070459.mp4,train,"[tensor(6.4438e-06), tensor(5.1429e-08), tenso...","[tensor(0.2199, device='cuda:0'), tensor(0.357...",False,36070459.mp4,36070401.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,21.0,33.0,adult male,geprenaf,"resting,travel",0381606_0967128,False,98,travel
1875,acp000dhkr.mp4,train,"[tensor(1.0010e-05), tensor(1.0000), tensor(1....","[tensor(0.0095, device='cuda:0'), tensor(0.566...",False,acp000dhkr.mp4,acp000dhkz.mp4,"[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",False,uganda,...,2013.0,22.0,4.0,adult male,ngogo,"travel,camera_reaction,bipedal",0214665_0056272,False,16,travel
1876,59752466.mp4,train,"[tensor(5.2558e-07), tensor(1.4160e-05), tenso...","[tensor(0.0004, device='cuda:0'), tensor(1.466...",False,59752466.mp4,59752467.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",False,tanzania,...,2014.0,22.0,4.0,adult unclear,ugalla,travel,0232212_9394186,False,3,travel


In [12]:
head_train_df

Unnamed: 0,name,split,pred,feat,negative_x,subject_id,subject_id_bg,label,negative_y,country,...,year,time_hr,time_min,age_sex_group,site,value,utm,negative,location_count,behaviour
2,36070466.mp4,train,"[tensor(0.0110), tensor(0.0004), tensor(0.0100...","[tensor(0.0224, device='cuda:0'), tensor(0.219...",False,36070466.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,2.0,51.0,adult male,geprenaf,"travel,resting",0381606_0967128,False,98,resting
3,36070483.mp4,train,"[tensor(0.0045), tensor(0.0003), tensor(0.1025...","[tensor(0.0350, device='cuda:0'), tensor(0.350...",False,36070483.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,58.0,adult unclear,geprenaf,resting,0381606_0967128,False,98,resting
4,36070475.mp4,train,"[tensor(0.0005), tensor(3.1627e-06), tensor(0....","[tensor(0.2311, device='cuda:0'), tensor(0.032...",False,36070475.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1]",False,cotedivoire,...,2014.0,2.0,54.0,adult male,geprenaf,"resting,object_carrying,resting,vocalisation",0381606_0967128,False,98,resting
6,36070471.mp4,train,"[tensor(0.0396), tensor(0.0022), tensor(0.3375...","[tensor(0.0250, device='cuda:0'), tensor(0.078...",False,36070471.mp4,36070480.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,53.0,adult male,geprenaf,"camera_reaction,resting",0381606_0967128,False,98,resting
7,36070467.mp4,train,"[tensor(0.0264), tensor(0.0014), tensor(0.2416...","[tensor(0.0362, device='cuda:0'), tensor(0.109...",False,36070467.mp4,36070480.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",False,cotedivoire,...,2014.0,2.0,51.0,adult male,geprenaf,resting,0381606_0967128,False,98,resting
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1871,36070454.mp4,train,"[tensor(0.0002), tensor(9.2608e-05), tensor(0....","[tensor(0.1199, device='cuda:0'), tensor(0.282...",False,36070454.mp4,36070401.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,21.0,30.0,adult male,geprenaf,"camera_reaction,resting,travel",0381606_0967128,False,98,travel
1874,36070459.mp4,train,"[tensor(6.4438e-06), tensor(5.1429e-08), tenso...","[tensor(0.2199, device='cuda:0'), tensor(0.357...",False,36070459.mp4,36070401.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",False,cotedivoire,...,2014.0,21.0,33.0,adult male,geprenaf,"resting,travel",0381606_0967128,False,98,travel
1875,acp000dhkr.mp4,train,"[tensor(1.0010e-05), tensor(1.0000), tensor(1....","[tensor(0.0095, device='cuda:0'), tensor(0.566...",False,acp000dhkr.mp4,acp000dhkz.mp4,"[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",False,uganda,...,2013.0,22.0,4.0,adult male,ngogo,"travel,camera_reaction,bipedal",0214665_0056272,False,16,travel
1876,59752466.mp4,train,"[tensor(5.2558e-07), tensor(1.4160e-05), tenso...","[tensor(0.0004, device='cuda:0'), tensor(1.466...",False,59752466.mp4,59752467.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",False,tanzania,...,2014.0,22.0,4.0,adult unclear,ugalla,travel,0232212_9394186,False,3,travel


In [13]:
# find head behaviours indices
head_beh_indices = [behaviours.index(beh) for beh in head_train_beh]
tail_beh_indices = [behaviours.index(beh) for beh in tail_train_beh]
few_shot_beh_indices = [behaviours.index(beh) for beh in few_shot_train_beh]

In [14]:
tail_train_beh

['camera_reaction',
 'feeding',
 'climbing',
 'object_carrying',
 'grooming',
 'vocalisation',
 'tool_use',
 'bipedal',
 'display',
 'aggression']

In [15]:
head_beh_indices

[12, 10]

In [16]:
map_beh_th = calculate_metrics(head_train_df["pred"].values, head_train_df["label"].values, head_beh_indices)
map_beh_tt = calculate_metrics(tail_train_df["pred"].values, tail_train_df["label"].values, tail_beh_indices)
map_beh_tf = calculate_metrics(few_shot_train_df["pred"].values, few_shot_train_df["label"].values, few_shot_beh_indices)

map_beh_vh = calculate_metrics(head_val_df["pred"].values, head_val_df["label"].values, head_beh_indices)
map_beh_vt = calculate_metrics(tail_val_df["pred"].values, tail_val_df["label"].values, tail_beh_indices)
map_beh_vf = calculate_metrics(few_shot_train_df["pred"].values, few_shot_train_df["label"].values, few_shot_beh_indices)

In [17]:
map_store = []

for i in range(len(behaviours)):
    if i in head_beh_indices:
        # i should be equal to the value of the behaviour
    
        map_store.append({
            "behaviour": behaviours[i],
            "train_map": map_beh_th[head_beh_indices.index(i)],
            "val_map": map_beh_vh[head_beh_indices.index(i)]
        })
    elif i in tail_beh_indices:


        map_store.append({
            "behaviour": behaviours[i],
            "train_map": map_beh_tt[tail_beh_indices.index(i)],
            "val_map": map_beh_vt[tail_beh_indices.index(i)]
        })
    elif i in few_shot_beh_indices:
        map_store.append({
            "behaviour": behaviours[few_shot_beh_indices.index(i)],
            "train_map": map_beh_tf[few_shot_beh_indices.index(i)],
            "val_map": map_beh_vf[few_shot_beh_indices.index(i)]
        })
        

In [18]:
excl_beh_list = ['aggression',
 'camera_reaction',
 'climbing',
 'feeding',
 'object_carrying',
 'resting',
 'tool_use',
 'travel']

mutual_beh_list = ['bipedal', 'display', 'grooming', 'piloerection', 'playing', 'vocalisation']

In [19]:
map_df = pd.DataFrame(map_store)

map_df["diff"] = map_df["train_map"] - map_df["val_map"]
map_df["segment"] = segments
map_df["behaviour_type"] = "exclusive"
map_df.loc[map_df["behaviour"].isin(mutual_beh_list), "behaviour_type"] = "mutual"

In [20]:
map_df

Unnamed: 0,behaviour,train_map,val_map,diff,segment,behaviour_type
0,aggression,0.996923,0.460317,0.536606,few_shot,exclusive
1,bipedal,0.972222,0.43,0.542222,tail,mutual
2,camera_reaction,0.975281,0.512563,0.462718,tail,exclusive
3,climbing,0.985885,0.692683,0.293202,tail,exclusive
4,display,0.97593,0.669725,0.306205,few_shot,mutual
5,feeding,0.986932,0.834728,0.152203,tail,exclusive
6,grooming,0.907336,0.262626,0.64471,tail,mutual
7,object_carrying,0.968781,0.514039,0.454743,tail,exclusive
8,aggression,1.0,1.0,0.0,few_shot,exclusive
9,bipedal,0.974359,0.974359,0.0,few_shot,mutual


In [21]:
head_train_beh 

['travel', 'resting']

In [22]:
behaviours

['aggression',
 'bipedal',
 'camera_reaction',
 'climbing',
 'display',
 'feeding',
 'grooming',
 'object_carrying',
 'piloerection',
 'playing',
 'resting',
 'tool_use',
 'travel',
 'vocalisation']

In [23]:
head_beh_indices

[12, 10]

In [24]:
# make a list for each behaviour "few_shot", "tail", "tail", "few_shot"

# make a list if the behaviour is in the head, tail or few_shot
behaviour_segment_list = []
for idx in range(len(behaviours)):
    if idx in head_beh_indices:
        behaviour_segment_list.append("head")
    elif idx in tail_beh_indices:
        behaviour_segment_list.append("tail")
    else:
        behaviour_segment_list.append("few_shot")

