In [23]:
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from data_utils import results2df
from torchmetrics.functional.classification import multilabel_f1_score
import torch

import pickle as pkl
import pandas as pd
import numpy as np

from torchmetrics.functional.classification import (
    multilabel_f1_score,
    multilabel_precision,
    multilabel_recall,
)

import pandas as pd

from data_utils import results2df

pd.options.mode.copy_on_write = True

plt.style.use("science")

In [24]:
def calculate_metrics(preds, labels):
    # Convert preds and labels to tensors
    preds, labels = np.stack(preds), np.stack(labels)
    preds, labels = torch.tensor(preds, dtype=torch.float32), torch.tensor(labels)
    # Calculate metrics
    f1 = multilabel_f1_score(preds, labels, num_labels=14, average="none")
    precision = multilabel_precision(preds, labels, num_labels=14, average="none")
    recall = multilabel_recall(preds, labels, num_labels=14, average="none")
    return f1, precision, recall

def return_cam_segments(df_count, head=50, tail=10, return_cumsum_df=True):
    # rename columns
    total_videos = df_count["video_count"].sum()

    df_count["cumulative_count"] = df_count["video_count"].cumsum()
    df_count["cumulative_percentage"] = (
        df_count["cumulative_count"] / total_videos
    ) * 100

    # Select locations that make up 50% of the data
    head_df = df_count[df_count["cumulative_percentage"] <= head]

    # Calculate locations outside the top 50% with more than 10 samples
    tail_df = df_count[df_count["cumulative_percentage"] > head]
    tail_df = tail_df[tail_df["video_count"] >= tail]

    # Calculate locations with fewer than 10 samples
    few_shot_df = df_count[df_count["video_count"] < tail]
    if return_cumsum_df:
        return head_df, tail_df, few_shot_df
    return (
        head_df["utm"].values.tolist(),
        tail_df["utm"].values.tolist(),
        few_shot_df["utm"].values.tolist(),
    )


In [25]:
train_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_feats=train_feats.pkl"
val_results = "../dataset/results/model=slow_r50_ds=panaf_seq_fd_only_e=200_feats=val_feats.pkl"
metadata_file = "../dataset/metadata/new_metadata.csv"
behaviours_file = "../dataset/metadata/behaviours.txt"
segments_file = "../dataset/metadata/segments.txt"

camera_loc_df = pd.read_csv("../dataset/metadata/ordered_locations.txt", header=None)

# convert to list
camera_loc_list = camera_loc_df.values.tolist()
camera_loc_list = [loc[0] for loc in camera_loc_list]

with open(train_results, "rb") as f:
    train_data = pkl.load(f)

with open(
    val_results,
    "rb",
) as f:
    val_data = pkl.load(f)

metadata_df = pd.read_csv(metadata_file)

with open(behaviours_file, "rb") as f:
    behaviours = [beh.decode("utf-8").strip() for beh in f.readlines()]

with open(segments_file, "rb") as f:
    segments = [seg.decode("utf-8").strip() for seg in f.readlines()]

In [26]:
def count_videos_per_camera_behaviour(
    df: pd.DataFrame, camera_loc_list: list, num_labels: int = 14
):
    camera_loc_df = pd.DataFrame(camera_loc_list, columns=["utm"])
    df_count = pd.concat(
        [df.drop(columns="label"), df["label"].apply(pd.Series)],
        axis=1,
    )
    df_count = (
        df_count.groupby("utm")[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]]
        .sum()
        .reset_index()
    )
    df_count.columns = ["utm"] + [str(i) for i in range(num_labels)]
    df_count = df_count.merge(camera_loc_df, on="utm", how="right").fillna(0)
    df_count.iloc[:, 1:] = df_count.iloc[:, 1:].astype(int)
    return df_count


def calculate_metrics(preds, labels):
    # Convert preds and labels to tensors
    preds, labels = np.stack(preds), np.stack(labels)
    preds, labels = torch.tensor(preds, dtype=torch.float32), torch.tensor(labels)
    # Calculate metrics
    f1 = multilabel_f1_score(preds, labels, num_labels=14, average="none")
    precision = multilabel_precision(preds, labels, num_labels=14, average="none")
    recall = multilabel_recall(preds, labels, num_labels=14, average="none")
    return f1, precision, recall


def measure_domain_shift(train_df, val_df, pred_column, behaviours):

    store = []
    mutual_store = []
    exclusive_store = []

    overall_store = []

    for idx in range(len(behaviours)):
        val_agg_df = val_df[val_df.label.apply(lambda x: x[idx] == 1)]
        train_agg_df = train_df[train_df.label.apply(lambda x: x[idx] == 1)]

    

        overall_f1, overall_precision, overall_recall = calculate_metrics(
            val_agg_df[pred_column].values, val_agg_df["label"].values
        )



    

        mutual_df = val_agg_df[val_agg_df["utm"].isin(train_agg_df["utm"])]
        mutual_videos = len(mutual_df["utm"].unique())

        mutual_f1, mutual_precision, mutual_recall = calculate_metrics(
            mutual_df[pred_column].values, mutual_df["label"].values
        )

        exclusive_df = val_agg_df[~val_agg_df["utm"].isin(train_agg_df["utm"])]
        exclusive_videos = len(exclusive_df["utm"].unique())

        exclusive_f1, exclusive_precision, exclusive_recall = calculate_metrics(
            exclusive_df[pred_column].values, exclusive_df["label"].values
        )

        store.append(
            {
                "behaviour": behaviours[idx],
                "mutual_ct_loc": mutual_videos,
                "exclusive_ct_loc": exclusive_videos,
                "mutual_loc_prop": round(
                    mutual_videos / (exclusive_videos + mutual_videos), 2
                ),
                "overall_recall": overall_recall[idx].item(),
                "overall_precision": overall_precision[idx].item(),
                "overall_f1": overall_f1[idx].item(),
                "mutual_recall": mutual_recall[idx].item(),
                "mutual_precision": mutual_precision[idx].item(),
                "mutual_f1": mutual_f1[idx].item(),
                "exclusive_recall": exclusive_recall[idx].item(),
                "exclusive_precision": exclusive_precision[idx].item(),
                "exclusive_f1": exclusive_f1[idx].item(),
            }
        )

        val_agg_df.loc[:, "behaviour"] = behaviours[idx]
        overall_store.append(val_agg_df)
        mutual_df.loc [:, "behaviour"] = behaviours[idx]
        mutual_store.append(mutual_df)
        exclusive_df.loc[:, "behaviour"] = behaviours[idx]
        exclusive_store.append(exclusive_df)

        


    df = pd.DataFrame(store)

    # Round all numerical columns to 2 decimal places
    df = df.round(4)

    mutual_df = pd.concat(mutual_store)
    exclusive_df = pd.concat(exclusive_store)
    overall_df = pd.concat(overall_store)


    

    return mutual_df, exclusive_df, overall_df, df


def return_beh_segments(df_count, head=50, tail=10, return_cumsum_df=True, total_behaviours = 14):
    df_count["beh_coverage"] = (df_count["behaviour_count"] / total_behaviours) * 100

    # Select locations that make up 50% of the data
    head_df = df_count.query(f"beh_coverage >= {head}")

    # Calculate locations outside the top 50% with more than 10 samples
    tail_df = df_count.query(f"beh_coverage < {head} & behaviour_count >= {tail}")

    # Calculate locations with fewer than 10 samples
    few_shot_df = df_count.query(f"behaviour_count < {tail}")
    if return_cumsum_df:
        return head_df, tail_df, few_shot_df
    return (
        head_df["utm"].values.tolist(),
        tail_df["utm"].values.tolist(),
        few_shot_df["utm"].values.tolist(),
    )

In [27]:
train_df, val_df = results2df(train_data, val_data, metadata_df)

#print(train_df.shape, val_df.shape)
#diff = set(val_df.utm.unique()) - set(train_df.utm.unique())
#exclude_utm = list(diff)
# remove utms from new_val_df that are not in new_train_df
#val_df = val_df[~val_df["utm"].isin(exclude_utm)]

print(train_df.columns)

#train_camera_loc = train_df["utm"].unique().tolist()
#val_camera_loc = val_df["utm"].unique().tolist()

# concatenate train and val dataframes
df = pd.concat([train_df, val_df])


# count number of videos per camera location and add to dataframe
video_count = (
    df.groupby("utm")["name"]
    .count()
    .reset_index()
    .rename(columns={"name": "video_count"})
)



beh_count = count_videos_per_camera_behaviour(df, video_count["utm"].unique())



  df["pred"] = df.pred.apply(lambda x: torch.sigmoid(torch.tensor(x)))


Index(['name', 'split', 'pred', 'feat', 'negative_x', 'subject_id',
       'subject_id_bg', 'label', 'negative_y', 'country', 'research_site',
       'location_metadata', 'habitat', 'day', 'month', 'year', 'time_hr',
       'time_min', 'age_sex_group', 'site', 'value', 'utm', 'negative',
       'location_count'],
      dtype='object')


In [28]:
count = beh_count.merge(video_count, on="utm")


In [29]:
def count_unique_behaviours_per_row(row):
    # iterate over each row and count the number of unique behaviours
    beh_counter = 0
    for i in range(0, 13):
        if int(row.iloc[i]) > 0:
            beh_counter += 1
        
    return beh_counter

count["behaviour_count"] = count.apply(count_unique_behaviours_per_row, axis=1)


In [30]:
# sort by unique behaviours and then by total count
count =count.sort_values(
    by=[ "behaviour_count", "video_count",], ascending=[False, False]
)



In [31]:
# select video count and utm for separate df
#video_count = count[["utm", "video_count"]].sort_values(by="video_count", ascending=False)

video_count= count

beh_count = count.sort_values(by="video_count", ascending=False)


In [32]:
beh_count

Unnamed: 0,utm,0,1,2,3,4,5,6,7,8,9,10,11,12,13,video_count,behaviour_count
7,0216558_0056315,1,49,26,27,0,194,27,8,0,6,180,1,48,1,255,11
129,0381606_0967128,15,4,20,10,33,4,8,24,0,0,71,0,22,36,98,10
90,0336939_0191998,0,7,3,43,0,30,3,13,0,0,27,0,29,1,86,8
89,0336939_0191979,0,7,3,45,0,29,2,8,2,3,22,0,21,1,82,10
4,0215958_0050905,0,8,8,8,0,48,3,0,0,0,25,0,10,1,58,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,0485940_0567446,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
138,0484469_0566320,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
132,0384492_0963528,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
125,0378648_0964069,0,0,0,0,0,0,0,0,0,0,1,0,1,0,2,2


In [33]:
beh_count

Unnamed: 0,utm,0,1,2,3,4,5,6,7,8,9,10,11,12,13,video_count,behaviour_count
7,0216558_0056315,1,49,26,27,0,194,27,8,0,6,180,1,48,1,255,11
129,0381606_0967128,15,4,20,10,33,4,8,24,0,0,71,0,22,36,98,10
90,0336939_0191998,0,7,3,43,0,30,3,13,0,0,27,0,29,1,86,8
89,0336939_0191979,0,7,3,45,0,29,2,8,2,3,22,0,21,1,82,10
4,0215958_0050905,0,8,8,8,0,48,3,0,0,0,25,0,10,1,58,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,0485940_0567446,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
138,0484469_0566320,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
132,0384492_0963528,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,2
125,0378648_0964069,0,0,0,0,0,0,0,0,0,0,1,0,1,0,2,2


In [34]:
#head_beh_df, tail_beh_df, few_shot_beh_df = return_beh_segments(
#    beh_count, head=50, tail=3, total_behaviours=11
#)

head_beh_df, tail_beh_df, few_shot_beh_df = return_cam_segments(
    beh_count, tail=5
)



In [35]:
mutual_df, exclusive_df, overall_df, original_df = measure_domain_shift(
    train_df, val_df, pred_column="pred", behaviours=behaviours
)

In [36]:
original_df

Unnamed: 0,behaviour,mutual_ct_loc,exclusive_ct_loc,mutual_loc_prop,overall_recall,overall_precision,overall_f1,mutual_recall,mutual_precision,mutual_f1,exclusive_recall,exclusive_precision,exclusive_f1
0,aggression,6,8,0.43,0.28,1.0,0.4375,0.3529,1.0,0.5217,0.125,1.0,0.2222
1,bipedal,11,11,0.5,0.3269,1.0,0.4928,0.5,1.0,0.6667,0.0,0.0,0.0
2,camera_reaction,44,39,0.53,0.4676,1.0,0.6373,0.5213,1.0,0.6853,0.3556,1.0,0.5246
3,climbing,35,20,0.64,0.6154,1.0,0.7619,0.6911,1.0,0.8173,0.15,1.0,0.2609
4,display,7,5,0.58,0.5263,1.0,0.6897,0.6061,1.0,0.7547,0.0,0.0,0.0
5,feeding,57,16,0.78,0.7584,1.0,0.8626,0.8088,1.0,0.8943,0.0556,1.0,0.1053
6,grooming,24,16,0.6,0.2609,1.0,0.4138,0.3462,1.0,0.5143,0.0,0.0,0.0
7,object_carrying,27,23,0.54,0.402,1.0,0.5734,0.5333,1.0,0.6957,0.037,1.0,0.0714
8,piloerection,6,7,0.46,0.2,1.0,0.3333,0.4286,1.0,0.6,0.0,0.0,0.0
9,playing,3,6,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
head_beh_df.shape[0], tail_beh_df.shape[0], few_shot_beh_df.shape[0]


(47, 157, 185)

In [38]:
def my_calculate_metrics(df, round_to=3, show_per_class=False):
    f1_values = multilabel_f1_score(
        torch.tensor(np.stack(df["pred"])),
        torch.tensor(np.stack(df["label"])),
        num_labels=14,
        average="none",
        zero_division=1,
    )
    # avg_map = round(map_values.mean().item(), round_to)
    avg_f1 = round(f1_values.mean().item(), round_to)
    if show_per_class:
        val_list = []

        for v in f1_values:
            val_list.append(round(v.item(), round_to))

        # return map_values
        return val_list
    return avg_f1


def calculate_overall_performance_per_coverage_segment(df, segment_dfs, split_df , behaviours):
    store = []

    for idx, label in enumerate(behaviours):
        subset_df = df[df["behaviour"] == label]
        for i, segment in enumerate(["head", "tail", "few_shot"]):
            segment_df = segment_dfs[i]
            segment_beh_df = segment_df[segment_df["utm"].isin(subset_df.utm.unique().tolist())]

            split_segment_beh_df = split_df[
                split_df["utm"].isin(segment_beh_df.utm.unique().tolist())
            ]
            try:
                f1, _, _,  = calculate_metrics(split_segment_beh_df["pred"].values, split_segment_beh_df["label"].values)
                f1 = f1[idx].item()
            except:
                f1 = "N/A"

            store.append(
                {
                    "behaviour": label,
                    "segment": segment,
                    "f1": f1,
                }
            )

    return pd.DataFrame(store).round(3)

In [39]:
train_performance_per_coverage_segment_excl = calculate_overall_performance_per_coverage_segment(
    df=exclusive_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=train_df
)

val_performance_per_coverage_segment_excl = calculate_overall_performance_per_coverage_segment(
    df=exclusive_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=val_df
)

train_performance_per_coverage_segment_overall = calculate_overall_performance_per_coverage_segment(
    df=overall_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=train_df
)

val_performance_per_coverage_segment_overall = calculate_overall_performance_per_coverage_segment(
    df=overall_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=val_df
)

In [40]:
train_performance_per_coverage_segment_mutual = calculate_overall_performance_per_coverage_segment(
    df=mutual_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=train_df
)

val_performance_per_coverage_segment_mutual = calculate_overall_performance_per_coverage_segment(
    df=mutual_df, segment_dfs=[head_beh_df, tail_beh_df, few_shot_beh_df], behaviours=behaviours,
    split_df=val_df
)

In [41]:
excl_beh_list = ['aggression',
 'camera_reaction',
 'climbing',
 'feeding',
 'object_carrying',
 'resting',
 'tool_use',
 'travel']

mutual_beh_list = ['bipedal', 'display', 'grooming', 'piloerection', 'playing', 'vocalisation']

In [42]:
# merge the two dataframes  
train_performance_per_coverage_segment = train_performance_per_coverage_segment_mutual.merge(train_performance_per_coverage_segment_excl, on=["behaviour", "segment"], suffixes=("_mutual", "_exclusive"))
train_performance_per_coverage_segment["behaviour_type"] = "mutual"

train_performance_per_coverage_segment.loc[train_performance_per_coverage_segment["behaviour"].isin(excl_beh_list), "behaviour_type"] = "exclusive"


In [43]:
# merge the two dataframes together to get a single dataframe
val_performance_per_coverage_segment = val_performance_per_coverage_segment_mutual.merge(
    val_performance_per_coverage_segment_excl, on=["behaviour", "segment"], suffixes=("_mutual", "_exclusive")
)

val_performance_per_coverage_segment= val_performance_per_coverage_segment.merge(val_performance_per_coverage_segment_overall, on=["behaviour", "segment"])

# add new column to indicate if behaviour is mutual or exclusive
val_performance_per_coverage_segment["behaviour_type"] = "mutual"
val_performance_per_coverage_segment.loc[
    val_performance_per_coverage_segment["behaviour"].isin(excl_beh_list), "behaviour_type"
] = "exclusive"


In [22]:
val_performance_per_coverage_segment

Unnamed: 0,behaviour,segment,f1_mutual,f1_exclusive,f1,behaviour_type
0,aggression,head,0.454545,0.0,0.384615,exclusive
1,aggression,tail,0.5,0.5,0.5,exclusive
2,aggression,few_shot,,0.0,0.0,exclusive
3,bipedal,head,0.538462,0.0,0.4375,mutual
4,bipedal,tail,0.461538,0.0,0.352941,mutual
5,bipedal,few_shot,,0.0,0.0,mutual
6,camera_reaction,head,0.431373,0.0,0.423077,exclusive
7,camera_reaction,tail,0.757576,0.470588,0.66,exclusive
8,camera_reaction,few_shot,1.0,0.571429,0.625,exclusive
9,climbing,head,0.691099,0.0,0.683938,exclusive
