In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from data_utils import (
    read_files,
    results2df,
    return_ct_location_segments,
)

# Apply the "science" plotting style to every figure created in this notebook.
# NOTE(review): "science" is not one of matplotlib's bundled styles — presumably it is
# provided by the SciencePlots package; confirm it is installed (and imported, if the
# installed version requires that) before running this cell.
plt.style.use("science")

In [2]:
# --- Configuration -----------------------------------------------------------
# Name of the model whose result pickles should be collected.
model_name = "slow_r50-w-negatives"
folder_path = "../dataset/results/"
metadata_file = "../dataset/metadata/metadata.csv"
behavioural_labels_file = "../dataset/metadata/behaviours.txt"
segements_file = "../dataset/metadata/segments.txt"

# Directory that receives the CSV files written at the end of the notebook.
save_path = "../dataset/gradcam"

# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(save_path, exist_ok=True)


# --- Index the result files ---------------------------------------------------
# result_info[model_name][epoch][split] -> {"file_path": <path to the .pkl file>}
# Only files that end with ".pkl" and contain the model name are considered.
result_info = {}

# sorted() makes the insertion order independent of the file-system listing order.
for file in sorted(os.listdir(folder_path)):
    if not (file.endswith(".pkl") and model_name in file):
        continue

    # File names look like "model=<name>_e=<epoch>[-kinetics]_split=<split>.pkl":
    # the second-to-last "_" token carries the epoch.
    epoch = file.split("_")[-2].split("=")[1]
    if "-kinetics" in file:
        # "0-kinetics" -> "0"
        epoch = epoch.split("-")[0]

    # The split is whatever follows the last "=" up to the file extension.
    split = file.split("=")[-1].split(".")[0]

    # setdefault creates the intermediate levels on first use.
    result_info.setdefault(model_name, {}).setdefault(epoch, {})[split] = {
        "file_path": os.path.join(folder_path, file),
    }

In [3]:
# Show the collected result files, nested as model name -> epoch -> split.
result_info

{'slow_r50-w-negatives': {'100': {'val': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=100_split=val.pkl'},
   'train': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=100_split=train.pkl'}},
  '200': {'train': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=200_split=train.pkl'},
   'val': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=200_split=val.pkl'}},
  '0': {'train': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=0-kinetics_split=train.pkl'},
   'val': {'file_path': '../dataset/results/model=slow_r50-w-negatives_e=0-kinetics_split=val.pkl'}}}}

In [4]:
# Metadata table read from the CSV configured above.
metadata_df = pd.read_csv(metadata_file)

# Both text files are read line by line; line i of the behaviours file is paired
# with line i of the segments file. Opening in text mode with an explicit
# encoding replaces the manual bytes -> str decoding.
with open(behavioural_labels_file, encoding="utf-8") as f:
    behaviours = [line.strip() for line in f]

with open(segements_file, encoding="utf-8") as f:
    segments = [line.strip() for line in f]

# zip() would silently drop the surplus lines of the longer file, which would
# hide a mismatch between the two files.
if len(behaviours) != len(segments):
    raise ValueError(
        f"{behavioural_labels_file} has {len(behaviours)} lines but "
        f"{segements_file} has {len(segments)}; they must be aligned line by line"
    )

# Map each segment name to the list of behaviours assigned to it
# (behaviour and segment share the same line index in their files).
behavioural_dict = {}
for b, s in zip(behaviours, segments):
    behavioural_dict.setdefault(s, []).append(b)

In [5]:
# Add the pseudo-behaviour "none" to every behaviour segment.
# The membership test keeps the cell idempotent: re-running it does not append
# a second "none" to the lists built in the previous cell.
for s in ["head", "tail", "few_shot"]:
    if "none" not in behavioural_dict[s]:
        behavioural_dict[s].append("none")


print(behavioural_dict)

{'few_shot': ['aggression', 'display', 'piloerection', 'playing', 'none'], 'tail': ['bipedal', 'camera_reaction', 'climbing', 'feeding', 'grooming', 'object_carrying', 'tool_use', 'vocalisation', 'none'], 'head': ['resting', 'travel', 'none']}


In [6]:
# Load the train/val results stored for epoch '100' and convert them to DataFrames.
# results2df returns a (train, val) pair, so both halves are unpacked here; the
# results are loaded once only.
train_data, val_data = read_files(result_info[model_name], '100')
train_df, val_df = results2df(train_data, val_data, metadata_df)

# Partition the training data into three frames that each carry a "utm" column.
# NOTE(review): presumably head/tail/few-shot groups of camera locations, with
# head=50 and tail=10 as the thresholds — confirm against data_utils.
th_df, tt_df, tf_df = return_ct_location_segments(train_df, head=50, tail=10)

# Validation rows whose "utm" value occurs in the corresponding training segment.
vh_df = val_df[val_df["utm"].isin(th_df["utm"])]
vt_df = val_df[val_df["utm"].isin(tt_df["utm"])]
vf_df = val_df[val_df["utm"].isin(tf_df["utm"])]

# Training rows per segment: left-join the segment frame with the training
# results on "utm", then drop every row that contains a missing value.
train_segments = {
    "h": th_df.merge(train_df, on="utm", how="left").dropna(),
    "t": tt_df.merge(train_df, on="utm", how="left").dropna(),
    "f": tf_df.merge(train_df, on="utm", how="left").dropna(),
}

val_segments = {
    "h": vh_df,
    "t": vt_df,
    "f": vf_df,
}

In [7]:
# Inspect the validation rows that belong to the "h" segment.
val_segments["h"]

Unnamed: 0,name,split,pred,feat,negative,subject_id,country,research_site,location_metadata,habitat,...,time_hr,time_min,age_sex_group,site,longitude,latitude,utm,value,label,location_count
2287,acp0000964.mp4,val,"[tensor(4.2289e-06), tensor(3.4093e-05), tenso...","[0.10806918, 0.6039582, 0.76296186, 0.06044927...",False,acp0000964.mp4,drc,bili,trail,"forest - mixed, open understorey",...,8.0,5.0,adult male,bili,243360,479159,0243360_0479159,"resting,grooming,travel,camera_reaction","[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0]",36
2288,acp0000967.mp4,val,"[tensor(1.5724e-07), tensor(0.0097), tensor(1....","[0.2089896, 0.23225425, 0.72199917, 0.04871598...",False,acp0000967.mp4,drc,bili,trail,"forest - mixed, open understorey",...,8.0,5.0,adult male,bili,243360,479159,0243360_0479159,"camera_reaction,travel","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",36
2289,acp0000973.mp4,val,"[tensor(1.7535e-07), tensor(6.0536e-08), tenso...","[0.23439923, 0.46743187, 0.66010535, 0.1406166...",False,acp0000973.mp4,drc,bili,trail,"forest - mixed, open understorey",...,8.0,6.0,adult male,bili,243360,479159,0243360_0479159,"travel,resting,travel","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0]",36
2290,acp00009hj.mp4,val,"[tensor(1.2369e-06), tensor(1.7304e-05), tenso...","[1.4686788, 0.9841393, 0.22109707, 0.33854598,...",False,acp00009hj.mp4,drc,bili,trail,"forest - mixed, open understorey",...,12.0,22.0,adult male,bili,243360,479159,0243360_0479159,"camera_reaction,bipedal,travel","[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",36
2291,acp00009j7.mp4,val,"[tensor(2.5418e-05), tensor(5.0572e-06), tenso...","[0.0981734, 0.3040191, 0.34150535, 0.07969461,...",False,acp00009j7.mp4,drc,bili,trail,"forest - mixed, open understorey",...,6.0,48.0,adult male,bili,243360,479159,0243360_0479159,travel,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3326,acp00083xx.mp4,val,"[tensor(0.0003), tensor(0.0002), tensor(0.0020...","[0.017916184, 0.0917435, 0.27791768, 0.2284452...",True,acp00083xx.mp4,liberia,sapo,trail,"forest - mixed, closed understorey",...,16.0,29.0,juvenile unclear,sapo,522043,588350,0522043_0588350,none,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",95
3327,acp00084al.mp4,val,"[tensor(0.0008), tensor(0.0018), tensor(0.0017...","[0.16039194, 0.045875385, 0.013781786, 0.02096...",True,acp00084al.mp4,liberia,sapo,trail,"forest - mixed, closed understorey",...,13.0,41.0,unidentifiable unidentifiable,sapo,522043,588350,0522043_0588350,none,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",95
3328,acp00084an.mp4,val,"[tensor(0.0009), tensor(0.0018), tensor(0.0020...","[0.18607725, 0.036133155, 0.015193998, 0.02826...",True,acp00084an.mp4,liberia,sapo,trail,"forest - mixed, closed understorey",...,13.0,41.0,unidentifiable unidentifiable,sapo,522043,588350,0522043_0588350,none,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",95
3329,acp000848t.mp4,val,"[tensor(1.7281e-06), tensor(5.6676e-06), tenso...","[0.006326366, 0.079311155, 1.0393736, 0.026168...",False,acp000848t.mp4,liberia,sapo,trail,"forest - mixed, closed understorey",...,13.0,57.0,adult female,sapo,522043,588350,0522043_0588350,"feeding,resting,object_carrying","[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0]",95


In [8]:
# Segment orders, defined once and used both for iteration and for sorting.
cam_seg_order = ["h", "t", "f"]
beh_seg_order = ["head", "tail", "few_shot"]

# Fixed seed so that a fresh "Restart & Run All" draws the same clips and the
# CSV files written from samples_df are reproducible.
sample_seed = 0

# Column layout of the result; passing it explicitly keeps the columns present
# even when no sample was found at all.
sample_columns = ["cam_seg", "beh_seg", "behavior", "name", "label"]

# One record per (camera segment, behaviour) pair that has at least one clip.
samples_list = []

for cam_seg in cam_seg_order:
    seg_df = val_segments[cam_seg]
    for beh_seg, beh_list in behavioural_dict.items():
        for b in beh_list:
            # Rows whose comma-separated "value" string contains the behaviour.
            # regex=False treats the name as plain text; na=False makes rows
            # with a missing "value" count as non-matching instead of breaking
            # the boolean mask.
            matches = seg_df[seg_df["value"].str.contains(b, regex=False, na=False)]

            # Nothing to sample for this behaviour in this camera segment.
            if matches.empty:
                continue

            # Draw a single clip and keep its file name and label vector.
            row = matches.sample(1, random_state=sample_seed).iloc[0]
            name = row["name"]
            label = list(row["label"])

            # Render the label entries as a comma-separated string.
            label_str = ", ".join(f"{value}" for value in label)

            samples_list.append(
                {
                    "cam_seg": cam_seg,
                    "beh_seg": beh_seg,
                    "behavior": b,
                    "name": name,
                    "label": f"[{label_str}]",
                }
            )

# Build the DataFrame in one go from the collected records.
samples_df = pd.DataFrame(samples_list, columns=sample_columns)

# Ordered categoricals make the sort follow the segment orders above rather
# than alphabetical order.
samples_df["cam_seg"] = pd.Categorical(samples_df["cam_seg"], cam_seg_order)
samples_df["beh_seg"] = pd.Categorical(samples_df["beh_seg"], beh_seg_order)

samples_df = samples_df.sort_values(["cam_seg", "beh_seg"])


print(samples_df)

   cam_seg   beh_seg         behavior            name  \
14       h      head          resting  acp000dmac.mp4   
15       h      head           travel  acp000dn1b.mp4   
16       h      head             none  acp000cbiv.mp4   
5        h      tail          bipedal  acp000bb7k.mp4   
6        h      tail  camera_reaction  acp0000a5y.mp4   
7        h      tail         climbing  acp000dtsf.mp4   
8        h      tail          feeding  acp000dtd3.mp4   
9        h      tail         grooming  acp000dktw.mp4   
10       h      tail  object_carrying  acp000basy.mp4   
11       h      tail         tool_use  acp0005a9y.mp4   
12       h      tail     vocalisation  acp000cexk.mp4   
13       h      tail             none  acp00054lm.mp4   
0        h  few_shot       aggression  acp0005a9y.mp4   
1        h  few_shot          display  acp0005a6i.mp4   
2        h  few_shot     piloerection  acp000cbhu.mp4   
3        h  few_shot          playing  acp000drjw.mp4   
4        h  few_shot           

In [9]:
# Save the full sample table (with header, without the index column).
samples_df.to_csv(os.path.join(save_path, "samples_df.csv"), index=False)

# Second export: same rows without the cam_seg, beh_seg and behavior columns,
# written without a header row. The reduced frame gets its own name so that
# samples_df stays intact and this cell can be re-run without overwriting
# samples_df.csv with the reduced table or failing on the already-dropped columns.
samples_export_df = samples_df.drop(columns=["cam_seg", "beh_seg", "behavior"])
samples_export_df.to_csv(
    os.path.join(save_path, "test.csv"), header=False, index=False
)