In [2]:
import ast
import inflect
import calendar
import pandas as pd
from glob import glob
from tqdm import tqdm
from skmultilearn.model_selection import iterative_train_test_split

In [2]:
p = inflect.engine()

**Establish all long videos that are annotated and on disc**

In [3]:
# Videos stored on disc/jade2
videos_on_disc = pd.read_csv("data/external/videos_on_disc.csv")
videos_on_disc.drop(columns=["Unnamed: 0"], inplace=True)
videos_on_disc["videos"] = videos_on_disc["videos"].str.lower()
videos_on_disc["videos"] = videos_on_disc.videos.apply(lambda x: x.split(".")[0])

In [4]:
# Maureens annotations
csv_files = glob("data/sites/csv/**/*.csv", recursive=True)
sorted_csv_files = sorted(csv_files, key=lambda x: x.split("/")[-1])
initialiser, remainder = sorted_csv_files[0], sorted_csv_files[1:]
mm_df = pd.read_csv(initialiser, encoding="ISO-8859-1")
for file in remainder:
    site = pd.read_csv(file, encoding="ISO-8859-1")
    mm_df = pd.concat([mm_df, site])
mm_df["subdir_video"] = (
    mm_df.subfolder.astype(str) + "_" + mm_df.video_file_name.astype(str)
)
mm_df["subdir_video"] = mm_df.subdir_video.str.lower()

mm_df["prepend_zero"] = mm_df.subdir_video.apply(
    lambda x: f'{"_".join(x.split("_")[:-1])}_{"0" + x.split("_")[-1]}'
)

In [5]:
mm_df[mm_df.subdir_video == "cmnp_cam09_606473_246762_20140821_ek000140"]

Unnamed: 0,new_row_id,country,research_site,genus,species,cam_coverage_area,location_metadata,habitat,utm_zone,utm_long,...,tool_use,vocalization,bipedal,camera_reaction,behavioral_context,other_species,additional_comments,record_type,subdir_video,prepend_zero
519,520,cameroon,campo_maan,Pan,troglodytes troglodytes,24.36,termite site,"forest - mixed, open understorey",32n,606473.0,...,yes,no,no,no,tool use,,tool use suspected; not clearly visible,60s_video,cmnp_cam09_606473_246762_20140821_ek000140,cmnp_cam09_606473_246762_20140821_0ek000140
520,521,cameroon,campo_maan,Pan,troglodytes troglodytes,24.36,termite site,"forest - mixed, open understorey",32n,606473.0,...,yes,no,no,no,tool use,,tool use suspected; not clearly visible,60s_video,cmnp_cam09_606473_246762_20140821_ek000140,cmnp_cam09_606473_246762_20140821_0ek000140


In [22]:
# All annotations for old and new platforms
all_clip_info = pd.read_csv(
    "../maureen_annotations/data/external/all_cs_clip_information.txt",
    sep="\t",
)

all_clip_info.subject_id = all_clip_info.subject_id.str.lower()
all_clip_info["video_id"] = all_clip_info["video.id"].astype(str).str.lower()
all_clip_info["video_id"] = all_clip_info.video_id.apply(
    lambda x: "_".join(x.split("/")[1:])
)
all_clip_info["video_id"] = all_clip_info.video_id.apply(lambda x: x.split(".")[0])

In [7]:
all_clip_info

Unnamed: 0,subject_id,start.time,site,video.id,tags,classifications,behavior,video_id
0,60886059,0.0,bafing,BAF_A8/BAF_vid16_0340989_1432398_20151114/1201...,,"CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,NOTHINGHERE",TRAVELING,baf_vid16_0340989_1432398_20151114_12010008
1,60886061,15.0,bafing,BAF_A8/BAF_vid16_0340989_1432398_20151114/1201...,,"NOTHINGHERE,NOTHINGHERE,NOTHINGHERE",,baf_vid16_0340989_1432398_20151114_12010008
2,60886060,30.0,bafing,BAF_A8/BAF_vid16_0340989_1432398_20151114/1201...,,"NOTHINGHERE,NOTHINGHERE,NOTHINGHERE",,baf_vid16_0340989_1432398_20151114_12010008
3,60886062,45.0,bafing,BAF_A8/BAF_vid16_0340989_1432398_20151114/1201...,,"NOTHINGHERE,NOTHINGHERE,NOTHINGHERE",,baf_vid16_0340989_1432398_20151114_12010008
4,60886063,0.0,bafing,BAF_A8/BAF_vid16_0340989_1432398_20151114/1201...,"floridpostern,chimp,1_chimp","CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,MO...",TRAVELING,baf_vid16_0340989_1432398_20151114_12010009
...,...,...,...,...,...,...,...,...
33769,acp0004tfa,45.0,tair,tair_E4/tair_cam22_688836_647457_20131025/EK00...,"0_chimp,chimp,muddyfrost,tool_usage","blank,blank,blank",,tair_cam22_688836_647457_20131025_ek000246
33770,acp0004tfv,0.0,tair,tair_E4/tair_cam22_688836_647457_20131025/EK00...,"1_chimp,camera_reaction,camtouch,chimp,dailyzo...","chimpanzee,chimpanzee,chimpanzee,chimpanzee,ch...","camera reaction,on the ground,playing,resting",tair_cam22_688836_647457_20131025_ek000248
33771,acp0004tfw,15.0,tair,tair_E4/tair_cam22_688836_647457_20131025/EK00...,"1_chimp,camera_reaction,camtouch,chimp,juvenil...","chimpanzee,chimpanzee,chimpanzee,chimpanzee,ch...","camera reaction,on the ground,playing",tair_cam22_688836_647457_20131025_ek000248
33772,acp0004tfx,30.0,tair,tair_E4/tair_cam22_688836_647457_20131025/EK00...,,"blank,blank,blank",,tair_cam22_688836_647457_20131025_ek000248


In [8]:
all_clip_info = all_clip_info[
    all_clip_info.video_id.isin(videos_on_disc.videos.unique())
]

In [9]:
# This is all the clips that have been annotated and are on disc...
df = pd.concat(
    [
        all_clip_info[all_clip_info.video_id.isin(mm_df.subdir_video.unique())],
        all_clip_info[all_clip_info.video_id.isin(mm_df.prepend_zero.unique())],
    ]
).drop_duplicates()

**Add multirow metadata i.e., age, sex**


In [10]:
videos = (
    mm_df.groupby("subdir_video")["age_class"].apply(lambda x: list(x)).index.values
)
age_groups = (
    mm_df.groupby("subdir_video")["age_class"]
    .apply(
        lambda x: ",".join(list([str(i) for i in x]))
        if len(list(x)) > 1
        else list(x)[0]
    )
    .values
)
sex_groups = (
    mm_df.groupby("subdir_video")["sex"]
    .apply(
        lambda x: ",".join(list([str(i) for i in x]))
        if len(list(x)) > 1
        else list(x)[0]
    )
    .values
)

df = df.merge(
    pd.DataFrame(
        {"subdir_video": videos, "age_groups": age_groups, "sex_groups": sex_groups}
    ),
    left_on="video_id",
    right_on="subdir_video",
    how="inner",
).drop_duplicates()

In [11]:
meta_attr = [
    "subdir_video",
    "country",
    "research_site",
    "genus",
    "species",
    "location_metadata",
    "habitat",
    "min_number_chimps_per_video",
    "max_number_chimps_per_video",
    "day",
    "month",
    "year",
    "time_hr",
    "time_min",
]

**Add single row metadata**

In [12]:
df = df.merge(
    mm_df[meta_attr], left_on="video_id", right_on="subdir_video", how="inner"
).drop_duplicates()

df.rename(
    columns={
        "max_number_chimps_per_video": "max",
        "min_number_chimps_per_video": "min",
    },
    inplace=True,
)

**Add MM's behavioural information**

In [13]:
mm_df.rename(
    columns={
        "tool_use": "tool_use_mm",
        "vocalization": "vocalization_mm",
        "bipedal": "bipedal_mm",
        "camera_reaction": "camera_reaction_mm",
    },
    inplace=True,
)

mm_df.tool_use_mm.fillna(False, inplace=True)
mm_df.tool_use_mm.replace("no", False, inplace=True)
mm_df.tool_use_mm.replace("yes", True, inplace=True)

mm_df.vocalization_mm.fillna(False, inplace=True)
mm_df.vocalization_mm.replace("no", False, inplace=True)
mm_df.vocalization_mm.replace("yes", True, inplace=True)

mm_df.bipedal_mm.fillna(False, inplace=True)
mm_df.bipedal_mm.replace("no", False, inplace=True)
mm_df.bipedal_mm.replace("yes", True, inplace=True)

mm_df.camera_reaction_mm.fillna(False, inplace=True)
mm_df.camera_reaction_mm.replace("no", False, inplace=True)
mm_df.camera_reaction_mm.replace("yes", True, inplace=True)

mm_df.behavioral_context.fillna(False, inplace=True)
mm_df.behavioral_context.replace("no", False, inplace=True)
mm_df.behavioral_context.replace("yes", True, inplace=True)

behaviour_attr = [
    "subdir_video",
    "tool_use_mm",
    "vocalization_mm",
    "bipedal_mm",
    "camera_reaction_mm",
    "behavioral_context",
    "additional_comments",
]

df = df.merge(
    mm_df[behaviour_attr], left_on="video_id", right_on="subdir_video", how="inner"
).drop_duplicates()

**Add standard multilabel behaviours**

In [14]:
df["bt"] = df.behavior.astype(str) + "," + df.tags.astype(str)
df.bt = df.bt.str.lower()
df["split_tags"] = df.bt.str.split(",")
df.split_tags.fillna("", inplace=True)

In [15]:
tag_df = pd.read_csv("data/internal/all_tags_behaviours.csv")

for col in tag_df.columns[1:]:
    unique_tags = tag_df[tag_df[col] == col].Tags.unique()
    df[col] = df.split_tags.apply(
        lambda x: 1 if any(i in x for i in unique_tags) else 0
    )

# Create column indicating empties
df["label_indicator"] = (
    df[tag_df.columns[1:]]
    .astype(int)
    .apply(lambda x: True if any([i for i in x]) else False, axis=1)
)

**Categorise into parent classes**

In [16]:
df["p_camera_reaction"] = df.camera_reaction == 1
df["p_tool_use"] = (
    (df.tool_use == 1) | (df.termite_fishing == 1) | (df.nut_cracking == 1)
)
df["p_object_carrying"] = df.object_carry == 1
df["p_bipedal"] = df.bipedal == 1
df["p_feeding"] = (df.feeding == 1) | (df.wood_eating == 1)
df["p_chimp_carrying"] = df.chimp_carrying == 1
df["p_vocalisation"] = (df.vocalisation == 1) | (df.hoot == 1) | (df.grunt == 1)
df["p_climbing"] = df.climbing == 1
df["p_aggression"] = (df.aggression == 1) | (df.charge == 1) | (df.fight == 1)
df["p_travel"] = (df.travel == 1) | (df.running == 1) | (df.walking == 1)
df["p_sex"] = (df.sex == 1) | (df.mounting == 1)
df["p_piloerection"] = df.piloerection == 1
df["p_social_interaction"] = (df.social_interaction == 1) | (df.nursing == 1)
df["p_grooming"] = df.grooming == 1
df["p_display"] = (
    (df.display == 1)
    | (df.branch_shaking == 1)
    | (df.stone_throw == 1)
    | (df.drumming == 1)
)
df["p_cross_species_interaction"] = df.cross_species_interaction == 1
df["p_resting"] = df.resting == 1
df["p_playing"] = df.playing == 1
df["p_no_behaviour"] = (df.label_indicator == False) | (df.no_behaviour == 1)


df.drop(columns=["camera_reaction"], inplace=True)
df.drop(columns=["tool_use", "termite_fishing", "nut_cracking"], inplace=True)
df.drop(columns=["object_carry"], inplace=True)
df.drop(columns=["bipedal"], inplace=True)
df.drop(columns=["feeding", "wood_eating"], inplace=True)
df.drop(columns=["chimp_carrying"], inplace=True)
df.drop(columns=["vocalisation", "hoot", "grunt"], inplace=True)
df.drop(columns=["climbing"], inplace=True)
df.drop(columns=["aggression", "charge", "fight"], inplace=True)
df.drop(columns=["travel", "running", "walking"], inplace=True)
df.drop(columns=["sex", "mounting"], inplace=True)
df.drop(columns=["piloerection"], inplace=True)
df.drop(columns=["social_interaction", "nursing"], inplace=True)
df.drop(columns=["grooming"], inplace=True)
df.drop(columns=["display", "branch_shaking", "stone_throw", "drumming"], inplace=True)
df.drop(columns=["cross_species_interaction"], inplace=True)
df.drop(columns=["resting"], inplace=True)
df.drop(columns=["playing"], inplace=True)
df.drop(columns=["no_behaviour"], inplace=True)

In [17]:
behaviours = [
    "p_camera_reaction",
    "p_tool_use",
    "p_object_carrying",
    "p_bipedal",
    "p_feeding",
    "p_chimp_carrying",
    "p_vocalisation",
    "p_climbing",
    "p_aggression",
    "p_travel",
    "p_sex",
    "p_piloerection",
    "p_social_interaction",
    "p_grooming",
    "p_display",
    "p_cross_species_interaction",
    "p_resting",
    "p_playing",
]

In [19]:
# Merge 15-second rows into 1 representative multilabel
store = []
for video in tqdm(df.video_id.unique()):
    tmp = df[df.video_id == video]
    multilabel = tmp[behaviours].sum().ge(1).view("i1").values
    store.append(dict(video_id=video, label=multilabel))
df = df.merge(pd.DataFrame(store), on="video_id", how="left")
df.label = df.label.apply(lambda x: str(x.tolist()))

  0%|          | 0/6857 [00:00<?, ?it/s]

100%|██████████| 6857/6857 [01:05<00:00, 104.75it/s]


In [32]:
df.to_csv("assign_full_video_multilabel.csv", index=False)

### **Metadata/text only model and video analogue**

In [3]:
df = pd.read_csv("assign_full_video_multilabel.csv")
df.label = df.label.apply(lambda x: ast.literal_eval(x))

  df = pd.read_csv("assign_full_video_multilabel.csv")


In [28]:
# Filter videos that need removing
videos_to_remove = pd.read_csv("videos_to_remove.csv")
df = df[~df.video_id.isin(videos_to_remove.videos_to_remove.unique())]
df = df[~df.video_id.isin(["djo_cam09_0698421_0598444_20130109_pict0017"])]
df = df[~df.month.isna()]

In [29]:
df["sp"] = df.tags.str.lower()
df["sp"] = df.sp.str.split(",")
df.sp.fillna("", inplace=True)
df.sp = df.sp.apply(lambda x: list(enumerate(x)))

df["sb"] = df.behavior.str.lower()
df["sb"] = df.sb.str.split(",")
df.sb.fillna("", inplace=True)
df.sb = df.sb.apply(lambda x: list(enumerate(x)))

df["split_tags"] = df.sp + df.sb
df["split_tags"] = df.split_tags.apply(lambda x: sorted(x, key=lambda x: x[0]))

In [None]:
def get_new_tags(x, tag_df):
    store = []
    for i, t in enumerate(x):
        tmp = tag_df[tag_df.Tags == t[-1]].drop_duplicates()
        if len(tmp.index) == 0:
            continue
        else:
            extract = [x for x in tmp.values[0, 1:] if isinstance(x, str)]
            if not extract:
                continue
            else:
                store.append((t[0], extract))
    return store


df["new_tags"] = df.split_tags.apply(lambda x: get_new_tags(x, tag_df))
df.new_tags = df.new_tags.apply(lambda x: str(x.tolist()))
df.label = df.label.apply(lambda x: str(x.tolist()))
df.to_csv("clips_w_temporally_aligned_behaviours_tmp.csv", index=False)

**Reload above**

In [15]:
df = pd.read_csv("clips_w_temporally_aligned_behaviours_tmp.csv")
# df.label = df.label.apply(lambda x: ast.literal_eval(x))
# df.new_tags = df.new_tags.apply(lambda x: ast.literal_eval(x))
# df.new_tags = df.new_tags.apply(lambda x: sorted(x, key=lambda x: x[0]))

  df = pd.read_csv("clips_w_temporally_aligned_behaviours_tmp.csv")


In [32]:
pd.set_option("max_colwidth", 400)
df[df.video_id == "ngo_vid27_0216382_0050822_20130215_pict0002"][
    ["video.id", "subject_id", "video_id"]
].subject_id.unique()

array(['acp000dbgq', 'acp000dbgr', 'acp000dbgs', 'acp000dbgt',
       'acp000dbvi', 'acp000dbvk', 'acp000dbvm', 'acp000dbvn'],
      dtype=object)

In [None]:
behaviours = [
    "p_camera_reaction",
    "p_tool_use",
    "p_object_carrying",
    "p_bipedal",
    "p_feeding",
    "p_chimp_carrying",
    "p_vocalisation",
    "p_climbing",
    "p_aggression",
    "p_travel",
    "p_sex",
    "p_piloerection",
    "p_social_interaction",
    "p_grooming",
    "p_display",
    "p_cross_species_interaction",
    "p_resting",
    "p_playing",
]

In [None]:
conditions_dict = {
    "camera_reaction": "camera_reaction",
    "tool_use": "(tool_use) | (termite_fishing) | (nut_cracking)",
    "object_carrying": "object_carry",
    "bipedal": "bipedal",
    "feeding": "(feeding) | (wood_eating)",
    "chimp_carrying": "chimp_carrying",
    "vocalisation": "(vocalisation) | (hoot) | (grunt)",
    "climbing": "climbing",
    "aggression": "(aggression) | (charge) | (fight)",
    "travel": "(travel) | (running) | (walking)",
    "sex": "(sex) | (mounting)",
    "piloerection": "piloerection",
    "social_interaction": "(social_interaction) | (nursing)",
    "grooming": "grooming",
    "display": "(display) | (branch_shaking) | (stone_throw) | (drumming)",
    "cross_species_interaction": "cross_species_interaction",
    "resting": "resting",
    "playing": "playing",
    "no_behaviour": "(label_indicator == False) | (no_behaviour)",
}

reversed_dict = {}
for key, value in conditions_dict.items():
    conditions = [condition.strip() for condition in value.split("|")]
    for condition in conditions:
        reversed_dict[condition] = key

modified_dict = {}
for key, value in reversed_dict.items():
    modified_key = key.replace("(", "").replace(")", "").strip()
    modified_dict[modified_key] = value

# Remove 'label_indicator' and 'no_behaviour' keys from modified_dict
modified_dict.pop("label_indicator == False")
modified_dict.pop("no_behaviour")

In [None]:
def get_parent_tags(x, modified_dict):
    store = []
    for t in x:
        if t[-1][0] in modified_dict.keys():
            t[-1][0] = modified_dict[t[-1][0]]
            store.append(t)
    return store


def order_tags(x):
    store = []
    tag_num = len(x)
    for i in range(tag_num):
        if x[i][1][0] not in store:
            store.append(x[i][1][0])
    return store


# Decode multi hot binary labels to class labels
def decode_label(x, behaviours):
    decoded_behaviours = []
    idxs = np.where(x)
    for idx in idxs[0]:
        decoded_behaviours.append(behaviours[idx].split("p_")[-1])
    return decoded_behaviours


df["parent_new_tags"] = df.new_tags.apply(lambda x: get_parent_tags(x, modified_dict))
df["ordered_tags"] = df.parent_new_tags.apply(lambda x: order_tags(x))
df["decoded_labels"] = df.label.apply(lambda x: decode_label(x, behaviours))

In [None]:
pd.set_option("max_colwidth", 400)
df.ordered_tags = df.ordered_tags.apply(lambda x: str(x))
df.decoded_labels = df.decoded_labels.apply(lambda x: str(x))
df[
    ["video_id", "subject_id", "start.time", "ordered_tags", "max", "decoded_labels"]
].drop_duplicates().tail(55)

In [None]:
df.behavior

In [None]:
df[df.video_id == "baf_vid16_0340989_1432398_20151114_12010009"][
    ["start.time", "new_tags", "ordered_tags", "sb", "label"]
]

**Create video dataset**

In [None]:
import ast
import numpy as np

vdf = df[["video_id", "label"]]
vdf.label = vdf.label.apply(lambda x: x[:-1])  # let 0 vector represent no behaviour
vdf.label = vdf.label.apply(lambda x: str(list(x)))
vdf = vdf.drop_duplicates()


vdf.label = vdf.label.apply(lambda x: ast.literal_eval(x))
labels = np.array(list(vdf.label.values))
X = vdf.video_id.to_numpy().reshape((6675, 1))

assert len(X) == len(labels)

X_train, y_train, X_test, y_test = iterative_train_test_split(X, labels, test_size=0.30)
X_test, y_test, X_val, y_val = iterative_train_test_split(
    X_test, y_test, test_size=0.33
)
y_train, y_test, y_val = (
    [str(list(x)) for x in y_train],
    [str(list(x)) for x in y_test],
    [str(list(x)) for x in y_val],
)

In [None]:
train_vdf = pd.DataFrame({"video": X_train[:, 0], "label": y_train})
val_vdf = pd.DataFrame({"video": X_val[:, 0], "label": y_val})
test_vdf = pd.DataFrame({"video": X_test[:, 0], "label": y_test})

In [None]:
train_vdf.to_csv("data/annotations/video_only/train.csv", index=False)
val_vdf.to_csv("data/annotations/video_only/val.csv", index=False)
test_vdf.to_csv("data/annotations/video_only/test.csv", index=False)

**Create meta-text dataset**

In [None]:
composition = ["age_groups", "sex_groups", "min", "max"]
location = ["country", "research_site", "location_metadata", "habitat"]
time = ["day", "month", "year", "time_hr", "time_min"]
tdf = df[["video_id"] + composition + location + time + ["label"]]
tdf.label = tdf.label.apply(lambda x: x[:-1])  # let 0 vector represent no behaviour
tdf.label = tdf.label.apply(lambda x: str(list(x)))
tdf.drop_duplicates(inplace=True)

**Age cats: 'unidentifiable', 'infant', 'juvenile', 'adolescent', 'adult'**


In [None]:
def count_individual_age(x, age):
    if age in x:
        return x.count(age)
    else:
        return 0

In [None]:
ages = ["unidentifiable", "infant", "juvenile", "adolescent", "adult"]
tdf.age_groups.fillna("", inplace=True)
tdf["unidentifiable_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "unidentifiable")
)
tdf["infant_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "infant")
)
tdf["juvenile_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "juvenile")
)
tdf["adolescent_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "adolescent")
)
tdf["adult_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "adult")
)

**Sex cats: 'unclear', 'unidentifiable', 'male', 'female'**

In [None]:
def count_individual_sex(x, sex):
    if sex in x:
        return x.count(sex)
    else:
        return 0

In [None]:
sexes = ["unclear", "unidentifiable", "male", "female"]
tdf.sex_groups.fillna("", inplace=True)
tdf["unclear_count"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "unclear")
)
tdf["unidentifiable_count"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "unidentifiable")
)
tdf["male"] = tdf.sex_groups.apply(lambda x: count_individual_sex(x.split(","), "male"))
tdf["female"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "female")
)

In [None]:
def group_age_sex(age, sex):
    assert len(age.split(",")), len(sex.split(","))
    pairings = []
    for age, sex in zip(age.split(","), sex.split(",")):
        pair = f"{age} {sex}"
        pairings.append(pair)
    return ",".join(pairings)


tdf["age_sex_group"] = tdf.apply(
    lambda x: group_age_sex(x.age_groups, x.sex_groups), axis=1
)

In [None]:
as_group = []
for group in tdf["age_sex_group"].unique():
    for g in group.split(","):
        as_group.append(g)
as_group = list(set(as_group))
as_group = [i for i in as_group if i != " "]
print(as_group)

In [None]:
def count_age_sex_pairs(x, g):
    tmp = x.split(",")
    count = tmp.count(g)
    return count


for g in as_group:
    tdf[f"{g}"] = tdf.age_sex_group.apply(lambda x: count_age_sex_pairs(x, g))

In [None]:
tdf["month"] = tdf.month.astype(int)
tdf["year"] = tdf.year.astype(int)
tdf.month = tdf.month.apply(lambda x: calendar.month_name[x])

In [None]:
def desc_composition(x, pairings):
    if x["max"] == 1:
        start = "A video of"
        for pair in pairings:
            if x[pair] == 1:
                start += f" {p.number_to_words(x[pair])} {pair} "
        start = start + "chimpanzee"
    elif x["max"] > 1:
        start = (
            f"A video of {p.number_to_words(int(x['max']))} chimpanzees, composed of"
        )
        tmp = []
        for pair in pairings:
            if x[pair] >= 1:
                tmp.append(pair)

        number_of_pairs = len(tmp)
        only_one_pair = True if len(set(tmp)) == 1 else False

        for i, pair in enumerate(tmp):
            if only_one_pair:
                start += f" {p.number_to_words(x[pair])} {pair}s"
            elif i == number_of_pairs - 1:
                start += f" and {p.number_to_words(x[pair])} {pair}s"
            else:
                if i == number_of_pairs - 2:
                    start += f" {p.number_to_words(x[pair])} {pair}s"
                else:
                    start += f" {p.number_to_words(x[pair])} {pair},"
        # start += f" {p.number_to_words(x[pair])} {pair}s,"
    return start


def desc_location(x):
    return f"It was filmed in {x['country']} at the {x['research_site']} research site"


def desc_habitat(x):
    desc = f"at a {x['location_metadata']} in {x['habitat']}"
    return desc


def desc_time(x):
    desc = f"on {x['day']} {x['month']} {x['year']} at {x['time_hr']}:{x['time_min']}."
    return desc

In [None]:
pairings = tdf.columns[24:-1]
tdf["desc"] = tdf.apply(
    lambda x: f"{desc_composition(x, pairings)}. {desc_location(x)} {desc_habitat(x)} {desc_time(x)}",
    axis=1,
)

In [None]:
tdf[tdf["max"] == 5][["max", "desc"]].desc.iloc[0]

In [None]:
train_tdf = train_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
train_tdf.rename(columns={"label_y": "label"}, inplace=True)
train_tdf.drop(columns=["label_x"], inplace=True)

val_tdf = val_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
val_tdf.rename(columns={"label_y": "label"}, inplace=True)
val_tdf.drop(columns=["label_x"], inplace=True)

test_tdf = test_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
test_tdf.rename(columns={"label_y": "label"}, inplace=True)
test_tdf.drop(columns=["label_x"], inplace=True)

In [None]:
train_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/train_text_only.csv", index=False
)

val_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/val_text_only.csv", index=False
)

test_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/test_text_only.csv", index=False
)

In [None]:
df[["video_id", "split_tags"]][
    df["video_id"] == "tair_cam22_688836_647457_20131025_ek000246"
].groupby("video_id").apply(lambda x: x.split_tags.values).iloc[0]

**Test embedding metadata**

In [None]:
from transformers import CLIPTokenizer, CLIPTextModel

In [None]:
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")

for module in text_model.text_model.encoder.layers[:-1].modules():
    for param in module.parameters():
        param.requires_grad = False

tokenized_text = tdf.descriptor.apply(
    lambda x: tokenizer(
        x, padding="max_length", max_length=77, truncation=True, return_tensors="pt"
    )
)

In [None]:
text_features = text_model(**tokenized_text[0])

In [None]:
#### This is for temporal processing of the dataset #####

In [None]:
test_df[
    ["video_id", "behavioral_context", "camera_" "start.time"]
    + list(test_df.columns[-19:])
]

In [None]:
collection = []
for video_name in test_df.video_id.unique():
    item = {}
    tmp = test_df[test_df.video_id == video_name]
    item["video_name"] = video_name
    item["metadata"] = dict(
        age_groups=tmp.age_groups, sex_groups=tmp.sex_groups, max=tmp.max, min=tmp.min
    )
    item["behaviour"] = tmp["start.time"].to_dict()
    collection.append(item)

In [None]:
collection[0]["metadata"]["age_groups"]