In [1]:
import ast
import inflect
import calendar
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
from skmultilearn.model_selection import iterative_train_test_split

In [2]:
p = inflect.engine()

In [3]:
df = pd.read_csv("clips_w_temporally_aligned_behaviours.csv")
df.label = df.label.apply(lambda x: ast.literal_eval(x))
df.ordered_tags = df.ordered_tags.apply(lambda x: ast.literal_eval(x))
df.decoded_labels = df.decoded_labels.apply(lambda x: ast.literal_eval(x))

In [4]:
vldf = None
strange = []
for video_id in tqdm(df.video_id.unique()):
    tmp = df[df.video_id == video_id]
    behaviours = tmp.ordered_tags.values
    if len(behaviours) != 4:
        strange.append(dict(video_id=video_id, num=len(behaviours)))
        continue
    store = dict(video_id=video_id)
    for i, b in enumerate(behaviours):
        try:
            store[f"clip_{i}"] = ",".join(b)
        except:
            store[f"clip_{i}"] = ""
    if vldf is None:
        vldf = pd.DataFrame(store, index=[0])
    else:
        vldf = pd.concat([vldf, pd.DataFrame(store, index=[0])])

100%|██████████| 6675/6675 [00:32<00:00, 206.40it/s]


In [5]:
pd.set_option("max_colwidth", 400)
vldf[["video_id", "clip_0", "clip_1", "clip_2", "clip_3"]]

Unnamed: 0,video_id,clip_0,clip_1,clip_2,clip_3
0,baf_vid16_0340989_1432398_20151114_12010008,travel,,,
0,baf_vid16_0340989_1432398_20151114_12010009,travel,,"camera_reaction,playing,resting,travel",
0,baf_vid16_0340989_1432398_20151114_12010012,"chimp_carrying,travel",,,
0,baf_vid19_0340963_1432403_20151114_11220023,,travel,"climbing,travel",resting
0,baf_vid19_0340963_1432403_20151114_11220024,"climbing,travel","climbing,travel",,
...,...,...,...,...,...
0,tair_cam22_688836_647457_20131025_ek000222,"feeding,resting,social_interaction,tool_use,travel","feeding,resting,tool_use","feeding,playing,resting,social_interaction,tool_use","feeding,resting,tool_use"
0,tair_cam22_688836_647457_20131025_ek000238,resting,"camera_reaction,travel",,
0,tair_cam22_688836_647457_20131025_ek000239,"camera_reaction,travel,vocalisation",,,
0,tair_cam22_688836_647457_20131025_ek000245,"tool_use,feeding","feeding,resting,tool_use","cross_species_interaction,feeding,resting,tool_use","feeding,tool_use,travel"


**Create meta-text dataset**

In [6]:
composition = ["age_groups", "sex_groups", "min", "max"]
location = ["country", "research_site", "location_metadata", "habitat"]
time = ["day", "month", "year", "time_hr", "time_min"]
tdf = df[["video_id"] + composition + location + time + ["label"]]
tdf.label = tdf.label.apply(lambda x: str(list(x)))
tdf.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf.label = tdf.label.apply(lambda x: str(list(x)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf.drop_duplicates(inplace=True)


**Age cats: 'unidentifiable', 'infant', 'juvenile', 'adolescent', 'adult'**


In [7]:
def count_individual_age(x, age):
    if age in x:
        return x.count(age)
    else:
        return 0


ages = ["unidentifiable", "infant", "juvenile", "adolescent", "adult"]
tdf.age_groups.fillna("", inplace=True)
tdf["unidentifiable_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "unidentifiable")
)
tdf["infant_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "infant")
)
tdf["juvenile_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "juvenile")
)
tdf["adolescent_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "adolescent")
)
tdf["adult_count"] = tdf.age_groups.apply(
    lambda x: count_individual_age(x.split(","), "adult")
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf.age_groups.fillna("", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["unidentifiable_count"] = tdf.age_groups.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["infant_count"] = tdf.age_groups.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

Se

**Sex cats: 'unclear', 'unidentifiable', 'male', 'female'**

In [8]:
def count_individual_sex(x, sex):
    if sex in x:
        return x.count(sex)
    else:
        return 0


sexes = ["unclear", "unidentifiable", "male", "female"]
tdf.sex_groups.fillna("", inplace=True)
tdf["unclear_count"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "unclear")
)
tdf["unidentifiable_count"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "unidentifiable")
)
tdf["male"] = tdf.sex_groups.apply(lambda x: count_individual_sex(x.split(","), "male"))
tdf["female"] = tdf.sex_groups.apply(
    lambda x: count_individual_sex(x.split(","), "female")
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf.sex_groups.fillna("", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["unclear_count"] = tdf.sex_groups.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["unidentifiable_count"] = tdf.sex_groups.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

S

In [9]:
def group_age_sex(age, sex):
    assert len(age.split(",")), len(sex.split(","))
    pairings = []
    for age, sex in zip(age.split(","), sex.split(",")):
        pair = f"{age} {sex}"
        pairings.append(pair)
    return ",".join(pairings)


tdf["age_sex_group"] = tdf.apply(
    lambda x: group_age_sex(x.age_groups, x.sex_groups), axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["age_sex_group"] = tdf.apply(


**Generate age sex pairings**

In [10]:
as_group = []
for group in tdf["age_sex_group"].unique():
    for g in group.split(","):
        as_group.append(g)
as_group = list(set(as_group))
as_group = [i for i in as_group if i != " "]


def count_age_sex_pairs(x, g):
    tmp = x.split(",")
    count = tmp.count(g)
    return count


for g in as_group:
    tdf[f"{g}"] = tdf.age_sex_group.apply(lambda x: count_age_sex_pairs(x, g))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf[f"{g}"] = tdf.age_sex_group.apply(lambda x: count_age_sex_pairs(x, g))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf[f"{g}"] = tdf.age_sex_group.apply(lambda x: count_age_sex_pairs(x, g))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf[f"{g}"] = tdf.age_sex_group.apply(lambda x: count_a

In [11]:
# Change unclear and unidentifiable to be age-only
tdf.head()

Unnamed: 0,video_id,age_groups,sex_groups,min,max,country,research_site,location_metadata,habitat,day,...,unidentifiable unidentifiable,juvenile female,adult unclear,adolescent unclear,unidentifiable unclear,adult unidentifiable,juvenile male,infant unidentifiable,adult male,adult female
0,baf_vid16_0340989_1432398_20151114_12010008,adult,male,1.0,1.0,mali,bafing,nesting site/water source,gallery forest,1.0,...,0,0,0,0,0,0,0,0,1,0
4,baf_vid16_0340989_1432398_20151114_12010009,"adult,adult","male,male",2.0,2.0,mali,bafing,nesting site/water source,gallery forest,1.0,...,0,0,0,0,0,0,0,0,2,0
8,baf_vid16_0340989_1432398_20151114_12010012,"adult,infant,adult,infant","female,unclear,female,unclear",4.0,4.0,mali,bafing,nesting site/water source,gallery forest,1.0,...,0,0,0,0,0,0,0,0,0,2
12,baf_vid19_0340963_1432403_20151114_11220023,adolescent,male,1.0,1.0,mali,bafing,nesting site,gallery forest,22.0,...,0,0,0,0,0,0,0,0,0,0
16,baf_vid19_0340963_1432403_20151114_11220024,"adult,adolescent,adult","male,female,male",3.0,3.0,mali,bafing,nesting site,gallery forest,22.0,...,0,0,0,0,0,0,0,0,2,0


In [12]:
tdf["infant"] = tdf["infant unclear"] + tdf["infant unidentifiable"]
tdf["juvenile"] = tdf["juvenile unclear"] + tdf["juvenile unidentifiable"]
tdf["adolescent"] = tdf["adolescent unclear"] + tdf["adolescent unidentifiable"]
tdf["adult"] = tdf["adult unclear"] + tdf["adult unidentifiable"]

tdf["unclear chimpanzee"] = (
    tdf["unidentifiable unidentifiable"] + tdf["unidentifiable unclear"]
)

tdf.drop(
    columns=[
        "infant unclear",
        "juvenile unclear",
        "adolescent unclear",
        "adult unclear",
        "infant unidentifiable",
        "juvenile unidentifiable",
        "adolescent unidentifiable",
        "adult unidentifiable",
        "unidentifiable unidentifiable",
        "unidentifiable unclear",
    ],
    inplace=True,
)

pairings = [
    "juvenile female",
    "unidentifiable male",
    "infant female",
    "adolescent male",
    "adolescent female",
    "juvenile male",
    "infant male",
    "adult male",
    "adult female",
    "infant",
    "juvenile",
    "adolescent",
    "adult",
    "unclear chimpanzee",
]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["infant"] = tdf["infant unclear"] + tdf["infant unidentifiable"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["juvenile"] = tdf["juvenile unclear"] + tdf["juvenile unidentifiable"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tdf["adolescent"] = tdf["adolescent unclear"] + tdf["adolesce

In [16]:
# Drop videos without hour:minute
tdf = tdf[~tdf["time_hr"].isna()]

tdf["month"] = tdf.month.astype(int)
tdf["year"] = tdf.year.astype(int)
tdf["day"] = tdf.day.astype(int)
tdf["time_hr"] = tdf.time_hr.astype(int)
tdf["time_min"] = tdf.time_min.astype(int)

tdf.month = tdf.month.apply(lambda x: calendar.month_name[x])

In [33]:
def desc_composition(x, pairings):
    if x["max"] == 1:
        start = "A video of"
        for pair in pairings:
            if x[pair] == 1:
                start += f" {p.number_to_words(x[pair])} {pair} "
        start = start + "chimpanzee"
    elif x["max"] > 1:
        start = (
            f"A video of {p.number_to_words(int(x['max']))} chimpanzees, composed of"
        )
        tmp = []
        for pair in pairings:
            if x[pair] >= 1:
                tmp.append(pair)

        number_of_pairs = len(tmp)
        only_one_pair = True if len(set(tmp)) == 1 else False

        for i, pair in enumerate(tmp):
            sp = f"{pair}" if x[pair] == 1 else f"{pair}s"
            if only_one_pair:
                start += f" {p.number_to_words(x[pair])} {sp}"
            elif i == number_of_pairs - 1:
                start += f" and {p.number_to_words(x[pair])} {sp}"
            else:
                if i == number_of_pairs - 2:
                    start += f" {p.number_to_words(x[pair])} {sp}"
                else:
                    start += f" {p.number_to_words(x[pair])} {sp},"
        # start += f" {p.number_to_words(x[pair])} {pair}s,"
    return start


def desc_location(x):
    return f"It was filmed in {x['country']} at the {x['research_site']} research site"


def desc_habitat(x):
    desc = f"The footage is from a {x['location_metadata']} in a {x['habitat']} habitat"
    return desc


def desc_time(x):
    desc = f"Filming took place on {x['day']} {x['month']} {x['year']} at {x['time_hr']}:{x['time_min']}."
    return desc


def preprocess_behaviours(x, pd):
    if pd:
        tmp = x.split(",")
        tmp = [i.replace("_", " ") for i in tmp]
        if tmp == [""]:
            d = "nothing"
        else:
            # Add an 'and' before the last item
            start = "the chimpanzees exhibiting"
            if len(tmp) > 1:
                # ['climbing', 'travel']
                end = "and " + tmp[-1]
                d = ", ".join(tmp[:-1])
                d = start + " " + d + " " + end
            else:
                d = start + " " + tmp[0]
    else:
        tmp = x.split(",")
        tmp = [i.replace("_", " ") for i in tmp]
        if tmp == [""]:
            d = "nothing"
        else:
            # Add an 'and' before the last item
            start = "the chimpanzee exhibiting"
            if len(tmp) > 1:
                # ['climbing', 'travel']
                end = "and " + tmp[-1]
                d = ", ".join(tmp[:-1])
                d = start + " " + d + " " + end
            else:
                d = start + " " + tmp[0]
    return d


def desc_behaviour(x):
    ps = True if x["max"] > 1 else False
    desc = f"The first 15 seconds shows {preprocess_behaviours(x['clip_0'], ps)}. The next 15 seconds shows {preprocess_behaviours(x['clip_1'], ps)}. The next 15 seconds shows {preprocess_behaviours(x['clip_2'], ps)}. The last 15 seconds shows {preprocess_behaviours(x['clip_3'], ps)}."
    return desc

In [14]:
tdf = vldf.merge(tdf, on="video_id", how="left")

In [34]:
tdf["desc"] = tdf.apply(
    lambda x: f"{desc_composition(x, pairings)}. {desc_location(x)}. {desc_habitat(x)}. {desc_time(x)}. {desc_behaviour(x)}",
    axis=1,
)

In [35]:
tdf.merge(df, on="video_id", how="left").columns

Index(['video_id', 'clip_0', 'clip_1', 'clip_2', 'clip_3', 'age_groups_x',
       'sex_groups_x', 'min_x', 'max_x', 'country_x', 'research_site_x',
       'location_metadata_x', 'habitat_x', 'day_x', 'month_x', 'year_x',
       'time_hr_x', 'time_min_x', 'label_x', 'unidentifiable_count',
       'infant_count', 'juvenile_count', 'adolescent_count', 'adult_count',
       'unclear_count', 'male', 'female', 'age_sex_group', 'infant female',
       'unidentifiable male', 'adolescent male', 'adolescent female',
       'infant male', 'juvenile female', 'juvenile male', 'adult male',
       'adult female', 'infant', 'juvenile', 'adolescent', 'adult',
       'unclear chimpanzee', 'desc', 'subject_id', 'start.time',
       'age_groups_y', 'sex_groups_y', 'country_y', 'research_site_y', 'genus',
       'species', 'location_metadata_y', 'habitat_y', 'min_y', 'max_y',
       'day_y', 'month_y', 'year_y', 'time_hr_y', 'time_min_y',
       'behavioral_context', 'p_camera_reaction', 'p_tool_use',
   

In [None]:
tdf.merge(df, on="video_id", how="left")[
    [
        "video_id",
        "desc",
        "label",
        "p_camera_reaction",
        "p_tool_use",
        "p_object_carrying",
        "p_bipedal",
        "p_feeding",
        "p_chimp_carrying",
        "p_vocalisation",
        "p_climbing",
        "p_aggression",
        "p_travel",
        "p_sex",
        "p_piloerection",
        "p_social_interaction",
        "p_grooming",
        "p_display",
        "p_cross_species_interaction",
        "p_resting",
        "p_playing",
    ]
]

In [None]:
train_tdf = train_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
train_tdf.rename(columns={"label_y": "label"}, inplace=True)
train_tdf.drop(columns=["label_x"], inplace=True)

val_tdf = val_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
val_tdf.rename(columns={"label_y": "label"}, inplace=True)
val_tdf.drop(columns=["label_x"], inplace=True)

test_tdf = test_vdf.merge(tdf, left_on="video", right_on="video_id", how="left")
test_tdf.rename(columns={"label_y": "label"}, inplace=True)
test_tdf.drop(columns=["label_x"], inplace=True)

In [None]:
train_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/train_text_only.csv", index=False
)

val_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/val_text_only.csv", index=False
)

test_tdf[["video_id", "descriptor", "label"]].to_csv(
    "data/annotations/text_only/test_text_only.csv", index=False
)

In [None]:
df[["video_id", "split_tags"]][
    df["video_id"] == "tair_cam22_688836_647457_20131025_ek000246"
].groupby("video_id").apply(lambda x: x.split_tags.values).iloc[0]

**Create video dataset**

In [None]:
import ast
import numpy as np

vdf = df[["video_id", "label"]]
vdf.label = vdf.label.apply(lambda x: x[:-1])  # let 0 vector represent no behaviour
vdf.label = vdf.label.apply(lambda x: str(list(x)))
vdf = vdf.drop_duplicates()


vdf.label = vdf.label.apply(lambda x: ast.literal_eval(x))
labels = np.array(list(vdf.label.values))
X = vdf.video_id.to_numpy().reshape((6675, 1))

assert len(X) == len(labels)

X_train, y_train, X_test, y_test = iterative_train_test_split(X, labels, test_size=0.30)
X_test, y_test, X_val, y_val = iterative_train_test_split(
    X_test, y_test, test_size=0.33
)
y_train, y_test, y_val = (
    [str(list(x)) for x in y_train],
    [str(list(x)) for x in y_test],
    [str(list(x)) for x in y_val],
)

train_vdf = pd.DataFrame({"video": X_train[:, 0], "label": y_train})
val_vdf = pd.DataFrame({"video": X_val[:, 0], "label": y_val})
test_vdf = pd.DataFrame({"video": X_test[:, 0], "label": y_test})

train_vdf.to_csv("data/annotations/video_only/train.csv", index=False)
val_vdf.to_csv("data/annotations/video_only/val.csv", index=False)
test_vdf.to_csv("data/annotations/video_only/test.csv", index=False)

**Test embedding metadata**

In [None]:
from transformers import CLIPTokenizer, CLIPTextModel

In [None]:
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")

for module in text_model.text_model.encoder.layers[:-1].modules():
    for param in module.parameters():
        param.requires_grad = False

tokenized_text = tdf.descriptor.apply(
    lambda x: tokenizer(
        x, padding="max_length", max_length=77, truncation=True, return_tensors="pt"
    )
)

In [None]:
text_features = text_model(**tokenized_text[0])

In [None]:
#### This is for temporal processing of the dataset #####

In [None]:
test_df[
    ["video_id", "behavioral_context", "camera_" "start.time"]
    + list(test_df.columns[-19:])
]

In [None]:
collection = []
for video_name in test_df.video_id.unique():
    item = {}
    tmp = test_df[test_df.video_id == video_name]
    item["video_name"] = video_name
    item["metadata"] = dict(
        age_groups=tmp.age_groups, sex_groups=tmp.sex_groups, max=tmp.max, min=tmp.min
    )
    item["behaviour"] = tmp["start.time"].to_dict()
    collection.append(item)

In [None]:
collection[0]["metadata"]["age_groups"]