In [1]:
import os
import json
import shutil
import pandas as pd
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import random, math

In [2]:
csv_files = glob("data/sites/csv/**/*.csv", recursive=True)
sorted_csv_files = sorted(csv_files, key=lambda x: x.split("/")[-1])
initialiser, remainder = sorted_csv_files[0], sorted_csv_files[1:]
df = pd.read_csv(initialiser, encoding="ISO-8859-1")
for file in remainder:
    site = pd.read_csv(file, encoding="ISO-8859-1")
    df = pd.concat([df, site])
df["subdir_video"] = df.subfolder.astype(str) + "_" + df.video_file_name.astype(str)
df["subdir_video"] = df["subdir_video"].str.lower()

In [3]:
videos = (
    df.groupby("subdir_video")["behavioral_context"]
    .apply(
        lambda x: ",".join(list(set([str(i) for i in x])))
        if len(list(x)) > 1
        else list(x)[0]
    )
    .index.values
)
behavioural_contexts = (
    df.groupby("subdir_video")["behavioral_context"]
    .apply(
        lambda x: ",".join(list(set([str(i) for i in x])))
        if len(list(x)) > 1
        else list(x)[0]
    )
    .values
)
behaviour_df = pd.DataFrame(
    {"subdir_video": videos, "behavioral_context": behavioural_contexts}
)

In [4]:
behaviour_df = pd.concat(
    [behaviour_df, behaviour_df["behavioral_context"].str.get_dummies()], axis=1
)

In [5]:
df.tool_use.fillna("unknown", inplace=True)
df.camera_reaction.fillna("unknown", inplace=True)
df.vocalization.fillna("unknown", inplace=True)
df.bipedal.fillna("unknown", inplace=True)

In [6]:
# TODO: group behaviours according to Hjalmar classifications
# TODO: explore how to augment MM annotations with CNS annotations WITH quality assurance
# TODO: cross reference CNS and MM annotations
# TODO: consider that NaN values in MM annotations do not mean neccessarily mean NO!
# TODO: linked to above point - timestamp actions in 15 second increments as per C&S annotations

In [7]:
# New class for offscreen vocalizations + cleaning bipedal column
df.rename(
    columns={
        "tool_use": "tool_use_col",
        "vocalization": "vocalization_col",
        "bipedal": "bipedal_col",
        "camera_reaction": "camera_reaction_col",
    },
    inplace=True,
)
df["vocalisation_offscreen"] = df.vocalization_col == "offscreen"
df.bipedal_col.replace({"ues": "yes"}, inplace=True)

In [9]:
# df.tool_use_col.value_counts(), df.vocalization_col.value_counts(), df.bipedal_col.value_counts(), df.camera_reaction_col.value_counts()

In [10]:
# Merge col behaviour into behaviour_df
attr = ["tool_use_col", "vocalization_col", "bipedal_col", "camera_reaction_col"]
behaviour_df = behaviour_df.merge(
    df.groupby("subdir_video")[attr].agg(
        lambda x: True
        if "yes" in list(x)
        else ("unknown" if "unknown" in list(x) else False)
    ),
    on="subdir_video",
)

In [11]:
behaviour_df.columns

Index(['subdir_video', 'behavioral_context', 'aggression', 'camera reaction',
       'climbing', 'displaying', 'displaying ', 'feeding', 'greeting',
       'grooming', 'nan', 'no', 'playing', 'reassurance', 'resting', 'sexual',
       'tool use', 'tool use algae', 'tool use ants', 'tool use honey',
       'tool use nuts', 'tool use stone throwing', 'tool use termites',
       'tool use unknown', 'travel', 'unclear', 'tool_use_col',
       'vocalization_col', 'bipedal_col', 'camera_reaction_col'],
      dtype='object')

In [12]:
cns = pd.read_csv("data/translations/all_cs_clip_information.txt", sep="\t")
cns["video"] = cns["video.id"].str.split("/").str[-1].str.split(".").str[0]
cns["dir"] = cns["video.id"].str.split("/").str[-2]
cns["subdir_video"] = cns["dir"] + "_" + cns["video"]
cns.subdir_video = cns.subdir_video.str.lower()
cns.subject_id = cns.subject_id.str.lower()

In [13]:
cns.drop(columns=["video.id", "video", "dir"], inplace=True)
behaviour_df = behaviour_df.merge(cns, on="subdir_video", how="left")

In [14]:
behaviour_df

Unnamed: 0,subdir_video,behavioral_context,aggression,camera reaction,climbing,displaying,displaying.1,feeding,greeting,grooming,...,tool_use_col,vocalization_col,bipedal_col,camera_reaction_col,subject_id,start.time,site,tags,classifications,behavior
0,baf_vid10_0346467_1436892_20151112_11280025,feeding,0,0,0,0,0,1,0,0,...,False,False,False,True,61054723,0.0,bafing,"floridpostern,chimp,1_chimp,adult,male,camera_...","CHIMPANZEE,CHIMPANZEE","PLAYING,TRAVELING"
1,baf_vid10_0346467_1436892_20151112_11280025,feeding,0,0,0,0,0,1,0,0,...,False,False,False,True,61054725,15.0,bafing,,"NOTHINGHERE,NOTHINGHERE,NOTHINGHERE",
2,baf_vid10_0346467_1436892_20151112_11280025,feeding,0,0,0,0,0,1,0,0,...,False,False,False,True,61054726,30.0,bafing,,"NOTHINGHERE,NOTHINGHERE,NOTHINGHERE",
3,baf_vid10_0346467_1436892_20151112_11280025,feeding,0,0,0,0,0,1,0,0,...,False,False,False,True,61054729,45.0,bafing,,"NOTHINGHERE,NOTHINGHERE",
4,baf_vid10_0346467_1436892_20151227_1030007,climbing,0,0,1,0,0,0,0,0,...,False,False,False,True,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43640,uga_viduppc_230029_9392405_20141028_pict0074,travel,0,0,0,0,0,0,0,0,...,False,False,False,False,59796071,45.0,ugalla,"issavalley,chimp,1_chimp,issa-zai,female","ANTELOPEDUIKER,MONKEYORPROSIMIAN,NOTHINGHERE,N...","RESTING,TRAVELING"
43641,uga_viduppi_231449_9394588_20140820_pict0003,unclear,0,0,0,0,0,0,0,0,...,False,False,False,False,59753775,0.0,ugalla,"issavalley,chimp,1_chimp,adult,needcid","CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,CH...","DRINKINGFORAGING,PLAYING,RESTING,TOOLUSE"
43642,uga_viduppi_231449_9394588_20140820_pict0003,unclear,0,0,0,0,0,0,0,0,...,False,False,False,False,59753776,15.0,ugalla,"issavalley,chimp,adult,male,1_chimp,2_chimp,ne...","CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,CHIMPANZEE",TRAVELING
43643,uga_viduppi_231449_9394588_20140820_pict0003,unclear,0,0,0,0,0,0,0,0,...,False,False,False,False,59753777,30.0,ugalla,"issavalley,chimp,2_chimp,needcid,male","CHIMPANZEE,CHIMPANZEE,CHIMPANZEE,CHIMPANZEE",TRAVELING


In [15]:
attr = [
    "subject_id",
    "start.time",
    "tool_use_col",
    "vocalization_col",
    "bipedal_col",
    "camera_reaction_col",
    "behavioral_context",
    "tags",
    "behavior",
]

behavioural_context_attr = [
    "aggression",
    "camera reaction",
    "climbing",
    "displaying",
    "displaying ",
    "feeding",
    "greeting",
    "grooming",
    "nan",
    "no",
    "playing",
    "reassurance",
    "resting",
    "sexual",
    "tool use",
    "tool use algae",
    "tool use ants",
    "tool use honey",
    "tool use nuts",
    "tool use stone throwing",
    "tool use termites",
    "tool use unknown",
    "travel",
    "unclear",
]

behavioural_cols_attr = [
    "tool_use_col",
    "vocalization_col",
    "bipedal_col",
    "camera_reaction_col",
]

In [25]:
tag_df = pd.read_csv(
    "/home/dl18206/Downloads/Re_ getting back on track _)/all_tags_behaviours.csv"
)

In [30]:
tag_df.columns

Index(['Tags', 'camera_reaction', 'tool_use', 'object_carry',
       'termite_fishing', 'nut_cracking', 'stone_throw', 'bipedal', 'feeding',
       'wood_eating', 'chimp_carrying', 'vocalisation', 'travel', 'running',
       'walking', 'climbing', 'standing', 'aggression', 'charge', 'fight',
       'branch_shaking', 'hoot', 'grunt', 'mounting', 'sex', 'drinking',
       'piloerection', 'playing', 'social_interaction', 'defecating',
       'drumming', 'display', 'nursing', 'grooming', 'smelling',
       'cross_species_interaction', 'resting', 'sitting', 'in_a_tree',
       'tool_use_surface', 'tool_use_mound', 'night_chimps', 'off_camera',
       'no_behaviour', 'swelling', 'water', 'on_the_ground'],
      dtype='object')

In [63]:
tag_df[~tag_df.camera_reaction.isna()]

Unnamed: 0,Tags,camera_reaction,tool_use,object_carry,termite_fishing,nut_cracking,stone_throw,bipedal,feeding,wood_eating,...,sitting,in_a_tree,tool_use_surface,tool_use_mound,night_chimps,off_camera,no_behaviour,swelling,water,on_the_ground
12,cam_reaction,camera_reaction,,,,,,,,,...,,,,,,,,,,
25,camera_aware,camera_reaction,,,,,,,,,...,,,,,,,,,,
39,camera_reaction,camera_reaction,,,,,,,,,...,,,,,,,,,,
47,camera-aware,camera_reaction,,,,,,,,,...,,,,,,,,,,
118,camerareaction,camera_reaction,,,,,,,,,...,,,,,,,,,,
133,camerainteraction,camera_reaction,,,,,,,,,...,,,,,,,,,,
527,camera_stare,camera_reaction,,,,,,,,,...,,,,,,,,,,
788,selfie,camera_reaction,,,,,,,,,...,,,,,,,,,,
1095,camera_reactrion,camera_reaction,,,,,,,,,...,,,,,,,,,,
1123,camtouch,camera_reaction,,,,,,,,,...,,,,,,,,,,


In [43]:
tmp = behaviour_df[attr]
tmp.dropna(subset=["subject_id"], inplace=True)
tmp[tmp.subject_id.str.startswith("acp")]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp.dropna(subset=['subject_id'], inplace=True)


Unnamed: 0,subject_id,start.time,tool_use_col,vocalization_col,bipedal_col,camera_reaction_col,behavioral_context,tags,behavior
1156,acp0000ecu,0.0,unknown,unknown,unknown,unknown,camera reaction,"1_chimp,camera_reaction,chimp,dailyzoo,juvenil...","camera reaction,on the ground,traveling"
1157,acp0000ecx,15.0,unknown,unknown,unknown,unknown,camera reaction,,
1158,acp0000ecz,30.0,unknown,unknown,unknown,unknown,camera reaction,,
1159,acp0000ed2,45.0,unknown,unknown,unknown,unknown,camera reaction,,
1160,acp0000ebt,0.0,unknown,unknown,unknown,unknown,travel,"3_chimp,camera_reaction,chimp,early_morning,fe...","camera reaction,on the ground,traveling"
...,...,...,...,...,...,...,...,...,...
41834,acp0004g4w,45.0,unknown,unknown,unknown,unknown,travel,,
41835,acp0004g5v,0.0,unknown,unknown,unknown,unknown,travel,"1_chimp,chimp,hive,muddyfrost,needcid","on the ground,traveling"
41836,acp0004g5w,15.0,unknown,unknown,unknown,unknown,travel,,
41837,acp0004g5x,30.0,unknown,unknown,unknown,unknown,travel,,
