**Create labels from Chimp&See data**

In [1]:
import re
import json
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Location of the Chimp&See master spreadsheet.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# local copy of the workbook before running the notebook.
CHIMP_SEE_XLSX = '/home/dl18206/Downloads/Chimp & See master chimpanzee video list 2015-2018_OB_analysis.xlsx'

# Hand the path straight to pandas so it opens *and closes* the workbook itself;
# wrapping it in a bare open(...) left the file handle dangling.
chimp = pd.read_excel(CHIMP_SEE_XLSX, sheet_name="Otto's Analysis (Plain Text)")

In [None]:
# Collect the ID of every core video on disk: the file name up to its first '.'.
core_videos = glob('../acp/videos/core/**/*.mp4', recursive=True)
core_videos = [x.split('/')[-1].split('.')[0] for x in core_videos]

# Keep only the spreadsheet rows whose ID has a matching video file.
# .copy() makes `df` an independent frame, so the column assignments below (and
# in later cells) neither raise SettingWithCopyWarning nor depend on whether
# pandas returned a view of `chimp`.
df = chimp[chimp['ID'].isin(core_videos)].copy()

# Space-separated tag string -> list of individual tags.
df['split_tags'] = df.Tags.str.split(' ')

In [None]:
# IDs for videos with a single ape and tracklet info
# subset_ids = open('single_ape_subset.txt', 'r').read().strip().split()
# subset_df = pd.DataFrame(subset_ids, columns=['id'])

In [None]:
# df = df.join(subset_df.set_index('id'), on='ID', how='inner')

In [None]:
def _is_single_chimp(tags):
    """Return True when the tag string reports exactly one chimp.

    Every ``<N>_chimp`` tag is parsed numerically. The video counts as
    single-chimp when a count of 1 is present and no larger count is.
    A plain substring test for ``'1_chimp'`` also matches ``21_chimp``,
    ``31_chimp`` and so on, and an explicit 2..20 exclusion list misses any
    larger count; parsing the number avoids both problems.
    """
    counts = {int(n) for n in re.findall(r'(\d+)_chimp', tags)}
    # An empty set short-circuits on the membership test, so max() is safe here.
    return 1 in counts and max(counts) <= 1


# Boolean Series aligned with df: True for videos tagged with one chimp only.
single_chimp_mask = df.Tags.apply(_is_single_chimp)

# Tag fragments (including the misspellings found in the data) that mark a
# video as showing tool use. Matching is by substring on the raw tag string,
# so a short fragment such as 'tool' or 'rock' also covers every longer tag
# that contains it.
tool_use_keywords = (
    'carrying_tool', 'using_tool', 'rock_tool_use', 'holding_tool',
    'tool_usage', 'tool_modification', 'tool_user', 'tool_use_learning',
    'tree_limb_tool', 'tool-usage', 'chimps_using_tool', 'tool_useage',
    'chimp_with_tool', 'tool-using', 'toolusage', 'tool_working',
    'log_tool', 'tree_limb_tool_use', 'tools_use', 'chimp_tool_use',
    'tool_work', 'tool_use', 'nicetooluse', 'tool_using',
    'tools', 'tool-use', 'termite-tool', 'tool',
    'tooluse', 'chimptoolusage', 'usingtool', 'chimptooluse',
    'ool_use', 'toool_usage', 'carrying_objects', 'rocksmash',
    'cracking', 'stone_tool_use', 'hammerstone', 'stone',
    'rock', 'hammer', 'cracking_nuts', 'breakingnut',
    'nut_cracking', 'nutcracking', 'crackingnuts', 'woodhammer',
)

# True for every row whose tag string contains at least one fragment.
tool_use_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in tool_use_keywords)
)

# Tag fragments (including the misspellings found in the data) that mark a
# video as showing feeding. Matching is by substring on the raw tag string,
# so e.g. 'nut' or 'eating' also covers every longer tag containing it.
feeding_keywords = (
    'eatingintree', 'eating', 'clay_eating',
    'chimp_with_young_feeding_on_ground', 'feeding', 'insects',
    'chimpcollectingfood', 'fruit', 'applejack',
    'wood_', 'wood_eating', 'eating_wood', 'wood_chewing',
    'wood_feeding', 'wood-eating', 'woodeating', 'woord_eating',
    'wwod-eating', 'eatingbark', 'barking', 'bark-eating',
    'bark_eating', 'tree_gnawing', 'stripping_bark',
    'nut_pick', 'nutpick', 'nut', 'nuts',
    'termites', 'termite_mound', 'termitefishing',
    'female_on_termite_mound',
)

# True for every row whose tag string contains at least one fragment.
feeding_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in feeding_keywords)
)
                             
# Tag fragments that mark a video as showing a swelling. Matching is by
# substring on the raw tag string.
swelling_keywords = (
    'femaleswelling', 'max_swelling', 'sexual_swelling', 'max-swelling',
    'big_swelling', 'swelling', 'extreme_swelling', 'same_swelling',
)

# True for every row whose tag string contains at least one fragment.
swelling_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in swelling_keywords)
)

# Tag fragments (including the misspellings found in the data) that mark a
# video as showing a reaction to, or interaction with, the camera. Matching is
# by substring on the raw tag string.
camera_keywords = (
    'camera_interaction', 'camera_', 'camera_aware', 'camera_touch',
    'camera-reaction', 'camerainteraction', 'camera_reactrion', 'camera_stare',
    'camera_reaction', 'camerareaction', 'cam_reaction', 'camera_reaction_infants',
    'camera-aware', 'touchcam', 'camea_reaction', 'camara_reaction',
    'cam_touch', 'caerma_reaction', 'selfie', 'chimp_selfie',
)

# True for every row whose tag string contains at least one fragment.
camera_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in camera_keywords)
)

# Tag fragments (including the misspelling 'climping') that mark a video as
# showing climbing. Matching is by substring on the raw tag string, so 'tree'
# alone covers any tag that mentions a tree.
climbing_keywords = (
    'climbing_in_tree', 'treeclimbing', 'climbing', 'climbing_tree',
    'tree', 'climping', 'inatree',
)

# True for every row whose tag string contains at least one fragment.
climbing_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in climbing_keywords)
)


# Tag fragments (including the misspelling 'pibedal') that mark a video as
# showing upright / bipedal posture. Matching is by substring on the raw tag
# string.
bipedal_keywords = (
    'pibedal', 'upright_walking', 'chimp_just_standing',
    'chimp_standing', 'standing', 'bipedal',
)

# True for every row whose tag string contains at least one fragment.
bipedal_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in bipedal_keywords)
)

# Tag fragments that mark a video as night-time footage. Matching is by
# substring on the raw tag string. 'nightfeeding' was previously tested twice;
# the redundant second check is removed, which leaves the result unchanged.
night_keywords = (
    'chimatnight', 'nightchimps', 'nightmeal',
    'nightfeeding', 'nightchimp', 'night_chimp',
)

# True for every row whose tag string contains at least one fragment.
night_mask = df.Tags.apply(
    lambda tags: any(keyword in tags for keyword in night_keywords)
)

In [None]:
# Attach each boolean mask to df as its own column. The mapping order fixes the
# order in which the new columns are appended.
behaviour_masks = {
    'single_chimp': single_chimp_mask,
    'tool_use': tool_use_mask,
    'feeding': feeding_mask,
    'climbing': climbing_mask,
    'swelling': swelling_mask,
    'bipedal': bipedal_mask,
    'camera': camera_mask,
    'night': night_mask,
}
for column_name, mask in behaviour_masks.items():
    df[column_name] = mask

**Single ape subset - label processing**

In [None]:
# Restrict to the single-chimp videos and give the unnamed spreadsheet column
# a usable name.
single_df = df.loc[df.single_chimp].rename(columns={'Unnamed: 2': 'site'})

# Alphabetical column order, minus the helper columns that are no longer needed.
single_df = (
    single_df
    .reindex(columns=sorted(single_df.columns))
    .drop(columns=['split_tags', 'single_chimp'])
)

# Move the site column to position 1 (right after the ID), then lower-case the
# three descriptive column names.
first_column = single_df.pop('site')
single_df.insert(1, 'Name', first_column)
single_df = single_df.rename(columns={'ID': 'id', 'Name': 'name', 'Tags': 'tags'})

# Multi-hot label per video: one 0/1 entry per behaviour, in this fixed order.
label_columns = ['bipedal', 'camera', 'climbing', 'feeding', 'night', 'swelling', 'tool_use']
single_df['label'] = pd.Series(
    single_df[label_columns].astype(int).values.tolist(),
    index=single_df.index,
)
single_df

In [None]:
# Spot-check: rebuild the multi-hot label for one known video ID.
example_flags = single_df.loc[
    single_df.id == 'ACP000002l',
    ['bipedal', 'camera', 'climbing', 'feeding', 'night', 'swelling', 'tool_use'],
].iloc[0]
[int(flag) for flag in example_flags]

In [None]:
# Signature reminder:
# sklearn.model_selection.train_test_split(*arrays, test_size=None, train_size=None,
#                                          random_state=None, shuffle=True, stratify=None)

# Video IDs are the items being split; the site names are split alongside them.
videos = single_df['id'].values
sites = single_df['name'].values

# 70 % / 30 % split with a fixed seed so the partition is reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    videos,
    sites,
    test_size=0.3,
    train_size=0.7,
    random_state=0,
)

In [None]:
# Flag each video as belonging to the train or the test partition.
# isin() does one hashed membership pass instead of scanning the train_x array
# once per row, and already yields booleans, so no `True if ... else False`.
single_df['train'] = single_df.id.isin(train_x)

# Every video that is not in the training partition is a test video.
single_df['test'] = ~single_df['train']

In [None]:
# Write the labelled single-chimp table (index included) to an Excel workbook
# in the current working directory.
labels_workbook = 'panaf_5k_labels.xlsx'
single_df.to_excel(labels_workbook)

In [None]:
# Preview the first five rows of the labelled table.
single_df.head(n=5)

**Behaviour occurrences**

In [None]:
# Name the behaviour columns explicitly rather than slicing columns[3:10] by
# position: the slice silently picks the wrong columns as soon as the
# spreadsheet gains a column or the column order changes.
attr = pd.Index(['bipedal', 'camera', 'climbing', 'feeding', 'night', 'swelling', 'tool_use'])

# Number of single-chimp videos flagged with each behaviour.
counts = single_df[attr].sum(axis=0)

fig, ax = plt.subplots(figsize=(15, 5))
counts.plot.bar(ax=ax)
ax.set_xlabel("Attributes")
ax.set_ylabel("Occurrence");

**Behaviour co-occurrences**

In [None]:
# 0/1 indicator matrix: one row per video, one column per behaviour.
matrix = single_df[attr].astype(int)

# Entry (i, j) counts the videos showing both behaviour i and behaviour j;
# the diagonal therefore holds each behaviour's own total.
co_matrix = matrix.T.dot(matrix)
diag = np.diag(co_matrix)

# Scale each column j by 1 / total(j), so every column is expressed relative
# to that behaviour's own number of occurrences.
inverse_totals = 1 / diag
rel_co_matrix = inverse_totals * co_matrix

fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(rel_co_matrix, ax=ax);

**Generate multi-label ground truth**

In [None]:
# Multi-hot label for every video in df: one 0/1 entry per behaviour, in this order.
multi_label_columns = ['tool_use', 'swelling', 'bipedal', 'camera', 'night']
pd.Series(df[multi_label_columns].astype(int).values.tolist(), index=df.index)

In [None]:
# Number of videos flagged with each behaviour, keyed by column name.
# Summing the boolean column counts its True values and gives 0 when a
# behaviour never occurs, whereas value_counts().loc[True] raises KeyError
# in that case. Iterating over df keeps the keys in df's column order and
# skips any behaviour column that is absent.
counted_behaviours = ('tool_use', 'swelling', 'bipedal', 'camera', 'night')
data = {
    column: int(df[column].sum())
    for column in df
    if column in counted_behaviours
}

In [None]:
# Display the `data` mapping as the cell output.
data