In [None]:
import os
import json
import shutil
import pandas as pd
import numpy as np
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import random, math

In [2]:
# Load Maureen's annotations: one CSV per site, combined into a single frame.
csv_files = glob('data/sites/csv/**/*.csv', recursive=True)
# Order by file name only (ignoring the directory part). `os.path.basename`
# also copes with platform-specific path separators, unlike splitting on '/'.
sorted_csv_files = sorted(csv_files, key=os.path.basename)
if not sorted_csv_files:
    raise FileNotFoundError("No annotation CSV files found under 'data/sites/csv/'")

# Read every file, then concatenate once. Growing the frame with pd.concat
# inside a loop re-copies all previously loaded rows on each iteration.
# Each file keeps its own row index, so index labels repeat across files.
df = pd.concat([pd.read_csv(path, encoding="ISO-8859-1") for path in sorted_csv_files])


def _prepend_zero(key):
    """Return `key` with a '0' inserted in front of its last '_'-separated token."""
    head, _, tail = key.rpartition('_')
    return f'{head}_0{tail}'


# Matching key: "<subfolder>_<video_file_name>", lower-cased, with everything
# from the first '.' onwards removed.
subdir_video = df.subfolder.astype(str) + '_' + df.video_file_name.astype(str)
df['subdir_video'] = subdir_video.str.lower().str.split('.').str[0]
# Alternative key with a zero prepended to the last token, used as a second
# matching attempt against the video file list.
df['prepend_zero'] = df.subdir_video.apply(_prepend_zero)

In [3]:
# Load filepaths from Jade2
jf = pd.read_csv('data/jade2/chimp_videos.csv', index_col=False)
jf.drop(columns=['Unnamed: 0'], inplace=True)
jf.files = jf.files.apply(lambda x: x.split('.')[0].lower())

**Matching video-annotations**

In [4]:
# Annotation rows whose key appears verbatim in the Jade2 file list.
is_direct_match = df['subdir_video'].isin(jf['files'].values)
matching1 = df[is_direct_match]
# Number of distinct video keys matched directly.
len(matching1['subdir_video'].unique())

13460

In [5]:
# Handling missing zeros
matching2 = df[df.prepend_zero.isin(jf.files.values)]
matching2.drop(columns=['subdir_video'], inplace=True)
matching2.rename(columns={"prepend_zero": "subdir_video"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching2.drop(columns=['subdir_video'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching2.rename(columns={"prepend_zero": "subdir_video"}, inplace=True)


In [20]:
# Concatenate matching dataframes
multi_label_df = pd.concat([matching1, matching2])
multi_label_df.drop(columns=['prepend_zero'], inplace=True)
multi_label_df[multi_label_df.subdir_video.isin(jf.files)]

Unnamed: 0,new_row_id,country,research_site,genus,species,cam_coverage_area,location_metadata,habitat,utm_zone,utm_long,...,sex,tool_use,vocalization,bipedal,camera_reaction,behavioral_context,other_species,additional_comments,record_type,subdir_video
0,1,mali,bafing,Pan,troglodytes verus,9.87,trail,forest on rock,29n,342661.0,...,male,no,no,no,no,travel,,Not very clear screen,60s_video,baf_vid1_0342661_1432807_20141103_11160014
5,6,mali,bafing,Pan,troglodytes verus,14.00,feeding site/fruit tree,gallery forest,29n,346467.0,...,male,no,no,no,yes,feeding,,"Near the camera, with a fruit in his mouth",60s_video,baf_vid10_0346467_1436892_20151112_11280025
10,11,mali,bafing,Pan,troglodytes verus,7.00,water source,forest on rock,29n,343264.0,...,female,no,no,no,no,resting,,Carries an infant ventrally,60s_video,baf_vid11_0343264_1434832_20151019_12100066
11,12,mali,bafing,Pan,troglodytes verus,7.00,water source,forest on rock,29n,343264.0,...,unclear,no,no,no,no,resting,,Rides ventrally on the above chimp,60s_video,baf_vid11_0343264_1434832_20151019_12100066
12,13,mali,bafing,Pan,troglodytes verus,7.00,water source,forest on rock,29n,343264.0,...,female,no,yes,no,yes,resting,,At the top right side of the screen. Seems to ...,60s_video,baf_vid11_0343264_1434832_20151019_12100066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
963,964,guinea,sobory,Pan,troglodytes verus,80.00,trail,gallery forest,29n,255619.0,...,male,no,no,no,no,travel,,,60s_video,sob_vid05_0255619_1305651_20160319_03220020
964,965,guinea,sobory,Pan,troglodytes verus,80.00,trail,gallery forest,29n,255619.0,...,unclear,no,no,no,yes,travel,,,60s_video,sob_vid05_0255619_1305651_20160319_03220020
965,966,guinea,sobory,Pan,troglodytes verus,80.00,trail,gallery forest,29n,255619.0,...,male,no,no,no,yes,travel,,,60s_video,sob_vid05_0255619_1305651_20160319_03220020
966,967,guinea,sobory,Pan,troglodytes verus,80.00,trail,gallery forest,29n,255619.0,...,male,no,no,no,yes,travel,,,60s_video,sob_vid05_0255619_1305651_20160319_03220020


In [22]:
# Ventral and dorsal carrying
multi_label_df['ventral'] = multi_label_df.additional_comments.apply(lambda x: 1 if 'ventral' in str(x) else 0)
multi_label_df['dorsal'] = multi_label_df.additional_comments.apply(lambda x: 1 if 'dorsal' in str(x) else 0)
multi_label_df['carry'] = multi_label_df.additional_comments.apply(lambda x: 1 if 'carry' in str(x) else 0)
multi_label_df['carries'] = multi_label_df.additional_comments.apply(lambda x: 1 if 'carries' in str(x) else 0)

In [23]:
# Locomotion / posture keywords: add a 0/1 column per keyword, set when the
# free-text comment contains it (case-insensitive substring test).
# NOTE(review): substring matching means short keywords also fire inside
# longer words (e.g. 'sit' inside 'site') - confirm this is acceptable.
comments_lower = multi_label_df.additional_comments.astype(str).str.lower()
for column, keyword in {
    'walking': 'walk',
    'run': 'run',
    'jump': 'jump',
    'sit': 'sit',
    'hang': 'hang',
}.items():
    multi_label_df[column] = comments_lower.str.contains(keyword, regex=False).astype('int64')

In [24]:
# Camera-related keywords: add a 0/1 column per keyword, set when the
# free-text comment contains it (case-insensitive substring test).
# `camera_touch` looks for the token 'camtouch'; any comment containing it
# also sets `touch`, because 'touch' is a substring of 'camtouch'.
comments_lower = multi_label_df.additional_comments.astype(str).str.lower()
for column, keyword in {
    'camera': 'camera',
    'look': 'look',
    'camera_touch': 'camtouch',
    'touch': 'touch',
}.items():
    multi_label_df[column] = comments_lower.str.contains(keyword, regex=False).astype('int64')

In [25]:
# Social keywords: add a 0/1 column per keyword, set when the free-text
# comment contains it (case-insensitive substring test).
comments_lower = multi_label_df.additional_comments.astype(str).str.lower()
for column, keyword in {
    'social': 'social',
    'play': 'play',
    'interaction': 'interaction',
}.items():
    multi_label_df[column] = comments_lower.str.contains(keyword, regex=False).astype('int64')

In [31]:
# Show the annotations whose behavioural context is 'camera reaction'.
is_camera_reaction = multi_label_df['behavioral_context'] == 'camera reaction'
multi_label_df[is_camera_reaction]

Unnamed: 0,new_row_id,country,research_site,genus,species,cam_coverage_area,location_metadata,habitat,utm_zone,utm_long,...,jump,sit,hang,camera,look,camera_touch,touch,social,play,interaction
2171,2172,guineabissau,boe,Pan,troglodytes verus,26.25,buttress tree,gallery forest,28n,618587.0,...,0,0,0,0,0,1,1,0,0,0
2188,2189,guineabissau,boe,Pan,troglodytes verus,26.25,buttress tree,gallery forest,28n,618587.0,...,0,0,0,0,0,1,1,0,0,0
2189,2190,guineabissau,boe,Pan,troglodytes verus,26.25,buttress tree,gallery forest,28n,618587.0,...,0,0,0,0,0,1,1,0,0,0
2190,2191,guineabissau,boe,Pan,troglodytes verus,26.25,buttress tree,gallery forest,28n,618587.0,...,0,0,0,0,0,1,1,0,0,0


In [28]:
# Export one row per distinct `additional_comments` value for manual review.
unique_comment_rows = multi_label_df.drop_duplicates(subset=['additional_comments'])
unique_comment_rows.to_csv('processing_additional_comments.csv')

In [18]:
# Processing comments
multi_label_df.drop(columns=['utm_zone', 'utm_long', 'utm_lat'], inplace=True)
multi_label_df.to_csv('processing_additional_comments.csv')

In [39]:
# Inspect the context and comments of every annotation for one video.
video_key = 'fouta_vid18b_260760_1292882_20140524_pict0122'
is_video = multi_label_df['subdir_video'] == video_key
multi_label_df.loc[is_video, ['behavioral_context', 'additional_comments']]

Unnamed: 0,behavioral_context,additional_comments
2739,displaying,**This video has a lot going on so I attempted...
2740,displaying,ventral infant
2741,displaying,2nd to far right at beginning; has a dorsal in...
2742,displaying,dorsal infant with female above
2743,displaying,following not far behind female
2744,displaying,following behind juvenile above; goes downhill...
2745,displaying,following behind other 2 juveniles; crutch wal...
2746,displaying,following behind juvs; has a ventral infant
2747,displaying,ventral infant
2748,displaying,in background behind the adult female


In [None]:
# Trial 
# Trial: work on just the video key and its behavioural context.
test = multi_label_df[['subdir_video', 'behavioral_context']]

In [None]:
# Display the current `test` frame for inspection.
test

In [None]:
# Index by video key and drop columns that are entirely empty.
test = test.set_index('subdir_video').dropna(axis=1, how='all')
# One-hot encode the remaining columns, then merge indicator columns that
# share a name by taking their maximum. `groupby(..., axis=1)` is deprecated
# in recent pandas, so the frame is transposed, grouped by its index, and
# transposed back.
# NOTE(review): this merges same-named *columns*, not rows with the same video
# key, so the result still has one row per annotation. Confirm whether a
# per-video aggregation (`.groupby(level=0).max()`) was intended instead.
test = pd.get_dummies(test, prefix='', prefix_sep='').T.groupby(level=0).max().T
# Restore the video key as a regular column.
test = test.reset_index('subdir_video').dropna(axis=1, how='all')

In [None]:
# Display the current `test` frame for inspection.
test

In [None]:
# Number of label indicators set on each row. Both the key column and any
# existing `count` column are excluded, so re-running this cell does not add
# the previous count into the new total.
test['count'] = test.drop(columns=['subdir_video', 'count'], errors='ignore').sum(axis=1)

In [None]:
# Number of distinct video keys in the trial frame.
len(test['subdir_video'].unique())