In [1]:
import mmcv
import torch
import torchvision
from tqdm import tqdm
from glob import glob
from einops import rearrange
import pandas as pd
from torch.utils.data import DataLoader
from torchvision.utils import make_grid
import torchvision.transforms as transforms
from panaf.datasets import SSLKineticsProcessing

  warn(f"Failed to load image Python extension: {e}")


In [2]:
def tensor2video(x, video_name, ape_id, frame_idx, outpath):
    """Write the first clip of a batched video tensor to an ``.mp4`` file.

    The clip is min-max rescaled to the 0-255 range, cast to ``uint8`` and
    saved as ``{outpath}/{video_name}_f{frame_idx}_ape_{ape_id}.mp4`` at 24 fps.

    Args:
        x: Batched clip tensor laid out as B x T x C x H x W. Only the first
            element of the batch is written.
        video_name: Name of the source video, used as the file-name stem.
        ape_id: Identifier embedded in the file name.
        frame_idx: Frame index embedded in the file name.
        outpath: Directory the video file is written into.
    """
    outfile = f"{video_name}_f{frame_idx}_ape_{ape_id}.mp4"

    # B x T x C x H x W -> first clip as C x T x H x W, detached and on the CPU
    clip = x.permute(0, 2, 1, 3, 4)[0].detach().cpu()
    min_val = clip.min()
    max_val = clip.max()
    value_range = max_val - min_val

    if value_range == 0:
        # A constant clip would otherwise compute 0/0 = NaN, and casting NaN
        # to uint8 is undefined; emit an all-black clip instead.
        clip = torch.zeros_like(clip, dtype=torch.uint8)
    else:
        clip = ((clip - min_val) * 255 / value_range).to(torch.uint8)

    # C x T x H x W -> T x H x W x C, the layout passed to write_video
    clip = torch.permute(clip, (1, 2, 3, 0))

    # Write tensor to video
    torchvision.io.write_video(filename=f"{outpath}/{outfile}", video_array=clip, fps=24)

In [3]:
# Per-frame preprocessing: convert to a tensor, then resize to a fixed 256 x 320
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((256, 320)),
])

# Paths to the raw videos and their matching detection annotations
ann = "detections/scaled/convnext_cascade_rcnn/drc/rt"
data = "videos/original/drc/rt"

# Build the dataset; the same transform is supplied for both transform slots.
# NOTE(review): stride equals sequence_len, which presumably yields
# non-overlapping clips — confirm against SSLKineticsProcessing.
train_dataset = SSLKineticsProcessing(
    data_dir=data,
    ann_dir=ann,
    type='r',
    sequence_len=96,
    stride=96,
    sample_itvl=1,
    behaviour_threshold=96,
    spatial_transform=transform,
    temporal_transform=transform,
)

0it [00:00, ?it/s]


In [46]:
# Iterate the dataset in order, one clip per batch
loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=False,
)

In [47]:
# Visual sanity check: tile every frame of the first batch into a single image
x, name, ape_id, frame_idx = next(iter(loader))
grid = make_grid(
    rearrange(x, 'b t c h w -> (b t) c h w'),  # fold batch and time into one axis
    nrow=16,
)
img = transforms.ToPILImage()(grid)
img.show()

In [48]:
# Generate cropped train videos
for x, video_name, ape_id, frame_idx in tqdm(loader):
    tensor2video(x, next(iter(video_name)), ape_id.item(), frame_idx.item(), outpath='videos/cropped/drc/rt')

100%|██████████| 797/797 [1:08:44<00:00,  5.17s/it]


**Generate Kinetics-like csv annotation file**

In [36]:
def generate_kinetics_annotation_file(path_to_videos, outfile, label_index=9):
    """Write a Kinetics-style annotation file listing every ``.mp4`` under a folder.

    Each output line is ``<video path> <label_index>``, space separated, with
    no header row and no index column.

    Args:
        path_to_videos: Root folder searched recursively for ``*.mp4`` files.
        outfile: Path of the annotation file to write.
        label_index: Label written for every video. Defaults to 9, a dummy
            label for self-supervised training.

    Returns:
        The DataFrame that was written, with columns ``videos`` and
        ``label_index``.
    """
    videos = glob(f'{path_to_videos}/**/*.mp4', recursive=True)
    df = pd.DataFrame(videos, columns=['videos'])
    df['label_index'] = label_index
    df.to_csv(outfile, sep=' ', header=False, index=False)
    # Returned so that callers assigning the result get the frame rather than None
    return df

In [34]:
# Index every clip under videos/cropped/ into test.csv
df = generate_kinetics_annotation_file(
    outfile='test.csv',
    path_to_videos='videos/cropped/',
)

In [38]:
# Collect every cropped clip and attach the dummy label used for SS training
videos = glob('videos/cropped/**/*.mp4', recursive=True)
df = pd.DataFrame(videos, columns=['videos']).assign(label_index=9)

In [45]:
# Number of rows that make up 20% of the frame
index = round(0.2 * len(df))

In [46]:
# Display a random subset of `index` rows
df.sample(n=index)

Unnamed: 0,videos,label_index
8057,videos/cropped/guineabissau/boe/boe_vid23_0618...,9
2214,videos/cropped/sierraleone/oknp/OKNP_vid3_0811...,9
2604,videos/cropped/rwanda/nnp/NNP_Vid21_0745465_97...,9
6806,videos/cropped/guineabissau/boe/boe_vid26_0620...,9
8008,videos/cropped/guineabissau/boe/boe_vid22_0622...,9
...,...,...
470,videos/cropped/drc/bili/bili_vid04_242372_4794...,9
9202,videos/cropped/guineabissau/boe/boe_vid31_0617...,9
3411,videos/cropped/rwanda/nnp/NNP_Vid21_0745465_97...,9
2833,videos/cropped/rwanda/nnp/NNP_Vid23_0747217_97...,9


**Get complete video list**

In [17]:
# Load the complete video list and discard the leftover saved-index column
df = (
    pd.read_csv('chimp_videos.csv', index_col=False)
    .drop(columns=['Unnamed: 0'])
)

In [54]:
# Imported here because this cell runs before the notebook's later `import shutil` cell
import shutil

# Rename every detection file to its lower-cased path; shutil.move returns the
# destination, so `lower_case` holds the new paths.
lower_case = [
    shutil.move(x, x.lower())
    for x in glob("detections/scaled/**/*.json", recursive=True)
]

**Dindefelo - categorise videos by behavioral context**

In [40]:
import os
import shutil

In [45]:
# Annotations read from dindefelo.csv
din = pd.read_csv('../maureen_annotations/dindefelo.csv')

# File name up to the first dot for every original .mp4 under videos/original/din
videos = [
    video_path.rsplit('/', 1)[-1].split('.', 1)[0]
    for video_path in glob("videos/original/din/**/*.mp4", recursive=True)
]

In [51]:
# Move each annotated video into a folder named after its behavioural context.
# The [:-5] slice leaves out the last five rows of the annotation sheet.
for path, category in list(zip(din.subdir_video, din.behavioral_context))[:-5]:
    source = f"videos/original/din/{path}.mp4"
    destination = f"categorised/{category}/{path}.mp4"
    # makedirs(exist_ok=True) replaces the isdir/mkdir pair: it is a no-op when
    # the folder exists and also creates any missing parent folders.
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    shutil.move(source, destination)

In [52]:
# File name up to the first dot for every .mp4 found under videos/categorised
# NOTE(review): the move loop writes to "categorised/...", while this globs
# "videos/categorised/..." — confirm which root is intended.
moved_videos = [
    video_path.rsplit('/', 1)[-1].split('.', 1)[0]
    for video_path in glob("videos/categorised/**/*.mp4", recursive=True)
]

**Manual Kinetics-style video annotation**

In [148]:
import pandas as pd
from glob import glob

In [25]:
# Site whose cropped clips are listed for manual annotation
country_site = 'uganda/bwi'

# Lower-cased file name of every cropped clip for the site, sorted alphabetically
videos = [
    clip_path.rsplit('/', 1)[-1].lower()
    for clip_path in glob(f"videos/cropped/{country_site}/**/*.mp4", recursive=True)
]
df = pd.DataFrame(videos, columns=['videos']).sort_values(by='videos')
# Export
# df.to_csv('uganda_bwi.csv', index=False)

**Convert .xlsx file to Kinetics-style annotation file**

In [7]:
# Load annotated cropped videos from xlsx file
prefix = '/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/chimp_cropped_videos'

df = pd.read_excel(
    'ssl_annotation.xlsx', 
    sheet_name=None,
)
df = pd.concat(df.values(), ignore_index=True)

# Need to check for whitespace etc.
df.unclear = df.unclear.apply(lambda x: True if x=='y' else False)
df.ape = df.ape.apply(lambda x: False if x=='n' else True)
df = df[(df.ape)&(df.unclear==False)]

df['path'] = prefix + '/' + df['country'].str.lower() + '/' + df['site'].str.lower() + '/' + df['videos'].str.lower()

In [8]:
# Preview dataframe
# Display the filtered annotation frame (pandas truncates long frames in the output)
df

Unnamed: 0,videos,ape,unclear,start-end,country,site,path
0,bili_vid01_240967_477386_20121208_PICT0003_f19...,True,False,,drc,bili,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
1,bili_vid01_240967_477386_20121208_PICT0003_f1_...,True,False,,drc,bili,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
2,bili_vid01_240967_477386_20121208_PICT0003_f97...,True,False,,drc,bili,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
3,bili_vid01_240967_477386_20121208_PICT0009_f10...,True,False,,drc,bili,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
4,bili_vid01_240967_477386_20121208_PICT0009_f11...,True,False,,drc,bili,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
...,...,...,...,...,...,...,...
7747,bwi_vid8_807741_9884282_20130222_pict0310_f497...,True,False,,uganda,bwi,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
7748,bwi_vid8_807741_9884282_20130222_pict0310_f593...,True,False,,uganda,bwi,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
7749,bwi_vid8_807741_9884282_20130222_pict0310_f740...,True,False,,uganda,bwi,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...
7750,bwi_vid8_807741_9884282_20130222_pict0310_f97_...,True,False,,uganda,bwi,/jmain02/home/J2AD001/wwp02/oxb63-wwp02/data/c...


In [9]:
# Get full paths to new data
# Full paths of the newly annotated clips, as a plain list
filenames = list(df['path'].to_numpy())

In [19]:
# Get full path to PanAf500 training data
# Existing PanAf500 training list: space separated, no header row
train = pd.read_csv('original_train.csv', header=None, sep=' ')
# The first column holds the video paths
original_filenames = list(train[0].to_numpy())
# len(filenames), len(original_filenames)

In [21]:
# Combine paths and assign dummy values
# New clips first, then the original training clips, each given the dummy label 9
x = filenames + original_filenames
new_df = pd.DataFrame(x).assign(label=9)

In [24]:
# Write to new annotation file
# Save as a space-separated file with no header row and no index column
new_df.to_csv(
    'drc-rt-bud-bwi_train.csv',
    index=False,
    header=False,
    sep=' ',
)