In [1]:
from pathlib import Path
import shutil
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
cwd = os.getcwd()

In [2]:
# number of samples per class to select
N = 600

# train (val test) split
SPLIT = 1/2

# size for cutting out of frames
CUT_SIZE = 400

# size of frames to be fed to the network
SIZE = 256

In [3]:
p = Path(cwd, 'raw_data', 'luk-19_39')

### get frames of video

In [4]:
# p = p / 'vid001.mp4'
# os.system("ffmpeg -i "+str(p)+" -r 1/1 raw_data/frames/%05d.bmp")

### get data frame with monkey bb's

drop all monky-less frames

---

results file contents
- rows of: '{frame},{id},{x1},{y1},{w},{h},{score},{cls_id},1\n'
  - class ID's: monkey,patch,kong,branch,XBI
  - frame ID's are all +1

In [5]:
bb_df = pd.read_csv(str((p / 'results.txt')), sep=',', names=['frame','id','x1','y1','w','h','score','cls_id','1'])

# drop last column thats always 1 and score as we ingnore it either way
bb_df.drop(['1', 'score'], axis=1, inplace=True)

# drop all rows not containing a bb for a monkey
bb_df = bb_df[bb_df.cls_id == 0]

# drop column containing cls_id
bb_df.drop('cls_id', axis=1, inplace=True)

# substract 1 from frame column
bb_df.frame -= 1

# drop id column as we do not have a need for it
bb_df.drop('id', axis=1, inplace=True)

# set frame column as index column
bb_df.set_index('frame', inplace=True)

bb_df

Unnamed: 0_level_0,x1,y1,w,h
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3179,293.290375,96.810219,144.305481,275.912628
3180,290.514756,86.990230,150.410568,287.505871
3181,289.840300,85.263030,151.471306,289.545330
3182,289.879814,88.282896,151.800498,290.675820
3183,292.154443,94.321029,146.893474,281.263720
...,...,...,...,...
20995,647.995499,357.507549,241.239540,220.082770
20996,648.881188,358.332447,239.417999,218.464941
20997,649.183209,358.319795,238.839493,218.238129
20998,649.269495,357.899260,238.601506,218.145035


### get data frame with pose information

---

- drop rows containing interactions
- get set of distinct poses

In [6]:
label_df = pd.read_csv(str((p / 'luk_2021-08-27_M1-1_cam19415039_vid000.mp4.txt')), sep=' ', names=['frame','from','to','pose_or_action'])

# drop all rows containing interactions
label_df = label_df[label_df.to == '-']

# drop to column as it is no longer needed
label_df.drop('to', axis=1, inplace=True)

# rename last column as contents are now cleaned up
label_df.rename(columns={'pose_or_action': 'pose'}, inplace=True)

# can drop from column there is only one monkey either way...even if it is detected with multiple ID's
label_df.drop('from', axis=1, inplace=True)

# set frame column as index column
label_df.set_index('frame', inplace=True)

label_df

Unnamed: 0_level_0,pose
frame,Unnamed: 1_level_1
3407,walking
3408,walking
3409,walking
3410,walking
3411,walking
...,...
20992,standing4legs
20993,standing4legs
20994,standing4legs
20995,standing4legs


In [7]:
labels = label_df.pose.unique()
labels

array(['walking', 'sitting', 'standing4legs', 'standing2legs'],
      dtype=object)

### create combined df

---

combine df's of monkey bb's and according pose label

join tables on frame number
- use merge instead of join (takes by default inner join and thus only matches intersection of keys)




In [8]:
joined_df = bb_df.merge(label_df, left_index=True, right_index=True)

# turn bb values to int
joined_df[['x1','y1','w','h']] = joined_df[['x1','y1','w','h']].astype(int)

joined_df

Unnamed: 0_level_0,x1,y1,w,h,pose
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3407,326,148,138,217,walking
3408,371,151,139,115,walking
3408,386,151,129,106,walking
3409,368,153,145,120,walking
3410,371,153,148,122,walking
...,...,...,...,...,...
20992,648,358,240,218,standing4legs
20993,648,357,241,219,standing4legs
20994,647,357,241,220,standing4legs
20995,647,357,241,220,standing4legs


### generate trainings data

---

- get similar numbers of different poses in train and validation sets
- *future* also get similar numbers accross different videos

In [9]:
# get sample count per label
for l in labels:
    print(l, len(joined_df[joined_df.pose == l]))

walking 783
sitting 1413
standing4legs 673
standing2legs 676


In [10]:
# sample N elements per pose
samples = {l: joined_df[joined_df.pose==l].sample(N).drop('pose', axis=1)
                        for l in labels
                    }
samples

{'walking':         x1   y1    w    h
 frame                    
 6857   593  587  206  261
 6072   598  299  192  227
 6291   607  425  152  308
 3645   507  222  180  218
 20639  951  444   77  115
 ...    ...  ...  ...  ...
 6853   599  585  189  250
 4190   570  295  191  205
 6618   590  532  145  215
 6081   613  302  170  229
 6163   641  388  134  269
 
 [600 rows x 4 columns],
 'sitting':         x1   y1    w    h
 frame                    
 3777   563  356  169  181
 4661   536  333  201  187
 5098   533  347  199  174
 4985   534  344  203  176
 4843   533  332  199  189
 ...    ...  ...  ...  ...
 3755   557  345  174  202
 4460   564  294  184  243
 4675   535  334  205  184
 4455   560  294  186  244
 4588   555  317  184  199
 
 [600 rows x 4 columns],
 'standing4legs':         x1   y1    w    h
 frame                    
 4308   594  267  218  207
 6495   591  473  164  260
 6502   592  473  162  259
 6772   591  535  162  206
 20955  640  355  248  229
 ...    ...  ...

In [11]:
# # create train and val split
samples_train = {l: samples[l].iloc[:int(SPLIT*N)]
                        for l in labels
                    }
samples_validation = {l: samples[l].iloc[int(SPLIT*N):int((1-SPLIT*SPLIT)*N)]
                        for l in labels
                    }
samples_test = {l: samples[l].iloc[int((1-SPLIT*SPLIT)*N):]
                        for l in labels
                    }
# from sklearn.model_selection import train_test_split
# X_train, X_validation = train_test_split(samples['walking'],  train_size=SPLIT)
# X_train, X_validation

### collect images and save at correct location

---

- read in images using opencv
- cut images to 256x256 with bb in center
- save images in train / validation folder
  - using subfolders per label

In [12]:
cap = cv2.VideoCapture(str((p / 'vid000.mp4')))
samples[labels[0]].iloc[0]


x1    593
y1    587
w     206
h     261
Name: 6857, dtype: int64

In [13]:
for l in labels:
    print(l)
    for index, row in samples_test[l].iterrows():
        x1, y1, w, h = row
        # print(index, row)
        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
        ret, frame = cap.read()
        # print(frame.shape)
        (fh, fw, c) = frame.shape
        
        # if w <= CUT_SIZE and h <= CUT_SIZE:
        # corners = [[x1 - (SIZE-w)//2, y1 - (SIZE-h)//2],
        #             [x1 + w + (SIZE-w)//2, y1 - (SIZE-h)//2],
        #             [x1 + w + (SIZE-w)//2, y1 + h + (SIZE-h)//2],
        #             [x1 - (SIZE-w)//2, y1 + h + (SIZE-h)//2]
        #         ]
        y = np.clip([y1 - (CUT_SIZE-h)//2, y1 + h + (CUT_SIZE-h)//2], 0, fh)
        x = np.clip([x1 - (CUT_SIZE-w)//2, x1 + w + (CUT_SIZE-w)//2], 0, fw)
        frame = frame[y[0]:y[1],x[0]:x[1],:]

        # pad: top, bottom, left, right
        if frame.shape[0] < CUT_SIZE:
            print(frame.shape[0])
        if frame.shape[1] < CUT_SIZE:
            print(frame.shape[1])
        print(frame.shape)
        # w_pad, h_pad = max(0, w - CUT_SIZE), max(0, h - CUT_SIZE)# bb width and height always smaller thatn cut size (in normal cases)
        w_pad, h_pad = max(0, CUT_SIZE - frame.shape[0]), max(0, CUT_SIZE - frame.shape[1])
        print(h_pad, w_pad)
        if w_pad > 0 or h_pad > 0:
            # if frame.shape[0] < CUT_SIZE or frame.shape[1] < CUT_SIZE:
            print('cut dimension: ', frame.shape[:2])
            print('above: ',h_pad//2, ' below: ',h_pad//2+h_pad%2)
            print('left: ',w_pad//2, ' right: ',w_pad//2+w_pad%2)
            print('-'*50)

            frame = np.pad(frame, ((w_pad//2, w_pad//2+w_pad%2), (h_pad//2, h_pad//2+h_pad%2), (0,0)), 'constant')
            print('alive')
        # padded_frame = cv2.copyMakeBorder(frame, frame.shape[])
        # else:
        #     # this is a problem!!!!
        #     pass
        # print(index)
        cv2.imwrite(str((p / 'poses' / 'test' / l))+"/{:05d}.png".format(index), frame)

        

        # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # plt.imshow(frame)
        # plt.title(l)
        # plt.show()
        # print(x1, y1, w, h)
        # break
   

walking
399
289
(399, 289, 3)
111 1
cut dimension:  (399, 289)
above:  55  below:  56
left:  0  right:  1
--------------------------------------------------
alive
329
(329, 400, 3)
0 71
cut dimension:  (329, 400)
above:  0  below:  0
left:  35  right:  36
--------------------------------------------------
alive
(400, 400, 3)
0 0
399
(399, 400, 3)
0 1
cut dimension:  (399, 400)
above:  0  below:  0
left:  0  right:  1
--------------------------------------------------
alive
290
(400, 290, 3)
110 0
cut dimension:  (400, 290)
above:  55  below:  55
left:  0  right:  0
--------------------------------------------------
alive
(400, 400, 3)
0 0
334
399
(334, 399, 3)
1 66
cut dimension:  (334, 399)
above:  0  below:  1
left:  33  right:  33
--------------------------------------------------
alive
399
(399, 400, 3)
0 1
cut dimension:  (399, 400)
above:  0  below:  0
left:  0  right:  1
--------------------------------------------------
alive
(400, 400, 3)
0 0
399
234
(399, 234, 3)
166 1
cut di