In [26]:
import ast
import os

In [1]:
import numpy as np
import pandas as pd

In [21]:
from PIL import Image

### Explore training data

In [27]:
# Relative path to the data directory; train.csv and train_images/ are read from it.
DATA_DIR = '../../../data'

In [28]:
df = pd.read_csv(f'{DATA_DIR}/train.csv')

# Process the annotations column: each cell is read as a string holding the
# repr of a Python list of dicts. ast.literal_eval parses literals only, so
# nothing stored in the CSV can be executed as code (unlike eval).
df['annotations'] = df['annotations'].map(ast.literal_eval)
# Number of annotation dicts attached to each image (0 means no annotations).
df['num_annotations'] = df['annotations'].map(len)

In [3]:
# (rows, columns) of the training table after adding num_annotations.
df.shape

(23501, 7)

In [4]:
# Preview the first rows to see the available columns.
df.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,num_annotations
0,0,40258,0,0,0-0,[],0
1,0,40258,1,1,0-1,[],0
2,0,40258,2,2,0-2,[],0
3,0,40258,3,3,0-3,[],0
4,0,40258,4,4,0-4,[],0


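One factor we need to consider is how to split the dataset. The rows are frames from videos, so we don't want adjacent frames from the same sequence to land on opposite sides of the train/test split, since that would leak information between the two sets. Let's take a look at the statistics.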

#### Basic stats on videos, frames, sequences, etc.

In [5]:
# List the distinct video ids present in the training table.
video_ids = df['video_id'].unique()
print(f"Unique video ids: {video_ids}")

Unique video ids: [0 1 2]


In [6]:
# Count the distinct sequence ids across all videos.
num_sequences = len(df['sequence'].unique())
print(f"Unique sequences: {num_sequences}")

Unique sequences: 20


In [7]:
# Per video: number of distinct sequences and number of non-null `sequence` rows.
sequence_stats = df.groupby(['video_id']).agg({'sequence': ['nunique', 'count']})
sequence_stats.sort_index()

Unnamed: 0_level_0,sequence,sequence
Unnamed: 0_level_1,nunique,count
video_id,Unnamed: 1_level_2,Unnamed: 2_level_2
0,8,6708
1,8,8232
2,4,8561


In [8]:
# Number of rows in each (video_id, sequence) group.
frames_per_sequence = df.groupby(['video_id', 'sequence']).agg({'sequence': 'count'})
frames_per_sequence.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,sequence
video_id,sequence,Unnamed: 2_level_1
0,996,923
0,8399,1423
0,35305,853
0,40258,480
0,45015,617
0,45518,798
0,53708,1077
0,59337,537
1,8503,2843
1,15827,770


In [46]:
# Shape of the per-(video_id, sequence) count table; its row count is the
# number of distinct (video_id, sequence) pairs.
frames_per_sequence = df.groupby(['video_id', 'sequence']).agg({'sequence': 'count'})
frames_per_sequence.sort_index().shape

(20, 1)

In [20]:
# Count the distinct image ids; compare with the row count to check uniqueness.
num_image_ids = len(df['image_id'].unique())
print(f"Unique image ids: {num_image_ids}")

Unique image ids: 23501


#### Basic stats on annotations

In [9]:
# Split the images into those without any annotation and those with at least one.
is_empty = df["num_annotations"] == 0
print(f'Number empty images: {is_empty.sum()}')
print(f'Number non-empty images: {(~is_empty).sum()}')

Number empty images: 18582
Number non-empty images: 4919


In [10]:
# How many images have each annotation count, ordered by the count itself.
annotation_counts = df['num_annotations'].value_counts()
annotation_counts.sort_index()

0     18582
1      2801
2       942
3       374
4       240
5       134
6        59
7        61
8        58
9        32
10       26
11       44
12       17
13       19
14       22
15       40
16       28
17       19
18        3
Name: num_annotations, dtype: int64

In [13]:
# Sanity check: the histogram buckets add up to the number of images.
annotation_counts = df['num_annotations'].value_counts()
annotation_counts.sum()

23501

In [16]:
# Total number of annotations plus the number of images that have none.
total_annotations = df['num_annotations'].sum()
num_empty_images = (df['num_annotations'] == 0).sum()
total_annotations + num_empty_images

30480

Roughly 79% of the images (18582 of 23501) have no annotations, so we'll probably want to downsample the negative examples; the downsampling rate may be a hyper-parameter that we tune. Separately, we may also need to downsample adjacent video frames.

In [11]:
# Peek at the first three annotation lists from images with exactly one annotation.
has_one_annotation = df['num_annotations'] == 1
df.loc[has_one_annotation, 'annotations'].values[:3]

array([list([{'x': 559, 'y': 213, 'width': 50, 'height': 32}]),
       list([{'x': 558, 'y': 213, 'width': 50, 'height': 32}]),
       list([{'x': 557, 'y': 213, 'width': 50, 'height': 32}])],
      dtype=object)

In [12]:
# Peek at the first three annotation lists from images with exactly two annotations.
has_two_annotations = df['num_annotations'] == 2
df.loc[has_two_annotations, 'annotations'].values[:3]

array([list([{'x': 520, 'y': 151, 'width': 78, 'height': 62}, {'x': 598, 'y': 204, 'width': 58, 'height': 32}]),
       list([{'x': 520, 'y': 148, 'width': 80, 'height': 63}, {'x': 598, 'y': 206, 'width': 61, 'height': 34}]),
       list([{'x': 521, 'y': 144, 'width': 82, 'height': 65}, {'x': 599, 'y': 208, 'width': 64, 'height': 36}])],
      dtype=object)

So the annotations for an image are stored as a list of dicts, each with the keys `x`, `y`, `width` and `height`. Cool.

Where do the empty (negative) images come from? Are they spread somewhat uniformly, or concentrated in certain videos and sequences?

In [42]:
def count_negatives(x):
    """Return how many elements of ``x`` are equal to 0.

    Used as a groupby aggregation over ``num_annotations``, where a value of
    0 means the image has no annotations.
    """
    return sum(1 for element in x if element == 0)

In [45]:
# Per video: total images, images without annotations, and the empty fraction.
dfg = (
    df.groupby(['video_id'])
    .agg({'sequence': 'count', 'num_annotations': count_negatives})
    .sort_index()
    .rename(columns={'sequence': 'images', 'num_annotations': 'empty'})
    .assign(frac_empty=lambda table: table['empty'] / table['images'])
)
dfg.sort_index()

Unnamed: 0_level_0,images,empty,frac_empty
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6708,4565,0.680531
1,8232,6133,0.745019
2,8561,7884,0.92092


In [44]:
# Per (video, sequence): total images, images without annotations, and the empty fraction.
dfg = (
    df.groupby(['video_id', 'sequence'])
    .agg({'sequence': 'count', 'num_annotations': count_negatives})
    .sort_index()
    .rename(columns={'sequence': 'images', 'num_annotations': 'empty'})
    .assign(frac_empty=lambda table: table['empty'] / table['images'])
)
dfg.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,images,empty,frac_empty
video_id,sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,996,923,714,0.773564
0,8399,1423,859,0.603654
0,35305,853,773,0.906213
0,40258,480,195,0.40625
0,45015,617,595,0.964344
0,45518,798,675,0.845865
0,53708,1077,373,0.346332
0,59337,537,381,0.709497
1,8503,2843,1743,0.613085
1,15827,770,696,0.903896


#### Explore image sizes

In [32]:
# img = Image.open(f'{DATA_DIR}/train_images/video_0/0.jpg')

In [37]:
# Record the pixel dimensions of every training image, in row order of df,
# so the two lists can be attached to df as columns afterwards.
width, height = [], []

for row in df.itertuples():
    video_id = row.video_id
    video_frame = row.video_frame

    # The context manager closes the file even if reading the size raises.
    with Image.open(f'{DATA_DIR}/train_images/video_{video_id}/{video_frame}.jpg') as img:
        img_width, img_height = img.size

    width.append(img_width)
    height.append(img_height)

In [38]:
# Attach the collected image dimensions as new columns (modifies df in place).
# The lists were filled in df row order, so they line up with the rows.
df['width'] = width
df['height'] = height

In [39]:
# Preview the table again to confirm the width/height columns were added.
df.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,num_annotations,width,height
0,0,40258,0,0,0-0,[],0,1280,720
1,0,40258,1,1,0-1,[],0,1280,720
2,0,40258,2,2,0-2,[],0,1280,720
3,0,40258,3,3,0-3,[],0,1280,720
4,0,40258,4,4,0-4,[],0,1280,720


In [40]:
# Distinct image widths; a single value means every image has the same width.
df['width'].unique()

array([1280])

In [41]:
# Distinct image heights; a single value means every image has the same height.
df['height'].unique()

array([720])