In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
%matplotlib inline

In [2]:
# local path to github repository, change as you see fit
WORKING_DIR = '/Users/ebyler/gh/dice_detection'

In [3]:
full_image_list = glob.glob(WORKING_DIR+'/data/JPEGImages/*.jpg')

# Re-label data from YOLO format
I used labelImg (https://github.com/tzutalin/labelImg) to label the dice images; LabelImg can output annotations in either PascalVOC or YOLO format. I used the YOLO format, which generates boxes with (x, y , l, w) annotations, where x and y are the upper left corner of the box and l and w are the normalized box length and width.


In [4]:
def get_yolo_file(fname, dir_=WORKING_DIR+'/data/Annotations'):
    '''
    takes image filename and returns annotation filename
    e.g. data/JPEGImages/IMG_XXX.jpg
         data/Annotations/IMG_XXX.txt
    '''
    txt_file = fname.replace('.jpg', '.txt')
    full_path = '/'.join([dir_, txt_file])
    if os.path.isfile(full_path):
        return full_path
    else:
        print('ERROR')
        return None

def return_yolo_data(jpg_fname, ann_dir=WORKING_DIR+'/data/Annotations'):
    '''
    Reads annotation file, returns name and data
    '''
    base_fname = jpg_fname.split('/')[-1] # no path
    txt_fname = get_yolo_file(base_fname, dir_=ann_dir)
    data = np.genfromtxt(txt_fname, names='class,x,y,w,h')
    # don't fail if there is only one labeled object in an image
    data = np.atleast_1d(data)
    # keep file name associated with each label
    fname_arr = [base_fname]*data.shape[0]
    return fname_arr, data

def str_class(val):
    '''
    Return string label for index. 
    YOLO is zero indexed.
    '''
    if val == 0:
        return 'one'
    elif val == 1:
        return 'two'
    elif val == 2:
        return 'three'
    elif val == 3:
        return 'four'
    elif val == 4:
        return 'five'
    elif val == 5:
        return 'six'

def create_image_dict(image_list, ann_dir=WORKING_DIR+'/data/Annotations'):
    '''
    For input list of images, return a dictionary with label information.
    dictionary keys: image file, class (string), x, y, h, w 
    '''
    # empty lists for dictionary
    fnames, classes = [], []
    x,y,h,w = [],[],[],[]
    for jpg_fname in image_list:
        fs, data = return_yolo_data(jpg_fname, ann_dir=ann_dir)
        for f, d in zip(fs, data):
            fnames.append(f)
            classes.append(str_class(d['class']))
            x.append(d['x'])
            y.append(d['y'])
            h.append(d['h'])
            w.append(d['w'])
    out_dict = {'filename':fnames, 'class':classes, 'x':x, 'y':y, 'h':h, 'w':w}
    return out_dict

### Actually create the data dictionary

In [5]:
label_dict = create_image_dict(full_image_list)

# create pandas dataFrame for easier manipulation
full_labels = pd.DataFrame.from_dict(label_dict)

# all images have been resized to 640x640 pixels
# so normalized height and width are relative to this
# (surprisingly easy task in apple's "preview", but could be done with PIL)
full_labels['width'] = 640.
full_labels['height'] = 640.

### YOLO format > TFRecord-like values
YOLO format: (x,y) is the normalized coordinate of the upper left box corner.
TFRecords wants the un-normalized values of the xmin, xmax, ymin, ymax for each box.

In [6]:
# calculate xmin, xmax, multiply by box width (640)
full_labels['xmin'] = full_labels['width']*(full_labels['x'] - (full_labels['w']/2.0))
full_labels['xmax'] = full_labels['width']*(full_labels['x'] + (full_labels['w']/2.0))
# calculate ymin, ymax, multiply by box height (640)
full_labels['ymin'] = full_labels['height']*(full_labels['y'] - (full_labels['w']/2.0))
full_labels['ymax'] = full_labels['height']*(full_labels['y'] + (full_labels['w']/2.0))

# I like it when these things print with a reasonable number of significant figures
full_labels['ymin'] = full_labels['ymin'].map(lambda x: '%3.2f' % x)
full_labels['ymax'] = full_labels['ymax'].map(lambda x: '%3.2f' % x)
full_labels['xmin'] = full_labels['xmin'].map(lambda x: '%3.2f' % x)
full_labels['xmax'] = full_labels['xmax'].map(lambda x: '%3.2f' % x)

# take a peak at what the dataFrame looks like
full_labels.head

<bound method NDFrame.head of                      filename class         x         y         h         w  \
0     IMG_20191209_100348.jpg   one  0.558480  0.641447  0.066520  0.069444   
1     IMG_20191209_100348.jpg   one  0.469846  0.177266  0.062135  0.063231   
2     IMG_20191209_100348.jpg   two  0.587171  0.387244  0.067617  0.069810   
3     IMG_20191209_100348.jpg  four  0.625548  0.468567  0.053363  0.055190   
4     IMG_20191209_100348.jpg  four  0.428363  0.547332  0.059576  0.059942   
...                       ...   ...       ...       ...       ...       ...   
1790  IMG_20191209_095809.jpg   two  0.653692  0.365680  0.059576  0.065424   
1791  IMG_20191209_095809.jpg  four  0.190789  0.387792  0.055556  0.065058   
1792  IMG_20191209_095809.jpg  five  0.341923  0.430373  0.061038  0.057383   
1793  IMG_20191209_095809.jpg  five  0.625548  0.245980  0.062135  0.059576   
1794  IMG_20191209_095809.jpg   six  0.378655  0.215461  0.063231  0.066520   

      width  height  

# Create a training and a test (val) set
Each dice image has between 1 and 25 dice. We don't actually want to do a raw separation of our data based on the list of labeled objects - we want to group them by the filename.
Thankfully, pandas makes this quite easy!

In [7]:
# group dataframe by filename
grouped = full_labels.groupby('filename')

# see the relative frequency of dice counts per image
# Most of my images (52) had 6 dice.
# 2 images had 25 dice in them (!)
grouped.apply(lambda x: len(x)).value_counts()

6     52
5     39
7     29
4     23
3     14
1     13
10    12
8     11
2     10
9      8
11     8
12     7
22     7
14     3
24     2
15     2
16     2
18     2
21     2
25     2
13     1
19     1
dtype: int64

In [8]:
gb = full_labels.groupby('filename')
grouped_list = [gb.get_group(x) for x in gb.groups]

Make an 80/20 split for the training/test sets

In [9]:
# total number of images
gl_len = len(grouped_list)

# I want 80% of the images in the training set
desired_split = 0.8

train_len = int(gl_len*desired_split)

# the test length is then 1-0.8=0.2
# this way is just easier to ignore fractional images
test_len = gl_len - train_len

print('An {0:.1f}/{1:.1f} split on {2} images: '.format(desired_split, 1.-desired_split, gl_len))
print('Train: {}'.format(train_len))
print('Test: {}'.format(test_len))

An 0.8/0.2 split on 250 images: 
Train: 200
Test: 50


Randomly choose train_len images from the original image list. The test indices will then be the remaining images.

In [10]:
train_index = np.random.choice(len(grouped_list), size=train_len, replace=False)
test_index = np.setdiff1d(list(range(gl_len)), train_index)

print('Double-check that size of train/test inds are as expected:')
print(len(train_index), len(test_index))

Double-check that size of train/test inds are as expected:
200 50


In [11]:
# create individual dataFrames for the test/train sets:
# take first N files in index lists
train = pd.concat([grouped_list[i] for i in train_index])
test = pd.concat([grouped_list[i] for i in test_index])

# Print the number of objects in each set.
# This will be different than the total number of images
# in each set, since images have more than one object.
print('Total number of objects in the train/test sets:')
print(len(train), len(test))

Total number of objects in the train/test sets:
1487 308


# Print train/test sets to csv

In [12]:
SAVE_DIR = WORKING_DIR+'/data/ImageSets/Main'
cols = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
train.to_csv(SAVE_DIR+'/train_labels.csv', index=None, columns=cols)
test.to_csv(SAVE_DIR+'/test_labels.csv', index=None, columns=cols)