In [None]:
from collections import defaultdict
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
from tqdm import tqdm_notebook as tqdm

In [None]:
# Root of the Waymo data, plus the inputs read from it.
DATAPATH = '/data/Waymo'
IMAGE_PATH = os.path.join(DATAPATH, 'data')
LABEL_FILE = os.path.join(DATAPATH, 'dataset.txt')

# Image sizes as (width, height): originals are matched against OLD_SIZE and
# resized to NEW_SIZE.
OLD_SIZE = (1920, 1280)
NEW_SIZE = (128, 128)

# Output directories for the resized training and validation splits.
TRAIN_PATH = os.path.join(DATAPATH, '128x128/train')
TEST_PATH = os.path.join(DATAPATH, '128x128/valid')

## Create Dataset

In [None]:
def rand_num():
    """Return a random integer between 1 and 10 (inclusive) from NumPy's global RNG."""
    lowest, exclusive_upper = 1, 11  # randint excludes the upper bound
    return np.random.randint(lowest, exclusive_upper)

In [None]:
# Print an `rm <label file>` line for every validation label file in yolov3_0
# that contains no annotation lines, so the empty files can be removed by hand.
# NOTE(review): yolov3_0 is written by the "Split Labels Files" cell further
# down, so this cell only works once that cell has been run.
label_dir = os.path.join(TEST_PATH, 'yolov3_0')
for label_name in os.listdir(label_dir):
    with open(os.path.join(label_dir, label_name)) as label_file:
        # Separate names for the file name and its lines: reusing `_` for both
        # only worked because empty files never rebound it, and it also
        # clobbered IPython's `_` last-output variable.
        count = sum(1 for _line in label_file)
    if count == 0:
        print('rm ' + label_name)

In [None]:
# Print an `rm <image file>` line for the .jpg that corresponds to every
# validation label file in yolov3_0 that contains no annotation lines.
# NOTE(review): yolov3_0 is written by the "Split Labels Files" cell further
# down, so this cell only works once that cell has been run.
label_dir = os.path.join(TEST_PATH, 'yolov3_0')
for label_name in os.listdir(label_dir):
    with open(os.path.join(label_dir, label_name)) as label_file:
        # Separate names for the file name and its lines: reusing `_` for both
        # only worked because empty files never rebound it.
        count = sum(1 for _line in label_file)
    if count == 0:
        # Swap only the extension; str.replace('txt', 'jpg') would also
        # rewrite any 'txt' that happens to appear inside the file stem.
        stem, _extension = os.path.splitext(label_name)
        print('rm ' + stem + '.jpg')

In [None]:
# Build the 128x128 dataset from the original images listed in LABEL_FILE.
# Each line of LABEL_FILE is "<image file> <ann> <ann> ...", where every
# annotation is a comma-separated list: the last field is the class id and the
# first four fields are normalised by the original width/height below.
# Images whose size differs from OLD_SIZE are skipped.
# NOTE(review): the train/valid split draws from the unseeded global NumPy RNG,
# so it changes on every run -- seed it first if a reproducible split matters.
with open(LABEL_FILE, 'r') as label_list:
    length, height = OLD_SIZE
    for line in tqdm(label_list):
        # split() drops the trailing newline (when present) and ignores
        # repeated/trailing spaces. Slicing off the last character instead
        # truncates the class id when the final line has no newline.
        fields = line.split()
        if not fields:
            continue  # blank line
        image_name = fields[0]
        filename = image_name.split('.')[0]
        annotations = fields[1:]
        filepath = os.path.join(IMAGE_PATH, image_name)
        # The context manager closes the source image file after each item.
        with Image.open(filepath) as source_image:
            size = source_image.size
            if size != OLD_SIZE:
                continue
            # Draws of 9 or 10 (out of 1..10) send the image to the validation split.
            FILE_PATH = TEST_PATH if rand_num() > 8 else TRAIN_PATH
            image = source_image.resize(NEW_SIZE)
            label_file = os.path.join(FILE_PATH, 'yolov3/{}.txt'.format(filename))
            with open(label_file, 'w') as label_out:
                for ann in annotations:
                    values = ann.split(',')
                    class_id = values[-1]
                    coords = [float(value) for value in values[:-1]]
                    # Normalise horizontal values by the original width and
                    # vertical values by the original height.
                    x, y, w, h = (coords[0] / length, coords[1] / height,
                                  coords[2] / length, coords[3] / height)
                    label_out.write('{} {} {} {} {}\n'.format(class_id, x, y, w, h))
            image_file = os.path.join(FILE_PATH, 'images/{}.jpg'.format(filename))
            image.save(image_file)

## Check Class Priors

In [None]:
# Tally how many annotation lines each class id ('0'..'4') has across the
# training label files; the resulting dict is displayed as the cell output.
train_label_dir = os.path.join(TRAIN_PATH, 'yolov3')
class_counts = dict.fromkeys(['0', '1', '2', '3', '4'], 0)
for label_name in os.listdir(train_label_dir):
    with open(os.path.join(train_label_dir, label_name), 'r') as label_io:
        for row in label_io:
            # The class id is everything before the first space.
            class_id = row.partition(' ')[0]
            class_counts[class_id] += 1
class_counts

In [None]:
# Tally how many annotation lines each class id ('0'..'4') has across the
# validation label files; the resulting dict is displayed as the cell output.
valid_label_dir = os.path.join(TEST_PATH, 'yolov3')
class_counts = dict.fromkeys(['0', '1', '2', '3', '4'], 0)
for label_name in os.listdir(valid_label_dir):
    with open(os.path.join(valid_label_dir, label_name), 'r') as label_io:
        for row in label_io:
            # The class id is everything before the first space.
            class_id = row.partition(' ')[0]
            class_counts[class_id] += 1
class_counts

## Split Labels Files

In [None]:
# Number of entries directly inside the train and valid directories, as a
# (train, valid) tuple.
tuple(len(os.listdir(split_dir)) for split_dir in (TRAIN_PATH, TEST_PATH))

In [None]:
# Split every combined label file in <split>/yolov3 into two single-class
# files: lines of class '2' go to yolov3_0/<name>.txt and lines of class '1'
# go to yolov3_1/<name>.txt, each rewritten with class id '0'. Lines of any
# other class are dropped.
for FILE_PATH in [TRAIN_PATH, TEST_PATH]:
    for file in os.listdir(os.path.join(FILE_PATH, 'yolov3')):
        # splitext copes with names that contain no dot or several dots,
        # where unpacking file.split('.') into two names raises ValueError.
        file_name, extension = os.path.splitext(file)
        source_path = os.path.join(FILE_PATH, 'yolov3', file)
        path_0 = os.path.join(FILE_PATH, 'yolov3_0/{}.txt'.format(file_name))
        path_1 = os.path.join(FILE_PATH, 'yolov3_1/{}.txt'.format(file_name))
        # One `with` for all three handles so the outputs are closed even if
        # reading or writing fails part-way through.
        with open(source_path, 'r') as file_r, \
                open(path_0, 'w') as file_0, \
                open(path_1, 'w') as file_1:
            for line in file_r:
                fields = line.split(' ')
                if fields[0] == '1':
                    target = file_1
                elif fields[0] == '2':
                    target = file_0
                else:
                    continue
                fields[0] = '0'
                target.write(' '.join(fields))

In [None]:
# Number of entries directly inside the train and valid directories, as a
# (train, valid) tuple.
tuple(len(os.listdir(split_dir)) for split_dir in (TRAIN_PATH, TEST_PATH))

## Create train and valid txt

In [None]:
# Write 128x128/train.txt: one path per line for every file found in the
# training images directory.
train_list_path = os.path.join(DATAPATH, '128x128/train.txt')
with open(train_list_path, 'w') as listing:
    train_image_names = os.listdir(os.path.join(TRAIN_PATH, 'images'))
    listing.writelines(
        '{}/128x128/train/images/{}\n'.format(DATAPATH, image_name)
        for image_name in train_image_names
    )

In [None]:
# Write 128x128/valid.txt: one path per line for every file found in the
# validation images directory.
valid_list_path = os.path.join(DATAPATH, '128x128/valid.txt')
with open(valid_list_path, 'w') as listing:
    valid_image_names = os.listdir(os.path.join(TEST_PATH, 'images'))
    listing.writelines(
        '{}/128x128/valid/images/{}\n'.format(DATAPATH, image_name)
        for image_name in valid_image_names
    )

## Validate an Image

In [None]:
# Pick one training image at random for the visual checks below.
train_image_names = os.listdir(os.path.join(TRAIN_PATH, 'images'))
# Sample over the real number of images: a fixed upper bound of 5000 raises
# IndexError when there are fewer files and never reaches any beyond it.
test_image = train_image_names[np.random.randint(0, len(train_image_names))]

In [None]:
# Load the sampled image from the training images directory as an array for
# plotting. The context manager closes the underlying file once the pixel
# data has been copied into the array.
with Image.open(os.path.join(TRAIN_PATH, 'images', test_image)) as pil_image:
    image = np.array(pil_image)

In [None]:
# Display the file name of the sampled image.
test_image

In [None]:
# Draw the boxes stored for the sampled image on top of the resized image:
# yolov3_0 boxes in yellow, yolov3_1 boxes in red. Each label line is
# "<class> <x> <y> <w> <h>" with values normalised to [0, 1]; (x, y) is
# treated as the box centre.
filename = test_image.split('.')[0]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(image)
ax.grid(False)
ax.axis('off')
# Scale horizontal values by the image width and vertical values by its
# height, so the overlay stays correct if NEW_SIZE is ever non-square.
image_width, image_height = NEW_SIZE
for label_dir, edge_colour in (('yolov3_0', 'y'), ('yolov3_1', 'r')):
    label_path = os.path.join(TRAIN_PATH, '{}/{}.txt'.format(label_dir, filename))
    # `with` closes the label file even if parsing or drawing fails.
    with open(label_path) as label_file:
        for line in label_file:
            x, y, w, h = map(float, line.split(' ')[1:])
            box_width, box_height = w * image_width, h * image_height
            centre_x, centre_y = x * image_width, y * image_height
            ax.add_patch(
                Rectangle(
                    (centre_x - box_width / 2, centre_y - box_height / 2),
                    box_width,
                    box_height,
                    linewidth=2, edgecolor=edge_colour, facecolor='none'
                )
            )
plt.show()

In [None]:
# Load the original-resolution version of the sampled image as an array.
# The context manager closes the underlying file once the pixel data has been
# copied into the array.
ORIG_DATA = os.path.join(DATAPATH, 'data')
with Image.open(os.path.join(ORIG_DATA, test_image)) as pil_image:
    image = np.array(pil_image)

In [None]:
# Draw the boxes stored for the sampled image on top of the original-size
# image: yolov3_0 boxes in yellow, yolov3_1 boxes in red. Each label line is
# "<class> <x> <y> <w> <h>" with values normalised to [0, 1]; (x, y) is
# treated as the box centre.
filename = test_image.split('.')[0]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(image)
ax.grid(False)
ax.axis('off')
# Horizontal values scale with the original width, vertical with the height.
image_width, image_height = OLD_SIZE
for label_dir, edge_colour in (('yolov3_0', 'y'), ('yolov3_1', 'r')):
    label_path = os.path.join(TRAIN_PATH, '{}/{}.txt'.format(label_dir, filename))
    # `with` closes the label file even if parsing or drawing fails.
    with open(label_path) as label_file:
        for line in label_file:
            x, y, w, h = map(float, line.split(' ')[1:])
            box_width, box_height = w * image_width, h * image_height
            centre_x, centre_y = x * image_width, y * image_height
            # The corner is the centre minus half the width horizontally and
            # half the HEIGHT vertically; the yellow boxes previously
            # subtracted half the width on both axes and were drawn offset.
            ax.add_patch(
                Rectangle(
                    (centre_x - box_width / 2, centre_y - box_height / 2),
                    box_width,
                    box_height,
                    linewidth=2, edgecolor=edge_colour, facecolor='none'
                )
            )
plt.show()