## Splitting into Train and Val

In [11]:
from __future__ import print_function
import json
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

import zipfile as zf
files = zf.ZipFile("validation.zip", 'r')
files.extractall('data')
files.close()

In [12]:
import os
base_path = os.getcwd()
base_path

'/Users/michalgolovanevsky/SeniorProject'

image_path = os.path.join(base_path, "data/images") 
images = os.listdir(image_path)

label_path = os.path.join(base_path, "data/labels") 
labels = os.listdir(label_path)

test_path = os.path.join(base_path, "data/test")
train_path = os.path.join(base_path, "data/train")

os.makedirs(test_path)
os.makedirs(train_path)

import random
import shutil

files_to_move = random.sample(images, int(0.1*len(images)))
for _f in files_to_move:
    origin_path = os.path.join(image_path, _f)
    dest_dir = os.path.join(test_path, "images")
    dest_path = os.path.join(dest_dir, _f)
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    shutil.move(origin_path, dest_path)

files_to_move = random.sample(labels, int(0.1*len(labels)))
for _f in files_to_move:
    origin_path = os.path.join(label_path, _f)
    dest_dir = os.path.join(test_path, "labels")
    dest_path = os.path.join(dest_dir, _f)
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    shutil.move(origin_path, dest_path)

shutil.move(label_path, train_path)
shutil.move(image_path, train_path)

In [13]:
with open('config.json') as config_file:
    config = json.load(config_file)
# in this example we are only interested in the labels
labels = config['labels']

# print labels
print("There are {} labels in the config file".format(len(labels)))
labels_dict = {}
for label_id, label in enumerate(labels):
    labels_dict[label["name"]] = label["color"]

There are 66 labels in the config file


In [14]:
train_image_path = os.path.join(base_path, "data/train/images")
train_label_path = os.path.join(base_path, "data/train/labels")

In [87]:
train_images = os.listdir(train_image_path)
train_labels = os.listdir(train_label_path)

In [16]:
train_images.sort()
train_labels.sort()

In [76]:
train_images_names = []
for image in train_images:
    image = image.replace(".jpg", "")
    train_images_names.append(image)

In [77]:
train_labels_names = []
for label in train_labels:
    label = label.replace(".png", "")
    train_labels_names.append(label)

In [69]:
dif1 = list(set(train_labels_names) - set(train_images_names))

In [94]:
bad_files = []
for file in dif1:
    file += ".png"
    bad_files.append(file)
len(bad_files)

186

In [96]:
train_labels = list(set(train_labels) - set(bad_files))

In [20]:
def apply_color_map(image_array, labels):
    color_array = np.zeros((image_array.shape[0], image_array.shape[1], 3), dtype=np.uint8)

    for label_id, label in enumerate(labels):
        # set all pixels with the current label to the color of the current label
        color_array[image_array == label_id] = label["color"]

    return color_array

In [97]:
rows = []
for label in train_labels[:200]:
    label_path = os.path.join(train_label_path, label)
    instance_path = os.path.join(train_label_path, label)
    panoptic_path = os.path.join(train_label_path, label)
    label_image = Image.open(label_path)
    instance_image = Image.open(instance_path)
    panoptic_image = Image.open(panoptic_path)
    
    # convert labeled data to numpy arrays for better handling
    label_array = np.array(label_image, dtype=np.uint16)
    instance_array = np.array(instance_image, dtype=np.uint16)

    # now we split the instance_array into labels and instance ids
    instance_label_array = np.array(instance_array / 256, dtype=np.uint8)
    instance_ids_array = np.array(instance_array % 256, dtype=np.uint8)

    # for visualization, we apply the colors stored in the config
    colored_label_array = apply_color_map(label_array, labels)
    colored_instance_label_array = apply_color_map(instance_label_array, labels)
    colors = []
    for i in range(colored_label_array.shape[0]):
        array = np.unique(colored_label_array[i], axis = 0)
        if list(array[0]) not in colors:
            colors.append(list(array[0]))
    items_in_photo = []
    for item in labels_dict:
        color = labels_dict[item]
        if color in colors:
            items_in_photo.append(1)
        else:
            items_in_photo.append(0)
    rows.append(items_in_photo)

In [98]:
import pandas as pd
label_columns = list(labels_dict.keys())
df = pd.DataFrame(rows, columns = label_columns)

In [99]:
df["image"] = train_labels[:200]

In [100]:
label_columns.insert(0, "image")

In [101]:
df = df[label_columns]
df["image"] = df["image"].str.replace("png", "jpg")

In [103]:
df.to_csv("image_data.csv", index = False)