# Dataset Explore
This dataset is organized into several folders, but what we actually need from it is, for each sample, a pair of input images (reference and test) and the expected label that highlights the differences between the two input images.

In [1]:
from tqdm import tqdm
import numpy as np
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pickle

# Every image path found in the list files is appended to this prefix.
base_data_dir = '../'

# Text files listing the samples of each split. NOTE: the "Parse the path
# files" cell rebinds both names to the parsed list of entries.
train_file_list, val_file_list = './train.txt', './val.txt'

# Where the in-memory splits are pickled to.
train_pickle_file, val_pickle_file = 'change_dataset_train.pkl', 'change_dataset_val.pkl'

# Side length (pixels) of the square every image is resized to.
image_size = 256

# In-memory splits: sample index -> (reference, test, label).
training_set, validation_set = dict(), dict()

In [2]:
# RGB colour of every annotation class; the list index is the class id.
cmap = [(0, 0, 0),          #  0 'mask-out' (ignore)          - black
        (255, 255, 255),    #  1 'no-change'                  - white
        (136, 0, 21),       #  2 'barrier'                    - brown/red
        (237, 28, 36),      #  3 'bin'                        - red
        (255, 127, 39),     #  4 'construction-maintenance'   - orange
        (255, 242, 0),      #  5 'misc'                       - yellow
        (34, 177, 76),      #  6 'other-objects'              - dark green
        (0, 162, 232),      #  7 'person-cycle'               - light blue
        (63, 72, 204),      #  8 'rubbish'                    - navy blue
        (163, 73, 164),     #  9 'sign'                       - purple
        (255, 174, 201),    # 10 'traffic-cone'               - pink
        (181, 230, 29)]     # 11 'vehicle'                    - lime

def mask_image_generater(image):
    """Turn a colour-coded label image into a binary change mask.

    A pixel becomes 1 when its RGB value equals one of the change classes
    (ids 2-11 of ``cmap``). 'mask-out', 'no-change' and any colour that is
    not listed in ``cmap`` become 0.

    Args:
        image: PIL image holding the colour-coded annotation. Any mode that
            ``convert('RGB')`` accepts is fine (RGB, RGBA, palette, greyscale).

    Returns:
        PIL image created from a float (height, width) array of 0/1 values.
    """
    # Normalise to RGB first: palette and greyscale files decode to 2-D
    # arrays, on which the per-channel comparison below would fail.
    rgb = np.asarray(image.convert('RGB'))

    # Array axes are (height, width), whereas PIL's image.size is
    # (width, height) - taking the shape from the array avoids mixing them up.
    mask_data = np.zeros(rgb.shape[:2])

    # Ids 0 and 1 are background, so only the remaining colours flag a change.
    for colour in cmap[2:]:
        mask_data[np.all(rgb == colour, axis=-1)] = 1

    return Image.fromarray(mask_data)

#### Parse the path files

In [3]:
def read_file_list(list_path):
    """Parse a split file into one list of whitespace-separated fields per line.

    The loading cells index each entry as [0] reference image, [1] test image
    and [2] label image. Blank lines are skipped so they cannot produce empty
    entries.

    Args:
        list_path: path of the text file to read.

    Returns:
        List of lists of strings, one inner list per non-blank line.
    """
    # `with` closes the file as soon as it has been read.
    with open(list_path) as handle:
        return [line.split() for line in handle if line.strip()]

# Both names start out as the path of the list file and are rebound to the
# parsed entries; the isinstance guard keeps this cell safe to re-run.
if isinstance(train_file_list, str):
    train_file_list = read_file_list(train_file_list)
if isinstance(val_file_list, str):
    val_file_list = read_file_list(val_file_list)

size_train = len(train_file_list)
size_validation = len(val_file_list)
print('Length Training Set:', size_train)
print('Length Validation Set:', size_validation)

Length Training Set: 1187
Length Validation Set: 175


#### Load Validation Set into Memory

In [4]:
# Every image is resized to the same square.
target_size = (image_size, image_size)

for idx, entry in enumerate(tqdm(val_file_list)):
    # entry = [reference path, test path, label path], appended to base_data_dir
    reference_img = Image.open(base_data_dir + entry[0]).convert('RGB')
    test_img = Image.open(base_data_dir + entry[1]).convert('RGB')
    # Colour-coded annotation -> binary change mask
    label_img = mask_image_generater(Image.open(base_data_dir + entry[2]))

    # Image.LANCZOS is the filter the former PIL.Image.ANTIALIAS alias named;
    # the alias itself no longer exists in Pillow >= 10.
    reference_img = reference_img.resize(target_size, Image.LANCZOS)
    test_img = test_img.resize(target_size, Image.LANCZOS)
    # NOTE(review): an interpolating filter leaves fractional values along the
    # borders of the change mask; Image.NEAREST would keep it strictly 0/1.
    # Confirm what the training code expects before changing it.
    label_img = label_img.resize(target_size, Image.LANCZOS)

    # Populate validation dictionary with tuple (reference, test, label)
    validation_set[idx] = reference_img, test_img, label_img

100%|██████████| 175/175 [00:12<00:00, 13.63it/s]


#### Load Training Set into Memory

In [30]:
# Every image is resized to the same square.
target_size = (image_size, image_size)

for idx, entry in enumerate(tqdm(train_file_list)):
    # entry = [reference path, test path, label path], appended to base_data_dir
    reference_img = Image.open(base_data_dir + entry[0]).convert('RGB')
    test_img = Image.open(base_data_dir + entry[1]).convert('RGB')
    # Colour-coded annotation -> binary change mask
    label_img = mask_image_generater(Image.open(base_data_dir + entry[2]))

    # Image.LANCZOS is the filter the former PIL.Image.ANTIALIAS alias named;
    # the alias itself no longer exists in Pillow >= 10.
    reference_img = reference_img.resize(target_size, Image.LANCZOS)
    test_img = test_img.resize(target_size, Image.LANCZOS)
    # NOTE(review): an interpolating filter leaves fractional values along the
    # borders of the change mask; Image.NEAREST would keep it strictly 0/1.
    # Confirm what the training code expects before changing it.
    label_img = label_img.resize(target_size, Image.LANCZOS)

    # Populate training dictionary with tuple (reference, test, label)
    training_set[idx] = reference_img, test_img, label_img

100%|██████████| 1187/1187 [01:28<00:00, 13.37it/s]


#### Save Dictionaries

In [25]:
# Dump each split to its own pickle file, training first, announcing each one.
for banner, target_path, split in (
    ('Saving Pickle Training Set', train_pickle_file, training_set),
    ('Saving Pickle Validation Set', val_pickle_file, validation_set),
):
    print(banner)
    with open(target_path, 'wb') as handle:
        pickle.dump(split, handle, protocol=4)

Saving Pickle Training Set
Saving Pickle Validation Set


## Explore

In [33]:
@interact(idx=widgets.IntSlider(min=0,max=size_train-1))
def explore_validation_dataset(idx):
    """Show the reference image, test image and change mask of one sample.

    Despite its name, this browses ``training_set`` (the slider range comes
    from ``size_train``). To inspect the validation split, read from
    ``validation_set`` and bound the slider with ``size_validation`` instead.

    Args:
        idx: key of the sample in ``training_set``, driven by the slider.
    """
    reference_img, test_img, label_img = training_set[idx]
    reference_numpy = np.array(reference_img)
    test_numpy = np.array(test_img)
    label_numpy = np.array(label_img)

    print('Label shape:', label_numpy.shape)
    print ('Ref shape:', reference_numpy.shape)
    print ('test shape:', test_numpy.shape)

    # One row, three panels: reference | test | label
    fig, axes = plt.subplots(1, 3, figsize=(16, 16))
    panels = (('Reference', reference_numpy),
              ('Test', test_numpy),
              ('Change mask', label_numpy))
    for ax, (title, pixels) in zip(axes, panels):
        ax.imshow(pixels)
        ax.set_title(title)
    plt.show()

interactive(children=(IntSlider(value=0, description='idx', max=1186), Output()), _dom_classes=('widget-intera…