# Dataset Explore
This dataset is organized in different folders, but we're actually interested on a pair of input images and the expected label that highlight differences between the 2 input images

In [1]:
from tqdm import tqdm
import numpy as np
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pickle

train_file_list = './train.txt'
val_file_list = './val.txt'
base_data_dir = './'
train_pickle_file = 'change_dataset_train.pkl'
val_pickle_file = 'change_dataset_val.pkl'

validation_set = {}
training_set = {}

image_size = 256

In [2]:
cmap = [(0, 0, 0),   	    #% ignore       0 'mask-out' (ignore)
        (255, 255, 255),    #% unchanged    1 'no-change'
        (136, 0, 21),   	#% brown/red    2 'barrier'
        (237, 28, 36),  	#% red          3 'bin'
        (255, 127, 39), 	#% orange       4 'construction-maintenance'
        (255, 242, 0),  	#% yellow       5 'misc'
        (34, 177, 76),  	#% dark green   6 'other-objects'
        (0, 162, 232),  	#% light blue   7 'person-cycle'
        (63, 72, 204),  	#% navy blue    8 'rubbish'
        (163, 73, 164), 	#% purple       9 'sign'
        (255, 174, 201),	#% pink         10 'traffic-cone'
        (181, 230, 29)] 	#% lime         11 'vehicle'

def mask_image_generater(image):
    h, w = image.size
    data = np.array(image)
    mask_data = np.zeros((w, h))
    
    for i, col in enumerate(cmap):
        c1 = data[:, :, 0] == col[0]
        c2 = data[:, :, 1] == col[1]
        c3 = data[:, :, 2] == col[2]
        c = c1&c2&c3
        mask_data[c] = i
        
    mask_img = Image.fromarray(mask_data)
    return mask_img

#### Parse the path files

In [3]:
train_file_list = [line.rstrip('\n').split() for line in open(train_file_list)]
val_file_list = [line.rstrip('\n').split() for line in open(val_file_list)]
print('Length Training Set:', len(train_file_list))
print('Length Validation Set:', len(val_file_list))
size_train = len(train_file_list)
size_validation = len(val_file_list)

Length Training Set: 98
Length Validation Set: 54


#### Load Validation Set On Memory

In [4]:
for idx, entry in enumerate(tqdm(val_file_list)):
    # Load the reference, test and label images
    reference_img = Image.open(base_data_dir + entry[0]).convert('RGB')
    test_img = Image.open(base_data_dir + entry[1]).convert('RGB')
    #label_img = Image.open(base_data_dir + entry[2]).convert('RGB')
    label_img = Image.open(base_data_dir + entry[2])
    # Resize images
    reference_img = reference_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    test_img = test_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    label_img = label_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    # Populate validation dictionary with tupple (reference,test,label)
    validation_set[idx] = reference_img, test_img, label_img    

  0%|                                                                                           | 0/54 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: './VL-CMU-CD-dataset/raw/000/RGB/2_00.png'

#### Load Training Set On Memory

In [30]:
for idx, entry in enumerate(tqdm(train_file_list)):
    # Load the reference, test and label images
    reference_img = Image.open(base_data_dir + entry[0]).convert('RGB')
    test_img = Image.open(base_data_dir + entry[1]).convert('RGB')
    #label_img = Image.open(base_data_dir + entry[2]).convert('RGB')
    label_img = Image.open(base_data_dir + entry[2]).convert('L')
    #label_numpy = np.array(label_img)
    #print('Label shape:', label_numpy.shape)
    # Resize images
    reference_img = reference_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    test_img = test_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    label_img = label_img.resize((image_size, image_size), PIL.Image.ANTIALIAS)
    label_numpy = np.array(label_img)
    print (idx,label_numpy[250,250])
    #print('Label shape:', idx, label_numpy.shape)
    # Populate validation dictionary with tupple (reference,test,label)
    training_set[idx] = reference_img, test_img, label_img    

  2%|▏         | 2/98 [00:00<00:08, 11.59it/s]

0 255
1 255
2 4


  6%|▌         | 6/98 [00:00<00:07, 11.78it/s]

3 0
4 255
5 255


  8%|▊         | 8/98 [00:00<00:07, 11.55it/s]

6 255
7 255
8 0


 12%|█▏        | 12/98 [00:01<00:07, 11.77it/s]

9 255
10 0
11 2


 14%|█▍        | 14/98 [00:01<00:07, 11.82it/s]

12 184
13 255
14 227


 18%|█▊        | 18/98 [00:01<00:06, 11.80it/s]

15 0
16 0
17 0


 20%|██        | 20/98 [00:01<00:06, 11.77it/s]

18 0
19 0
20 0


 24%|██▍       | 24/98 [00:02<00:06, 11.85it/s]

21 0
22 255
23 0


 27%|██▋       | 26/98 [00:02<00:06, 11.87it/s]

24 255
25 255
26 255


 31%|███       | 30/98 [00:02<00:05, 11.95it/s]

27 255
28 0
29 255


 33%|███▎      | 32/98 [00:02<00:05, 11.98it/s]

30 255
31 0
32 0


 37%|███▋      | 36/98 [00:03<00:05, 11.97it/s]

33 0
34 5
35 4


 39%|███▉      | 38/98 [00:03<00:05, 11.95it/s]

36 0
37 0
38 4


 43%|████▎     | 42/98 [00:03<00:04, 11.88it/s]

39 255
40 0
41 0


 45%|████▍     | 44/98 [00:03<00:04, 11.99it/s]

42 255
43 0
44 255


 49%|████▉     | 48/98 [00:04<00:04, 12.01it/s]

45 0
46 255
47 255


 51%|█████     | 50/98 [00:04<00:04, 11.99it/s]

48 0
49 255
50 255


 55%|█████▌    | 54/98 [00:04<00:03, 11.89it/s]

51 255
52 0
53 255


 57%|█████▋    | 56/98 [00:04<00:03, 11.99it/s]

54 0
55 1
56 0


 61%|██████    | 60/98 [00:05<00:03, 11.94it/s]

57 255
58 253
59 255


 63%|██████▎   | 62/98 [00:05<00:03, 11.95it/s]

60 255
61 0
62 255


 67%|██████▋   | 66/98 [00:05<00:02, 11.90it/s]

63 255
64 255
65 0


 69%|██████▉   | 68/98 [00:05<00:02, 11.86it/s]

66 0
67 255
68 255


 73%|███████▎  | 72/98 [00:06<00:02, 11.95it/s]

69 255
70 0
71 170


 76%|███████▌  | 74/98 [00:06<00:01, 12.03it/s]

72 0
73 0
74 0


 80%|███████▉  | 78/98 [00:06<00:01, 12.05it/s]

75 255
76 0
77 235


 82%|████████▏ | 80/98 [00:06<00:01, 12.05it/s]

78 0
79 253
80 0


 86%|████████▌ | 84/98 [00:07<00:01, 12.09it/s]

81 0
82 128
83 0


 88%|████████▊ | 86/98 [00:07<00:00, 12.06it/s]

84 255
85 2
86 0


 92%|█████████▏| 90/98 [00:07<00:00, 12.07it/s]

87 255
88 17
89 0


 94%|█████████▍| 92/98 [00:07<00:00, 12.03it/s]

90 255
91 1
92 255


 98%|█████████▊| 96/98 [00:08<00:00, 11.98it/s]

93 255
94 255
95 255


100%|██████████| 98/98 [00:08<00:00, 11.93it/s]

96 255
97 255





#### Save Dictionaries

In [25]:
print('Saving Pickle Training Set')
with open(train_pickle_file, 'wb') as handle:
    pickle.dump(training_set, handle, protocol=4)
    
print('Saving Pickle Validation Set')
with open(val_pickle_file, 'wb') as handle:
    pickle.dump(validation_set, handle, protocol=4)

Saving Pickle Training Set
Saving Pickle Validation Set


## Explore

In [33]:
@interact(idx=widgets.IntSlider(min=0,max=size_train-1))
def explore_validation_dataset(idx):
    reference_numpy = np.array(training_set[idx][0])
    test_numpy = np.array(training_set[idx][1])
    label_numpy = np.array(training_set[idx][2])
    print('Label shape:', label_numpy.shape)
    print ('Ref shape:', reference_numpy.shape)
    print ('test shape:', test_numpy.shape)
    #plt.imshow(label_numpy)
    #plt.show()
    fig=plt.figure(figsize=(16, 16))
    columns = 3
    rows = 1
    fig.add_subplot(1, 3, 1)
    plt.imshow(reference_numpy)
    fig.add_subplot(1, 3, 2)
    plt.imshow(test_numpy)
    fig.add_subplot(1, 3, 3)
    plt.imshow(label_numpy, cmap='gray', vmin=0, vmax=200)
    plt.show()

interactive(children=(IntSlider(value=0, description='idx', max=97), Output()), _dom_classes=('widget-interact…