# Dataset Explore
The dataset is organized into several folders, but we are only interested in pairs of input images (a reference image and a test image) together with the expected label image that highlights the differences between the two inputs.

In [1]:
from tqdm import tqdm
import numpy as np
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pickle

# Root folder of the dataset; the paths read from the listing files are appended to it.
base_data_dir = '../ChangeDataset/'

# Listing files: one sample per line (reference, test and label image paths).
train_file_list = base_data_dir + 'train.txt'
val_file_list = base_data_dir + 'val.txt'

# Output pickles holding the resized, in-memory datasets.
train_pickle_file = 'change_dataset_train.pkl'
val_pickle_file = 'change_dataset_val.pkl'

# In-memory datasets, filled by the loading cells: sample index -> (reference, test, label).
training_set, validation_set = {}, {}

# Side length, in pixels, every image is resized to (square output).
image_size = 256

#### Parse the path files

In [2]:
def _read_file_list(list_path):
    """Parse a listing file into a list of per-line token lists.

    Each non-blank line is split on whitespace. The loading cells index
    the resulting entries as [0] reference, [1] test and [2] label path.

    Args:
        list_path: Path of the text file to read.

    Returns:
        A list with one list of path strings per non-blank line.
    """
    # `with` closes the file deterministically instead of leaking the handle.
    with open(list_path) as handle:
        # Blank lines are skipped: they would yield empty entries that break
        # the `entry[0]` lookups in the loading cells.
        return [line.rstrip('\n').split() for line in handle if line.strip()]


# The path variables are rebound to the parsed lists (later cells rely on these
# names). The isinstance guard keeps the cell safe to re-run: on a second run
# the names already hold lists and must not be passed to open() again.
if isinstance(train_file_list, str):
    train_file_list = _read_file_list(train_file_list)
if isinstance(val_file_list, str):
    val_file_list = _read_file_list(val_file_list)

size_train = len(train_file_list)
size_validation = len(val_file_list)
print('Length Training Set:', size_train)
print('Length Validation Set:', size_validation)

Length Training Set: 73276
Length Validation Set: 18319


#### Load Validation Set into Memory

In [3]:
# Load every validation sample, resize it to image_size x image_size and keep it in memory.
# Image.LANCZOS is the same filter the removed PIL.Image.ANTIALIAS alias pointed to;
# ANTIALIAS no longer exists in Pillow >= 10 and raises AttributeError there.
# NOTE(review): Lanczos may blend values along label-mask edges; consider
# Image.NEAREST for the label if it encodes discrete classes - confirm before changing.
target_size = (image_size, image_size)
for idx, entry in enumerate(tqdm(val_file_list)):
    # Open the reference, test and label images; the context managers close the
    # underlying files as soon as the resized copies have been produced.
    with Image.open(base_data_dir + entry[0]) as raw_reference, \
            Image.open(base_data_dir + entry[1]) as raw_test, \
            Image.open(base_data_dir + entry[2]) as raw_label:
        # resize() returns a new image, so the results remain usable after the sources close.
        reference_img = raw_reference.resize(target_size, Image.LANCZOS)
        test_img = raw_test.resize(target_size, Image.LANCZOS)
        label_img = raw_label.resize(target_size, Image.LANCZOS)
    # Populate the validation dictionary with the tuple (reference, test, label)
    validation_set[idx] = reference_img, test_img, label_img

100%|██████████| 18319/18319 [05:54<00:00, 54.50it/s]


#### Load Training Set into Memory

In [None]:
# Load every training sample, resize it to image_size x image_size and keep it in memory.
# Image.LANCZOS is the same filter the removed PIL.Image.ANTIALIAS alias pointed to;
# ANTIALIAS no longer exists in Pillow >= 10 and raises AttributeError there.
# NOTE(review): Lanczos may blend values along label-mask edges; consider
# Image.NEAREST for the label if it encodes discrete classes - confirm before changing.
target_size = (image_size, image_size)
for idx, entry in enumerate(tqdm(train_file_list)):
    # Open the reference, test and label images; the context managers close the
    # underlying files as soon as the resized copies have been produced.
    with Image.open(base_data_dir + entry[0]) as raw_reference, \
            Image.open(base_data_dir + entry[1]) as raw_test, \
            Image.open(base_data_dir + entry[2]) as raw_label:
        # resize() returns a new image, so the results remain usable after the sources close.
        reference_img = raw_reference.resize(target_size, Image.LANCZOS)
        test_img = raw_test.resize(target_size, Image.LANCZOS)
        label_img = raw_label.resize(target_size, Image.LANCZOS)
    # Populate the training dictionary with the tuple (reference, test, label)
    training_set[idx] = reference_img, test_img, label_img

 43%|████▎     | 31628/73276 [10:16<14:00, 49.55it/s]

#### Save Dictionaries

In [None]:
# Serialise both in-memory sets, training first, announcing each one before it is written.
# Protocol 4 is requested explicitly for every dump.
for banner, pickle_path, image_set in (
    ('Saving Pickle Training Set', train_pickle_file, training_set),
    ('Saving Pickle Validation Set', val_pickle_file, validation_set),
):
    print(banner)
    with open(pickle_path, 'wb') as handle:
        pickle.dump(image_set, handle, protocol=4)

## Explore

In [None]:
@interact(idx=widgets.IntSlider(min=0,max=size_validation-1))
def explore_validation_dataset(idx):
    """Show the reference, test and label images of one validation sample side by side.

    Prints the shape of the label array, then draws a single-row figure with
    three titled panels.

    Args:
        idx: Key of the sample in ``validation_set``; supplied by the slider,
            which ranges from 0 to ``size_validation - 1``.
    """
    reference_img, test_img, label_img = validation_set[idx]
    label_numpy = np.array(label_img)
    print('Label shape:', label_numpy.shape)

    # One (title, pixels) pair per panel, in display order.
    panels = (
        ('Reference', np.array(reference_img)),
        ('Test', np.array(test_img)),
        ('Label', label_numpy),
    )
    # Explicit Axes objects instead of the implicit pyplot "current axes" state.
    fig, axes = plt.subplots(1, len(panels), figsize=(16, 16))
    for ax, (title, pixels) in zip(axes, panels):
        ax.imshow(pixels)
        ax.set_title(title)
    plt.show()