# Build an Autoencoder for creating home-embeddings

In [None]:
import sys  
sys.path.insert(0, '../visual_home_finder')
import config, paths

import os

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np
import pickle
from matplotlib import pyplot as plt

# Side length, in pixels, of the square images used throughout this notebook
IMG_SIZE = 224

In [None]:
# File names of the pickle files that hold the training, validation and
# test image arrays (paths are relative to the current working directory)
training_file = "training_data.pickle"
validation_file = "validation_data.pickle"
test_file = "test_data.pickle"

Build a data structure that contains the training, validation and test data. This only needs to be done once: if a pickle file with the data is already available, the data is simply loaded from that file.

In [None]:
def load_images_from_files_into_numpy_array(list_of_files, image_size):
    """Load image files into a single numpy array of scaled pixel values.

    Each file is opened with ``image.load_img`` resized to
    ``image_size`` x ``image_size``, converted with ``image.img_to_array``
    and divided by 256.

    Args:
        list_of_files: Iterable of image file paths to load.
        image_size: Side length used for both dimensions of ``target_size``.

    Returns:
        ``np.array`` built from the list of per-image arrays, in the same
        order as ``list_of_files``. An empty input yields an empty array.
    """
    target_shape = (image_size, image_size)
    # NOTE(review): dividing by 256 maps an 8-bit value of 255 to 255/256,
    # so the scaled values stay strictly below 1.
    scaled_images = [
        image.img_to_array(image.load_img(path, target_size=target_shape)) / 256
        for path in list_of_files
    ]
    return np.array(scaled_images)

In [None]:
# Load Training, Validation and Test data in to numpy arrays
# Number of test images found under config.TEST_PATH (not used further in this cell)
num_test = len(list(paths.list_images(config.TEST_PATH)))


def _load_or_build_dataset(cache_file, image_dir, split_name):
    """Return the image array for one data split, caching it as a pickle.

    If ``cache_file`` exists it is unpickled and returned. Otherwise the
    images listed by ``paths.list_images(image_dir)`` are loaded with
    ``load_images_from_files_into_numpy_array`` and the result is pickled
    to ``cache_file`` for later runs.

    Args:
        cache_file: Path of the pickle file used as cache.
        image_dir: Directory handed to ``paths.list_images``.
        split_name: Lower-case split name used in the printed messages
            (e.g. ``'training'``).

    Returns:
        The array of images for the split.
    """
    if os.path.isfile(cache_file):
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # cache files that were produced locally by this notebook.
        with open(cache_file, 'rb') as cache_handle:
            data = pickle.load(cache_handle)
    else:
        image_files = list(paths.list_images(image_dir))
        print('Number of %s images are : %d' % (split_name, len(image_files)))
        data = load_images_from_files_into_numpy_array(image_files, IMG_SIZE)
        # The context manager guarantees the file is flushed and closed even
        # if pickling fails part-way through.
        with open(cache_file, 'wb') as cache_handle:
            pickle.dump(data, cache_handle)
    print('Shape of ' + split_name.capitalize() + ' data is ' + str(np.shape(data)))
    return data


training_data = _load_or_build_dataset(training_file, config.TRAIN_PATH, 'training')
validation_data = _load_or_build_dataset(validation_file, config.VAL_PATH, 'validation')
test_data = _load_or_build_dataset(test_file, config.TEST_PATH, 'test')

In [None]:
# train_aug = ImageDataGenerator(
#     zoom_range=0.1,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     shear_range=0.1,
#     horizontal_flip=True,
#     fill_mode="nearest")  # NOTE: no rescaling is configured here, so the pixel value range is left unchanged

# # Initialize validation data augmentation object
# val_aug = ImageDataGenerator()

In [None]:
# # Just for testing the normalizations on the data
# batch_size = 32
# iterator = train_aug.flow(
#     x = training_data,
#     shuffle = True,
#     batch_size=batch_size)
# batchX = iterator.next()
# # Mean should be around 0, Max and min should be within [-1,1]
# print(batchX.shape, batchX.mean(), batchX.max(), batchX.min())

In [None]:
# Set up the training, test and validation flow
# batch_size = 32
# train_gen = train_aug.flow(
#     x = training_data,
#     shuffle = True,
#     batch_size=batch_size)

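# NOTE(review): val_gen below feeds training_data through train_aug;
# validation_data with val_aug was probably intended -- confirm before re-enabling.
# val_gen = train_aug.flow(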
#     x = training_data,
#     shuffle = False,
#     batch_size=batch_size)

# test_gen = train_aug.flow(
#     x = test_data,
#     shuffle = False,
#     batch_size=batch_size)

In [None]:
# Build the Auto-encoder model
#
# Encoder: five Conv2D + MaxPooling2D stages that halve the spatial size
# each time while increasing the number of filters (8 -> 128).
# Decoder: mirrors the encoder with Conv2D + UpSampling2D stages and ends
# in a 3-channel convolution with the same shape as the input image.

input_img = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# ---- Encoder ----
model = Conv2D(8, (3, 3), activation='relu', padding='same')(input_img)  # O/P shape = (224, 224, 8)
model = MaxPooling2D((2, 2))(model)  # O/P shape = (112, 112, 8)
model = Conv2D(16, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (112, 112, 16)
model = MaxPooling2D((2, 2))(model)  # O/P shape = (56, 56, 16)
model = Conv2D(32, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (56, 56, 32)
model = MaxPooling2D((2, 2))(model)  # O/P shape = (28, 28, 32)
model = Conv2D(64, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (28, 28, 64)
model = MaxPooling2D((2, 2))(model)  # O/P shape = (14, 14, 64)
model = Conv2D(128, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (14, 14, 128)
# Bottleneck tensor: this is the encoded representation of the image
encoded = MaxPooling2D((2, 2))(model)  # O/P shape = (7, 7, 128)

# ---- Decoder ----
model = Conv2D(128, (3, 3), activation='relu', padding='same')(encoded)  # O/P shape = (7, 7, 128)
model = UpSampling2D((2, 2))(model)  # O/P shape = (14, 14, 128)
model = Conv2D(64, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (14, 14, 64)
model = UpSampling2D((2, 2))(model)  # O/P shape = (28, 28, 64)
model = Conv2D(32, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (28, 28, 32)
model = UpSampling2D((2, 2))(model)  # O/P shape = (56, 56, 32)
model = Conv2D(16, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (56, 56, 16)
model = UpSampling2D((2, 2))(model)  # O/P shape = (112, 112, 16)
model = Conv2D(8, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (112, 112, 8)
model = UpSampling2D((2, 2))(model)  # O/P shape = (224, 224, 8)
# Reconstruction with the same shape as the input image
decoded = Conv2D(3, (3, 3), activation='relu', padding='same')(model)  # O/P shape = (224, 224, 3)

my_first_autoencoder = Model(input_img, decoded)

initial_lr = 0.001
# `learning_rate` is the supported argument name; the `lr` alias is
# deprecated and rejected by newer Keras releases.
opt = Adam(learning_rate=initial_lr)

# Mean squared error between the input image and its reconstruction
my_first_autoencoder.compile(loss="mse", optimizer=opt)

In [None]:
# Print the layer-by-layer summary of the autoencoder model
my_first_autoencoder.summary()

In [None]:
# Train the autoencoder: the images are used both as the input and as the
# reconstruction target, for the training set and the validation set alike.
num_epochs = 3
batch_size = 32
model_history = my_first_autoencoder.fit(
    x=training_data,
    y=training_data,
    validation_data=(validation_data, validation_data),
    shuffle=True,
    epochs=num_epochs,
    batch_size=batch_size,
)

In [None]:
# Plot the training and validation loss recorded for each epoch
train_loss = model_history.history["loss"]
val_loss = model_history.history["val_loss"]

# Take the number of points from the recorded history rather than from
# num_epochs, so the x-axis always matches the data even if num_epochs was
# changed after training or training stopped early.
N = len(train_loss)
epochs_axis = np.arange(0, N)

plt.style.use("ggplot")
plt.figure()
plt.plot(epochs_axis, train_loss, '*-', label="train_loss")
plt.plot(epochs_axis, val_loss, '*-', label="val_loss")
plt.title("Training Loss on Dataset")
plt.xlabel("Epoch #")
# Only loss values are plotted here, so the axis is labelled accordingly
plt.ylabel("Loss")
plt.legend(loc="lower left")

In [None]:
# Save the trained model to an HDF5 file in the current working directory.
# NOTE: despite the file name, this stores the full autoencoder
# (my_first_autoencoder), not only the encoder half.
my_first_autoencoder.save("my_first_encoder.h5")