In [2]:
import sys
sys.path.append('../')
import os
import numpy as np
import matplotlib.pyplot as plt
import glob
from modules.shrec import utils,cvae_parameters, cvae, experiment, experiment_parameters, shrec2019utils, generator_images, cvae_basic

Using TensorFlow backend.


## Define the parameters for training
Train a Conditional Variational Autoencoder with a VGG-like architecture using a fixed number of downsampling and upsampling layers (`num_encoding_layers` and `num_decoding_layers`). The number of filters of each convolution in the network is set by the `intermediate_dim` parameter. The full list of parameters is:

- **epochs**: Number of epochs for training
- **batch_size**: Number of images per batch of training
- **type_layer**: Type of layers used in the encoding and decoding networks. Either "convolutional" or "dense"
- **num_encoding_layers**: Number of the max pooling layers in the encoding network
- **num_decoding_layers**: Number of the upsampling layers in the decoding network
- **var_x**: Variance of the normal distribution for p(x|z,y)
- **r_loss**: Type of loss chosen for reconstruction (either binary cross entropy "binary" or mean squared error "mse")
- **latent_dim**: Dimensions of the latent space of the conditional variational autoencoder
- **intermediate_dim**: Number of filters in each convolution (for convolutional layers) or number of neurons in each layer (for dense layers)
- **learning_rate**: Learning rate of the optimizer used
- **image_shape**: Shape of the images used for training


In [3]:
# --- Training schedule ---
epochs = 500            # number of epochs for training
batch_size = 700        # number of images per training batch
learning_rate = 0.001   # learning rate of the optimizer

# --- Network architecture ---
type_layer = "convolutional"  # either "convolutional" or "dense"
num_encoding_layers = 3       # max pooling layers in the encoding network
num_decoding_layers = 3       # upsampling layers in the decoding network
intermediate_dim = 64         # filters per convolution (or neurons per dense layer)
latent_dim = 100              # dimensions of the latent space
var_x = 1.0                   # variance of the normal distribution for p(x|z,y)

# --- Data ---
image_shape = (64, 64, 3)  # shape of the images used for training
num_classes = 10           # number of scene categories

### Define data generator
The training of the network is done via a generator. The generator is created from a list of the paths to the corresponding images. 

In [4]:
# Directory layout: the dataset folder lives in the parent of the current
# working directory.
main_directory = os.path.dirname(os.getcwd())
dataset_directory = os.path.join(main_directory, "dataset")

# Folders holding the training images (renders and photographs)
renders_path = os.path.join(dataset_directory, "renders_64")
photos_path = os.path.join(dataset_directory, "images_64")

# Class files naming the models and images that will be used for training
path_class_files = os.path.join(dataset_directory, "class_files")
cla_file_path_renders = os.path.join(
    path_class_files, "SceneIBR2018_Model_Training.cla"
)
cla_file_path_photographs = os.path.join(
    path_class_files, "SceneIBR2018_Image_Training.cla"
)

# Renders: read the class file, then convert the per-category lists to arrays
categories, model_lists = shrec2019utils.obtain_models_classes(cla_file_path_renders)
categories_array, models_array = shrec2019utils.categories_model_list_to_array(
    categories, model_lists
)

# Photographs: same two steps, using the 2018 reader for the image class file
categories_photographs, model_lists_photographs = shrec2019utils.obtain_models_classes2018(
    cla_file_path_photographs
)
categories_array_photographs, models_array_photographs = shrec2019utils.categories_model_list_to_array(
    categories_photographs, model_lists_photographs
)

# Summary of what was loaded
unique_render_categories = np.unique(categories)
unique_photo_categories = np.unique(categories_photographs)
print("Categories of renders {} amount {}".format(unique_render_categories, len(unique_render_categories)))
print("Categories of images {} amount {}".format(unique_photo_categories, len(unique_photo_categories)))
print("Amount of renders {}".format(len(models_array)))
print("Amount of images {}".format(len(models_array_photographs)))

Initial index barn 0 70
Initial index beach 0 70
Initial index bedroom 0 70
Initial index castle 0 70
Initial index classroom 0 70
Initial index desert 0 70
Initial index kitchen 0 70
Initial index library 0 70
Initial index mountain 0 70
Initial index river 0 70
Initial index barn 0 700
Initial index beach 0 700
Initial index bedroom 0 700
Initial index castle 0 700
Initial index classroom 0 700
Initial index desert 0 700
Initial index kitchen 0 700
Initial index library 0 700
Initial index mountain 0 700
Initial index river 0 700
Categories of renders ['barn' 'beach' 'bedroom' 'castle' 'classroom' 'desert' 'kitchen'
 'library' 'mountain' 'river'] amount 10
Categories of images ['barn' 'beach' 'bedroom' 'castle' 'classroom' 'desert' 'kitchen'
 'library' 'mountain' 'river'] amount 10
Amount of renders 700
Amount of images 7000


### Join identifiers for rendered images and photographs

Create a joint list containing the identifiers of both the renders and the photographs used for training.

In [5]:
# Join the paths for the training renders and photographs
def _collect_image_paths(models, model_categories, folder, suffix, kind):
    """Collect the image files on disk that belong to each model.

    Parameters
    ----------
    models : sequence
        Model / image identifiers; each one is converted with ``str`` and
        used as the file-name prefix.
    model_categories : sequence
        Category of each entry in ``models`` (indexed by the same position).
    folder : str
        Directory that is searched for the image files.
    suffix : str
        Glob suffix appended to the identifier (e.g. ``'_*.png'``).
    kind : str
        Label stored for every collected file (e.g. ``"render"``).

    Returns
    -------
    tuple of (list, list, list)
        File paths, the category of each path, and the ``kind`` label of
        each path, all aligned by index.
    """
    paths, path_categories, kinds = [], [], []
    for num_model, model in enumerate(models):
        pattern = os.path.join(folder, str(model) + suffix)
        # glob returns matches in arbitrary (filesystem-dependent) order;
        # sorting makes the resulting sample order reproducible across runs.
        for image_path in sorted(glob.glob(pattern)):
            paths.append(image_path)
            path_categories.append(model_categories[num_model])
            kinds.append(kind)
    return paths, path_categories, kinds


# Renders: one model identifier can match several files (pattern '<model>_*.png')
images_paths, joint_categories, type_data = _collect_image_paths(
    models_array, categories_array, renders_path, '_*.png', "render"
)
number_of_renders = len(images_paths)
print("Amount of renders ",number_of_renders)

# Photographs: matched by exact file name '<image>.png', appended after the renders
photo_paths, photo_categories, photo_types = _collect_image_paths(
    models_array_photographs, categories_array_photographs, photos_path, '.png', "photograph"
)
images_paths.extend(photo_paths)
joint_categories.extend(photo_categories)
type_data.extend(photo_types)
number_of_photographs = len(images_paths)-number_of_renders
print("Amount of photographs ",number_of_photographs)



Amount of renders  9100
Amount of photographs  7000


In [6]:
# Convert the per-image category names into the label format passed to the generator.
# NOTE: this cell overwrites `joint_categories` (the list built in the previous
# cell) with its converted version, so it is not idempotent: re-run the cell that
# builds `joint_categories` before re-running this one.
joint_categories = np.array(joint_categories)
joint_categories = shrec2019utils.categories_array_to_numerical(joint_categories)
# Transform the categories to one hot encodings
joint_categories = utils.one_hotify(joint_categories)
print("Joint categories length {}".format(len(joint_categories)))
print("Joint paths length {}".format(len(images_paths)))
# Every image path needs exactly one label
assert len(joint_categories) == len(images_paths), "labels and image paths are misaligned"

# Create the generator
# The resolution and channel count are derived from `image_shape` so that the
# generator always agrees with the shape handed to the network.
dim = (image_shape[0], image_shape[1]) # resolution of the images
n_channels = image_shape[2] # number of color channels
generator = generator_images.ShrecGenerator(
    images_paths,
    joint_categories,
    dim=dim,
    n_channels=n_channels,
    batch_size=batch_size,
    n_classes=num_classes,
    flatten=False,
    reflect=True,
)



Joint categories length 16100
Joint paths length 16100


### Training of Conditional Variational Autoencoder
In this section an instance of the conditional variational autoencoder class is created from the parameters defined above. The class contains the encoding and decoding neural networks of the conditional variational autoencoder.


In [7]:
# DEFINE THE NETWORK OF THE CONDITIONAL VARIATIONAL AUTOENCODER
# All hyper-parameters are gathered in one dictionary and unpacked into
# CVAEParams as keyword arguments below.
cvae_parameters_dict = {
    # shape of the input images
    "image_shape": image_shape,
    # number of classes for the scenes
    "num_classes": num_classes,
    # type of layers: either "convolutional" or "dense"
    "type_layers": type_layer,
    # number of the max pooling layers in the encoding
    "num_encoding_layers": num_encoding_layers,
    # number of the upsampling layers in the decoding
    "num_decoding_layers": num_decoding_layers,
    # variance of the normal distribution for p(x|z,y)
    "var_x": var_x,
    # reconstruction loss: binary cross entropy "binary" or mean squared error "mse"
    "r_loss": "binary",
    # dimensions of the latent space of the conditional variational autoencoder
    "latent_dim": latent_dim,
    # defines the number of filters in the convolutions
    "intermediate_dim": intermediate_dim,
    # learning rate of the optimizer
    "learning_rate": learning_rate,
}

# Parameter object for the CVAE
parameters = cvae_parameters.CVAEParams(**cvae_parameters_dict)

# The CVAE itself, built from the parameter object
cond_vae = cvae_basic.CVAE_Basic(parameters)

W1029 08:50:16.563773 140382105212736 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.



Convolutional


W1029 08:50:17.085240 140382105212736 deprecation_wrapper.py:119] From /home/luis/shrec_modules/cvae_shrec/modules/shrec/cvae.py:140: The name tf.log is deprecated. Please use tf.math.log instead.



Reconstruction loss is binary cross entropy
Reconstruction loss is binary cross entropy


### Start training experiment
Define an experiment for training. The experiment takes the instance of the conditional variational autoencoder, the experiment parameters and a folder for saving the tensorboard log files and the trained model weights.



In [None]:
# Folder handed to the experiment for its saved artefacts
saving_model_folder = os.path.join(
    main_directory, "trained_models", "simple_submission"
)

# Parameters of the training run
experiment_params_dict = {
    "epochs": epochs,
    "batch_size": batch_size,
}
exp_parameters = experiment_parameters.ExperimentParams(**experiment_params_dict)

# Experiment bundling the CVAE, the run parameters and the saving folder.
# NOTE(review): the third positional argument is left as None - confirm what it controls.
exp = experiment.Experiment(cond_vae, exp_parameters, None, saving_model_folder)

# Train the CVAE with the generator as the source of data
exp.run_image_generator(generator)

Epoch 1/500
 - 32s - loss: 8004.9685 - r_loss: 8004.8911 - kl_loss: 0.0023 - mean_squared_error: 0.0928 - classification_loss: 0.0758
Epoch 2/500
 - 31s - loss: 8004.9676 - r_loss: 8004.8892 - kl_loss: 0.0018 - mean_squared_error: 0.0928 - classification_loss: 0.0767
Epoch 3/500
 - 32s - loss: 8004.9471 - r_loss: 8004.8740 - kl_loss: 0.0016 - mean_squared_error: 0.0928 - classification_loss: 0.0728
Epoch 4/500
 - 32s - loss: 8004.9461 - r_loss: 8004.8770 - kl_loss: 0.0017 - mean_squared_error: 0.0928 - classification_loss: 0.0667
Epoch 5/500
 - 32s - loss: 8004.9482 - r_loss: 8004.8809 - kl_loss: 0.0021 - mean_squared_error: 0.0928 - classification_loss: 0.0651
Epoch 6/500
 - 32s - loss: 8004.9346 - r_loss: 8004.8711 - kl_loss: 0.0011 - mean_squared_error: 0.0928 - classification_loss: 0.0615
Epoch 7/500
 - 32s - loss: 8004.9349 - r_loss: 8004.8711 - kl_loss: 0.0013 - mean_squared_error: 0.0928 - classification_loss: 0.0633
Epoch 8/500
 - 31s - loss: 8004.9357 - r_loss: 8004.8755 - kl_

Epoch 62/500
 - 31s - loss: 6061.8095 - r_loss: 5954.9663 - kl_loss: 105.1567 - mean_squared_error: 0.0291 - classification_loss: 1.6861
Epoch 63/500
 - 32s - loss: 6060.1648 - r_loss: 5953.4355 - kl_loss: 105.0583 - mean_squared_error: 0.0290 - classification_loss: 1.6721
Epoch 64/500
 - 31s - loss: 6053.7286 - r_loss: 5945.8145 - kl_loss: 106.2523 - mean_squared_error: 0.0287 - classification_loss: 1.6620
Epoch 65/500
 - 31s - loss: 6050.7426 - r_loss: 5942.8369 - kl_loss: 106.2529 - mean_squared_error: 0.0286 - classification_loss: 1.6522
Epoch 66/500
 - 32s - loss: 6044.7515 - r_loss: 5935.9941 - kl_loss: 107.1106 - mean_squared_error: 0.0284 - classification_loss: 1.6465
Epoch 67/500
 - 32s - loss: 6040.3018 - r_loss: 5931.3154 - kl_loss: 107.3553 - mean_squared_error: 0.0283 - classification_loss: 1.6315
Epoch 68/500
 - 32s - loss: 6038.4251 - r_loss: 5928.9087 - kl_loss: 107.9048 - mean_squared_error: 0.0282 - classification_loss: 1.6121
Epoch 69/500
 - 31s - loss: 6033.2387 - r

Epoch 122/500
 - 32s - loss: 5937.8008 - r_loss: 5816.5742 - kl_loss: 119.9906 - mean_squared_error: 0.0247 - classification_loss: 1.2367
Epoch 123/500
 - 32s - loss: 5937.8562 - r_loss: 5816.9067 - kl_loss: 119.7160 - mean_squared_error: 0.0247 - classification_loss: 1.2345
Epoch 124/500
 - 32s - loss: 5935.4334 - r_loss: 5814.1426 - kl_loss: 120.0527 - mean_squared_error: 0.0246 - classification_loss: 1.2371
Epoch 125/500
 - 32s - loss: 5934.7906 - r_loss: 5813.2046 - kl_loss: 120.3604 - mean_squared_error: 0.0246 - classification_loss: 1.2254
Epoch 126/500
 - 32s - loss: 5935.2773 - r_loss: 5813.7998 - kl_loss: 120.2581 - mean_squared_error: 0.0246 - classification_loss: 1.2187
Epoch 127/500
 - 32s - loss: 5933.8090 - r_loss: 5812.0234 - kl_loss: 120.5667 - mean_squared_error: 0.0246 - classification_loss: 1.2197
Epoch 128/500
 - 31s - loss: 5932.6075 - r_loss: 5810.2798 - kl_loss: 121.1219 - mean_squared_error: 0.0245 - classification_loss: 1.2062
Epoch 129/500
 - 31s - loss: 5930.

Epoch 182/500
 - 31s - loss: 5901.0146 - r_loss: 5775.1357 - kl_loss: 124.8502 - mean_squared_error: 0.0237 - classification_loss: 1.0284
Epoch 183/500
 - 31s - loss: 5902.7492 - r_loss: 5777.3369 - kl_loss: 124.3895 - mean_squared_error: 0.0237 - classification_loss: 1.0228
Epoch 184/500
 - 31s - loss: 5902.9912 - r_loss: 5777.3198 - kl_loss: 124.6404 - mean_squared_error: 0.0237 - classification_loss: 1.0300
Epoch 185/500
 - 31s - loss: 5900.0876 - r_loss: 5774.5381 - kl_loss: 124.5277 - mean_squared_error: 0.0237 - classification_loss: 1.0211
Epoch 186/500
 - 32s - loss: 5902.5740 - r_loss: 5776.8945 - kl_loss: 124.6619 - mean_squared_error: 0.0237 - classification_loss: 1.0185
Epoch 187/500
 - 32s - loss: 5903.1198 - r_loss: 5778.0078 - kl_loss: 124.0929 - mean_squared_error: 0.0238 - classification_loss: 1.0189
Epoch 188/500
 - 32s - loss: 5899.6683 - r_loss: 5773.5649 - kl_loss: 125.0944 - mean_squared_error: 0.0236 - classification_loss: 1.0098
Epoch 189/500
 - 32s - loss: 5902.

Epoch 242/500
 - 32s - loss: 5895.5173 - r_loss: 5768.7651 - kl_loss: 125.8578 - mean_squared_error: 0.0235 - classification_loss: 0.8944
Epoch 243/500
 - 32s - loss: 5885.7953 - r_loss: 5758.8501 - kl_loss: 126.0556 - mean_squared_error: 0.0232 - classification_loss: 0.8908
Epoch 244/500
 - 32s - loss: 5887.0266 - r_loss: 5759.7690 - kl_loss: 126.3768 - mean_squared_error: 0.0232 - classification_loss: 0.8819
Epoch 245/500
 - 32s - loss: 5887.7648 - r_loss: 5760.1494 - kl_loss: 126.7303 - mean_squared_error: 0.0233 - classification_loss: 0.8850
Epoch 246/500
 - 31s - loss: 5888.3772 - r_loss: 5761.0425 - kl_loss: 126.4445 - mean_squared_error: 0.0233 - classification_loss: 0.8902
Epoch 247/500
 - 32s - loss: 5883.8925 - r_loss: 5756.7285 - kl_loss: 126.2814 - mean_squared_error: 0.0232 - classification_loss: 0.8829
Epoch 248/500
 - 32s - loss: 5888.0795 - r_loss: 5760.9194 - kl_loss: 126.2742 - mean_squared_error: 0.0233 - classification_loss: 0.8857
Epoch 249/500
 - 32s - loss: 5883.