In [1]:
%reload_ext autoreload

# Imports

import os
import sys
# Make the local source tree importable from this notebook.
# NOTE(review): the directory is named 'image_gen_dm' while the package imported
# below is 'image_gen_vae' — confirm this is the intended path.
module_path = os.path.abspath('../image_gen_dm') # or the path to your source code
# Guarded so that re-running the cell does not keep appending the same entry.
if module_path not in sys.path:
    sys.path.append(module_path)

import tensorflow as tf
import tf_keras as tfk
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

# Short aliases for the Keras layers, TFP layers and TFP distributions namespaces.
tfkl, tfpl, tfd = tfk.layers, tfp.layers, tfp.distributions

# Runtime switches handed to TensorFlow through environment variables.
# NOTE(review): tensorflow is already imported a few lines above; confirm these
# variables are still read late enough to take effect, otherwise move this
# block above the import.
TF_ENABLE_ONEDNN_OPTS=0
os.environ['TF_ENABLE_ONEDNN_OPTS'] = str(TF_ENABLE_ONEDNN_OPTS)
# The key was previously misspelled as 'TG_GPU_ALLOCATOR', so the allocator
# setting was written to a variable nothing reads.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

import image_gen_vae as igvae
import image_gen_vae.utils as utils
import image_gen_vae.constants as consts

# Report the TensorFlow version and the number of GPUs TensorFlow lists.
print('Tensorflow Version:', tf.__version__)
gpu_count = len(tf.config.list_physical_devices('GPU'))
print("Available GPUs: ", gpu_count)

2024-04-30 17:00:14.664390: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Tensorflow Version: 2.16.1
Available GPUs:  1


2024-04-30 17:00:17.677651: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 17:00:17.694278: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 17:00:17.694328: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [2]:
# NC Constants
# Settings for the dense-only model saved under the name 'ncvae'.

# Images are IMAGE_SIZE x IMAGE_SIZE with 3 channels; FLATTENED_SIZE is the
# number of values in one such image.
IMAGE_SIZE = 64
FLATTENED_SIZE = 3 * IMAGE_SIZE * IMAGE_SIZE

# Dense layer widths; the decoder uses the encoder's widths in reverse order.
ENCODER_HIDDEN_SIZES = [8192, 1792, 448]
DECODER_HIDDEN_SIZES = list(reversed(ENCODER_HIDDEN_SIZES))

LATENT_DIM = 2

# Weight files are kept separately for the encoder and the decoder.
MODEL_NAME = 'ncvae'
ENCODER_FILE_PATH = f'res/models/{MODEL_NAME}_encoder.h5'
DECODER_FILE_PATH = f'res/models/{MODEL_NAME}_decoder.h5'

LEARNING_RATE = 1e-4
VAL_PERCENTAGE = 0.1

In [2]:
# C Constants
# Settings for the convolutional model saved under the name 'cvae'.

IMAGE_SIZE = 64

# Dense layer widths; the decoder uses the encoder's widths in reverse order.
ENCODER_HIDDEN_SIZES = [1792, 448]
DECODER_HIDDEN_SIZES = ENCODER_HIDDEN_SIZES[::-1]

# One (filters, kernel_size, stride) tuple per convolution; the decoder walks
# the same list backwards.
ENCODER_CONV_CONFIG = [(144, 3, 2), (224, 3, 2), (576, 5, 4), (1152, 3, 2), (2048, 3, 2)]
DECODER_CONV_CONFIG = ENCODER_CONV_CONFIG[::-1]

# Shape of the encoder's last conv output, derived from the config instead of
# being typed in by hand so it cannot drift from IMAGE_SIZE / ENCODER_CONV_CONFIG.
# The convolutions in the model cell use padding='same', for which each layer
# yields ceil(input / stride) positions per spatial axis.
_spatial_size = IMAGE_SIZE
for _conv_filters, _conv_kernel, _conv_stride in ENCODER_CONV_CONFIG:
    _spatial_size = -(-_spatial_size // _conv_stride)  # ceiling division

FLATTENED_SHAPE = (_spatial_size, _spatial_size, ENCODER_CONV_CONFIG[-1][0])
FLATTENED_SIZE = FLATTENED_SHAPE[0] * FLATTENED_SHAPE[1] * FLATTENED_SHAPE[2]

LATENT_DIM = 2

# Weight files are kept separately for the encoder and the decoder.
MODEL_NAME = 'cvae'
ENCODER_FILE_PATH = f'res/models/{MODEL_NAME}_encoder.h5'
DECODER_FILE_PATH = f'res/models/{MODEL_NAME}_decoder.h5'

LEARNING_RATE = 1e-4
VAL_PERCENTAGE = 0.1

In [4]:
# Model class

class Sampling(tfkl.Layer):
    """Draws a latent sample from ``(z_mean, z_log_var)``.

    Computes ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
    standard-normal noise with one value per (batch, dim) entry.
    """

    def call(self, inputs):
        """Return one sample per row of ``z_mean``.

        Args:
            inputs: a pair ``(z_mean, z_log_var)``; both are indexed as 2-D
                (batch, dim) tensors here.
        """
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        # Use the backend of the same Keras package (tf_keras) that provides
        # this layer's base class, rather than `tf.keras`, which is a
        # different Keras package under TensorFlow 2.16.
        epsilon = tfk.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

class NCVAE(tfk.Model):
    """Variational autoencoder built from a separate encoder and decoder model.

    The encoder is called on the input batch and must return the triple
    ``(z_mean, z_log_var, z)``; the decoder is called on ``z``. The objective
    is a binary cross-entropy reconstruction term plus a KL term computed from
    ``z_mean`` and ``z_log_var``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the two sub-models and create one running-mean tracker per loss."""
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = tfk.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tfk.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tfk.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """The three loss trackers, exposed through the Keras ``metrics`` property."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, data, training):
        """Run encoder and decoder on one batch and return the loss terms.

        Args:
            data: an ``(input_data, target_data)`` pair.
            training: forwarded to the encoder and decoder calls.

        Returns:
            ``(total_loss, reconstruction_loss, kl_loss)``.
        """
        input_data, target_data = data
        z_mean, z_log_var, z = self.encoder(input_data, training=training)
        reconstruction = self.decoder(z, training=training)
        # The cross-entropy is taken over the last axis, summed over axes 1 and
        # 2, then averaged over the batch.
        # assumes image batches shaped (batch, height, width, channels) — TODO confirm
        # tfk.losses keeps the loss in the same Keras package as the model.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                tfk.losses.binary_crossentropy(target_data, reconstruction),
                axis=(1, 2)
            )
        )
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _track_losses(self, total_loss, reconstruction_loss, kl_loss):
        """Feed the loss values into the trackers and return the running means."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """One optimisation step on an ``(input_data, target_data)`` batch."""
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(data, training=True)

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._track_losses(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluation counterpart of ``train_step``: same losses, no weight update.

        Lets ``evaluate()`` and validation during ``fit()`` report the same
        three loss values as training.
        """
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(data, training=False)
        return self._track_losses(total_loss, reconstruction_loss, kl_loss)

    def call(self, input_data, training=False):
        """Encode ``input_data``, decode the sampled ``z`` and return the result."""
        _, _, z = self.encoder(input_data, training=training)
        reconstructed_image = self.decoder(z, training=training)

        return reconstructed_image


In [4]:
# NC Model

# Encoder: flatten the image, run it through the dense stack, then produce the
# latent mean, the latent log-variance and a sample drawn from them.
image_input = tfkl.Input(shape=consts.INPUT_SHAPE, name='image_input')
flattened_image = tfkl.Flatten()(image_input)

x = flattened_image
for units in ENCODER_HIDDEN_SIZES:
    encoder_dense = tfkl.Dense(units, activation='relu', kernel_initializer='glorot_uniform')
    x = encoder_dense(x)

z_mean = tfkl.Dense(LATENT_DIM, kernel_initializer='glorot_uniform', name='z_mean')(x)
z_log_var = tfkl.Dense(LATENT_DIM, kernel_initializer='glorot_uniform', name='z_log_var')(x)
z = Sampling()([z_mean, z_log_var])

encoder = tfk.Model(
    inputs=image_input,
    outputs=[z_mean, z_log_var, z],
    name='encoder',
)
encoder.summary()

# Decoder: dense stack from the latent vector up to FLATTENED_SIZE sigmoid
# outputs, reshaped to consts.OUTPUT_SHAPE.
latent_inputs = tfkl.Input(shape=(LATENT_DIM,))

y = latent_inputs
for units in DECODER_HIDDEN_SIZES:
    decoder_dense = tfkl.Dense(units, activation='relu', kernel_initializer='glorot_uniform')
    y = decoder_dense(y)

reconstructed_flattened_image = tfkl.Dense(
    FLATTENED_SIZE,
    activation='sigmoid',
    kernel_initializer='glorot_uniform',
)(y)
reshaped_image = tfkl.Reshape(consts.OUTPUT_SHAPE)(reconstructed_flattened_image)

decoder = tfk.Model(inputs=latent_inputs, outputs=reshaped_image, name='decoder')
decoder.summary()

# Model: wrap both halves and compile with Adam.
model = NCVAE(encoder, decoder)
model.compile(optimizer=tfk.optimizers.Adam(learning_rate=LEARNING_RATE))


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image_input (InputLayer)    [(None, 64, 64, 3)]          0         []                            
                                                                                                  
 flatten (Flatten)           (None, 12288)                0         ['image_input[0][0]']         
                                                                                                  
 dense (Dense)               (None, 8192)                 1006714   ['flatten[0][0]']             
                                                          88                                      
                                                                                                  
 dense_1 (Dense)             (None, 1792)                 1468185   ['dense[0][0]']         

2024-04-30 16:59:17.086579: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 16:59:17.086675: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 16:59:17.086704: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 16:59:17.438747: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-30 16:59:17.438839: I external/local_xla/xla/stream_executor

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2)]               0         
                                                                 
 dense_3 (Dense)             (None, 448)               1344      
                                                                 
 dense_4 (Dense)             (None, 1792)              804608    
                                                                 
 dense_5 (Dense)             (None, 8192)              14688256  
                                                                 
 dense_6 (Dense)             (None, 12288)             100675584 
                                                                 
 reshape (Reshape)           (None, 64, 64, 3)         0         
                                                                 
Total params: 116169792 (443.15 MB)
Trainable params: 11616

In [5]:
# C Model

# Encoder: strided convolutions, then the dense stack, then the latent mean,
# log-variance and a sample drawn from them.
image_input = tfkl.Input(shape=consts.INPUT_SHAPE, name='image_input')

x = image_input
for filters, kernel_size, strides in ENCODER_CONV_CONFIG:
    x = tfkl.Conv2D(filters=filters, kernel_size=(kernel_size, kernel_size), strides=(strides, strides), activation='relu', padding='same')(x)

flattened_image = tfkl.Flatten()(x)

x = flattened_image
for layer_size in ENCODER_HIDDEN_SIZES:
    x = tfkl.Dense(layer_size, 
            activation='relu',
            kernel_initializer='glorot_uniform')(x)
    
z_mean = tfkl.Dense(LATENT_DIM, name='z_mean', kernel_initializer='glorot_uniform')(x)
z_log_var = tfkl.Dense(LATENT_DIM, name='z_log_var', kernel_initializer='glorot_uniform')(x)

z = Sampling()([z_mean, z_log_var])

encoder = tfk.Model(inputs=image_input, outputs=[z_mean, z_log_var, z], name='encoder')
encoder.summary()

# Decoder: dense stack, reshape to FLATTENED_SHAPE, then transposed convolutions.

latent_inputs = tfkl.Input(shape=(LATENT_DIM,))

y = latent_inputs
for layer_size in DECODER_HIDDEN_SIZES:
    y = tfkl.Dense(layer_size, 
            activation='relu',
            kernel_initializer='glorot_uniform')(y)
    
reconstructed_flattened_image = tfkl.Dense(FLATTENED_SIZE, kernel_initializer='glorot_uniform', activation='relu')(y)

y = tfkl.Reshape(FLATTENED_SHAPE)(reconstructed_flattened_image)

# Each transposed convolution takes its kernel size and stride from its own
# config entry but its output channel count from the NEXT entry. The last layer
# has no next entry: it emits 3 channels through a sigmoid instead.
last_index = len(DECODER_CONV_CONFIG) - 1
for index, (_entry_filters, kernel_size, strides) in enumerate(DECODER_CONV_CONFIG):
    is_output_layer = index == last_index
    y = tfkl.Conv2DTranspose(
        filters=3 if is_output_layer else DECODER_CONV_CONFIG[index + 1][0],
        kernel_size=(kernel_size, kernel_size),
        strides=(strides, strides),
        activation='sigmoid' if is_output_layer else 'relu',
        padding='same',
    )(y)

decoder = tfk.Model(inputs=latent_inputs, outputs=y, name='decoder')
decoder.summary()

# Model: the NCVAE wrapper class is reused for the convolutional pair.
    
model = NCVAE(encoder, decoder)
model.compile(optimizer=tfk.optimizers.Adam(learning_rate=LEARNING_RATE))


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image_input (InputLayer)    [(None, 64, 64, 3)]          0         []                            
                                                                                                  
 conv2d_5 (Conv2D)           (None, 32, 32, 144)          4032      ['image_input[0][0]']         
                                                                                                  
 conv2d_6 (Conv2D)           (None, 16, 16, 224)          290528    ['conv2d_5[0][0]']            
                                                                                                  
 conv2d_7 (Conv2D)           (None, 4, 4, 576)            3226176   ['conv2d_6[0][0]']            
                                                                                            

In [5]:
# NC Load weights

# Weights are restored only when BOTH files exist; otherwise nothing is loaded
# and a message is printed.
weight_files = (ENCODER_FILE_PATH, DECODER_FILE_PATH)
if all(os.path.exists(weight_file) for weight_file in weight_files):
    encoder.load_weights(ENCODER_FILE_PATH)
    decoder.load_weights(DECODER_FILE_PATH)
else:
    print('No weights found')

In [6]:
# C Load weights

# If either weight file is missing, report it and load nothing; otherwise
# restore the encoder first and then the decoder.
if not (os.path.exists(ENCODER_FILE_PATH) and os.path.exists(DECODER_FILE_PATH)):
    print('No weights found')
else:
    encoder.load_weights(ENCODER_FILE_PATH)
    decoder.load_weights(DECODER_FILE_PATH)
    

In [6]:
# NC Reconstruction

images = utils.load_examples()

# Create the output folder up front so the cell also runs on a checkout where
# it does not exist yet.
os.makedirs('res/reconstructions', exist_ok=True)

for i, image in enumerate(images):
    # The model is called on a batch of one; element 0 of the result is the
    # reconstruction of this image.
    batch = tf.expand_dims(image, axis=0)
    reconstruction = model(batch)[0]

    # Keep pixel values inside [0, 1] before writing the file.
    reconstruction = tf.clip_by_value(reconstruction, 0.0, 1.0)

    # Save the untouched example next to its reconstruction, sharing the index.
    tfk.preprocessing.image.save_img(f'res/reconstructions/original{i}.png', image)
    tfk.preprocessing.image.save_img(f'res/reconstructions/ncvae{i}.png', reconstruction)

Loading examples...


2024-04-30 16:59:37.799236: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:
# C Reconstruction

images = utils.load_examples()

# Create the output folder up front so the cell also runs on a checkout where
# it does not exist yet.
os.makedirs('res/reconstructions', exist_ok=True)

for i, image in enumerate(images):
    # The model is called on a batch of one; element 0 of the result is the
    # reconstruction of this image.
    batch = tf.expand_dims(image, axis=0)
    reconstruction = model(batch)[0]

    # Keep pixel values inside [0, 1] before writing the file.
    reconstruction = tf.clip_by_value(reconstruction, 0.0, 1.0)

    tfk.preprocessing.image.save_img(f'res/reconstructions/cvae{i}.png', reconstruction)

Loading examples...


2024-04-30 17:02:06.151698: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-04-30 17:02:06.199579: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8906
